[RS6000] Align .toc section
gcc/config/rs6000/rs6000.c (blob fb522fb6144e2c409f436b8b3bed36d4346b63bc)

/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "cfgloop.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "expmed.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ira.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "print-tree.h"
47 #include "varasm.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "output.h"
51 #include "dbxout.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "sched-int.h"
56 #include "gimplify.h"
57 #include "gimple-iterator.h"
58 #include "gimple-walk.h"
59 #include "intl.h"
60 #include "params.h"
61 #include "tm-constrs.h"
62 #include "tree-vectorizer.h"
63 #include "target-globals.h"
64 #include "builtins.h"
65 #include "context.h"
66 #include "tree-pass.h"
67 #if TARGET_XCOFF
68 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
69 #endif
70 #if TARGET_MACHO
71 #include "gstab.h" /* for N_SLINE */
72 #endif
73 #include "case-cfn-macros.h"
74 #include "ppc-auxv.h"
76 /* This file should be included last. */
77 #include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

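/* Like all function-style macros of this shape, min and max evaluate
   their arguments twice, so A and B must not have side effects.  */
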
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call
   so that we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */

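/* For instance, a mode that supports reg+offset addressing in GPRs would
   have (RELOAD_REG_VALID | RELOAD_REG_OFFSET) set in the
   addr_mask[RELOAD_REG_GPR] entry of its rs6000_reg_addr record below.  */
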
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

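/* rs6000_cost is pointed at the cost table matching the processor being
   tuned for (from -mtune= or its default) when options are processed.  */
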
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))

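/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) yields 0x80000000,
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) yields 0x00000001.  */
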
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1682 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1683 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1685 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1686 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1688 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1689 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1691 #undef TARGET_MD_ASM_ADJUST
1692 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1694 #undef TARGET_OPTION_OVERRIDE
1695 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1699 rs6000_builtin_vectorized_function
1701 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1702 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1703 rs6000_builtin_md_vectorized_function
1705 #if !TARGET_MACHO
1706 #undef TARGET_STACK_PROTECT_FAIL
1707 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1708 #endif
1710 #ifdef HAVE_AS_TLS
1711 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1712 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1713 #endif
1715 /* Use a 32-bit anchor range. This leads to sequences like:
1717 addis tmp,anchor,high
1718 add dest,tmp,low
1720 where tmp itself acts as an anchor, and can be shared between
1721 accesses to the same 64k page. */
1722 #undef TARGET_MIN_ANCHOR_OFFSET
1723 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1724 #undef TARGET_MAX_ANCHOR_OFFSET
1725 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
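/* A minimal sketch of the sharing this enables (hypothetical register
   numbers and symbol names, not generated output): two statics in the
   same 64k page can reuse one ADDIS:

        addis 9,2,anchor@ha     ; shared anchor materialization
        lwz 10,a_off@l(9)       ; first access
        lwz 11,b_off@l(9)       ; second access, reusing the anchor  */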
1726 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1727 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1728 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1729 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1731 #undef TARGET_BUILTIN_RECIPROCAL
1732 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1734 #undef TARGET_EXPAND_TO_RTL_HOOK
1735 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1737 #undef TARGET_INSTANTIATE_DECLS
1738 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1740 #undef TARGET_SECONDARY_RELOAD
1741 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1743 #undef TARGET_LEGITIMATE_ADDRESS_P
1744 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1746 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1747 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1749 #undef TARGET_LRA_P
1750 #define TARGET_LRA_P rs6000_lra_p
1752 #undef TARGET_CAN_ELIMINATE
1753 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1755 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1756 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1758 #undef TARGET_TRAMPOLINE_INIT
1759 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1761 #undef TARGET_FUNCTION_VALUE
1762 #define TARGET_FUNCTION_VALUE rs6000_function_value
1764 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1765 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1767 #undef TARGET_OPTION_SAVE
1768 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1770 #undef TARGET_OPTION_RESTORE
1771 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1773 #undef TARGET_OPTION_PRINT
1774 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1776 #undef TARGET_CAN_INLINE_P
1777 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1779 #undef TARGET_SET_CURRENT_FUNCTION
1780 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1782 #undef TARGET_LEGITIMATE_CONSTANT_P
1783 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1785 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1786 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1788 #undef TARGET_CAN_USE_DOLOOP_P
1789 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1791 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1792 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1794 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1795 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1796 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1797 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1798 #undef TARGET_UNWIND_WORD_MODE
1799 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1801 #undef TARGET_OFFLOAD_OPTIONS
1802 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1804 #undef TARGET_C_MODE_FOR_SUFFIX
1805 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1807 #undef TARGET_INVALID_BINARY_OP
1808 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1810 #undef TARGET_OPTAB_SUPPORTED_P
1811 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1814 /* Processor table. */
1815 struct rs6000_ptt
1817 const char *const name; /* Canonical processor name. */
1818 const enum processor_type processor; /* Processor type enum value. */
1819 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1822 static struct rs6000_ptt const processor_target_table[] =
1824 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1825 #include "rs6000-cpus.def"
1826 #undef RS6000_CPU
1829 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1830 name is invalid. */
1832 static int
1833 rs6000_cpu_name_lookup (const char *name)
1835 size_t i;
1837 if (name != NULL)
1839 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1840 if (! strcmp (name, processor_target_table[i].name))
1841 return (int)i;
1844 return -1;
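/* Usage sketch (a hypothetical caller, mirroring how the option-handling
   code consumes the table):

        int idx = rs6000_cpu_name_lookup ("power8");
        if (idx >= 0)
          isa_flags |= processor_target_table[idx].target_enable;

   where a negative return value means the -mcpu=/-mtune= name was not
   recognized.  */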
1848 /* Return number of consecutive hard regs needed starting at reg REGNO
1849 to hold something of mode MODE.
1850 This is ordinarily the length in words of a value of mode MODE
1851 but can be less for certain modes in special long registers.
1853 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1854 scalar instructions. The upper 32 bits are only available to the
1855 SIMD instructions.
1857 POWER and PowerPC GPRs hold 32 bits worth;
1858 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1860 static int
1861 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1863 unsigned HOST_WIDE_INT reg_size;
1865 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1866 128-bit floating point that can go in vector registers, which has VSX
1867 memory addressing. */
1868 if (FP_REGNO_P (regno))
1869 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1870 ? UNITS_PER_VSX_WORD
1871 : UNITS_PER_FP_WORD);
1873 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1874 reg_size = UNITS_PER_SPE_WORD;
1876 else if (ALTIVEC_REGNO_P (regno))
1877 reg_size = UNITS_PER_ALTIVEC_WORD;
1879 /* The value returned for SCmode in the E500 double case is 2 for
1880 ABI compatibility; storing an SCmode value in a single register
1881 would require function_arg and rs6000_spe_function_arg to handle
1882 SCmode so as to pass the value correctly in a pair of
1883 registers. */
1884 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1885 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1886 reg_size = UNITS_PER_FP_WORD;
1888 else
1889 reg_size = UNITS_PER_WORD;
1891 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
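/* Worked example of the rounding division above: V4SImode is 16 bytes, so it
   needs (16 + 4 - 1) / 4 == 4 registers when reg_size is 4 (32-bit GPRs), but
   (16 + 16 - 1) / 16 == 1 register when reg_size is 16 (an Altivec register).
   The expression rounds up, so a partially used register still counts as a
   whole one.  */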
1894 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1895 MODE. */
1896 static int
1897 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1899 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1901 if (COMPLEX_MODE_P (mode))
1902 mode = GET_MODE_INNER (mode);
1904 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1905 register combinations; we use PTImode where we need to deal with quad
1906 word memory operations. Don't allow quad words in the argument or frame
1907 pointer registers, just registers 0..31. */
1908 if (mode == PTImode)
1909 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1910 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1911 && ((regno & 1) == 0));
1913 /* VSX registers that overlap the FPR registers are larger than the FPRs on
1914 non-VSX implementations. Don't allow an item to be split between an FP
1915 register and an Altivec register. Allow TImode in all VSX registers if the
1916 user asked for it. */
1917 if (TARGET_VSX && VSX_REGNO_P (regno)
1918 && (VECTOR_MEM_VSX_P (mode)
1919 || FLOAT128_VECTOR_P (mode)
1920 || reg_addr[mode].scalar_in_vmx_p
1921 || (TARGET_VSX_TIMODE && mode == TImode)
1922 || (TARGET_VADDUQM && mode == V1TImode)))
1924 if (FP_REGNO_P (regno))
1925 return FP_REGNO_P (last_regno);
1927 if (ALTIVEC_REGNO_P (regno))
1929 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1930 return 0;
1932 return ALTIVEC_REGNO_P (last_regno);
1936 /* The GPRs can hold any mode, but values bigger than one register
1937 cannot go past R31. */
1938 if (INT_REGNO_P (regno))
1939 return INT_REGNO_P (last_regno);
1941 /* The float registers (except for VSX vector modes) can only hold floating
1942 modes and DImode. */
1943 if (FP_REGNO_P (regno))
1945 if (FLOAT128_VECTOR_P (mode))
1946 return false;
1948 if (SCALAR_FLOAT_MODE_P (mode)
1949 && (mode != TDmode || (regno % 2) == 0)
1950 && FP_REGNO_P (last_regno))
1951 return 1;
1953 if (GET_MODE_CLASS (mode) == MODE_INT
1954 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1955 return 1;
1957 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1958 && PAIRED_VECTOR_MODE (mode))
1959 return 1;
1961 return 0;
1964 /* The CR register can only hold CC modes. */
1965 if (CR_REGNO_P (regno))
1966 return GET_MODE_CLASS (mode) == MODE_CC;
1968 if (CA_REGNO_P (regno))
1969 return mode == Pmode || mode == SImode;
1971 /* AltiVec modes only go in AltiVec registers. */
1972 if (ALTIVEC_REGNO_P (regno))
1973 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1974 || mode == V1TImode);
1976 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1977 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1978 return 1;
1980 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1981 registers, and the value must fit within the register set. */
1983 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
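/* Illustration of the PTImode rule above on a 64-bit target (a sketch, not a
   call made by this file):

        rs6000_hard_regno_mode_ok (4, PTImode)  == 1   // even GPR pair r4/r5
        rs6000_hard_regno_mode_ok (5, PTImode)  == 0   // odd starting register

   and any pair whose last register falls outside r0..r31 is rejected too.  */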
1986 /* Print interesting facts about registers. */
1987 static void
1988 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1990 int r, m;
1992 for (r = first_regno; r <= last_regno; ++r)
1994 const char *comma = "";
1995 int len;
1997 if (first_regno == last_regno)
1998 fprintf (stderr, "%s:\t", reg_name);
1999 else
2000 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2002 len = 8;
2003 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2004 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2006 if (len > 70)
2008 fprintf (stderr, ",\n\t");
2009 len = 8;
2010 comma = "";
2013 if (rs6000_hard_regno_nregs[m][r] > 1)
2014 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2015 rs6000_hard_regno_nregs[m][r]);
2016 else
2017 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2019 comma = ", ";
2022 if (call_used_regs[r])
2024 if (len > 70)
2026 fprintf (stderr, ",\n\t");
2027 len = 8;
2028 comma = "";
2031 len += fprintf (stderr, "%s%s", comma, "call-used");
2032 comma = ", ";
2035 if (fixed_regs[r])
2037 if (len > 70)
2039 fprintf (stderr, ",\n\t");
2040 len = 8;
2041 comma = "";
2044 len += fprintf (stderr, "%s%s", comma, "fixed");
2045 comma = ", ";
2048 if (len > 70)
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2054 len += fprintf (stderr, "%sreg-class = %s", comma,
2055 reg_class_names[(int)rs6000_regno_regclass[r]]);
2056 comma = ", ";
2058 if (len > 70)
2060 fprintf (stderr, ",\n\t");
2061 comma = "";
2064 fprintf (stderr, "%sregno = %d\n", comma, r);
2068 static const char *
2069 rs6000_debug_vector_unit (enum rs6000_vector v)
2071 const char *ret;
2073 switch (v)
2075 case VECTOR_NONE: ret = "none"; break;
2076 case VECTOR_ALTIVEC: ret = "altivec"; break;
2077 case VECTOR_VSX: ret = "vsx"; break;
2078 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2079 case VECTOR_PAIRED: ret = "paired"; break;
2080 case VECTOR_SPE: ret = "spe"; break;
2081 case VECTOR_OTHER: ret = "other"; break;
2082 default: ret = "unknown"; break;
2085 return ret;
2088 /* Inner function printing just the address mask for a particular reload
2089 register class. */
2090 DEBUG_FUNCTION char *
2091 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2093 static char ret[8];
2094 char *p = ret;
2096 if ((mask & RELOAD_REG_VALID) != 0)
2097 *p++ = 'v';
2098 else if (keep_spaces)
2099 *p++ = ' ';
2101 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2102 *p++ = 'm';
2103 else if (keep_spaces)
2104 *p++ = ' ';
2106 if ((mask & RELOAD_REG_INDEXED) != 0)
2107 *p++ = 'i';
2108 else if (keep_spaces)
2109 *p++ = ' ';
2111 if ((mask & RELOAD_REG_OFFSET) != 0)
2112 *p++ = 'o';
2113 else if (keep_spaces)
2114 *p++ = ' ';
2116 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2117 *p++ = '+';
2118 else if (keep_spaces)
2119 *p++ = ' ';
2121 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2122 *p++ = '+';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2126 if ((mask & RELOAD_REG_AND_M16) != 0)
2127 *p++ = '&';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2131 *p = '\0';
2133 return ret;
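/* Example of the encoding (illustrative): a mask with RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set prints as "vio" without
   keep_spaces, or as "v io   " with keep_spaces, one column per flag in the
   order tested above.  */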
2136 /* Print the address masks in a human-readable fashion. */
2137 DEBUG_FUNCTION void
2138 rs6000_debug_print_mode (ssize_t m)
2140 ssize_t rc;
2141 int spaces = 0;
2142 bool fuse_extra_p;
2144 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2145 for (rc = 0; rc < N_RELOAD_REG; rc++)
2146 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2147 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2149 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2150 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2151 fprintf (stderr, " Reload=%c%c",
2152 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2153 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2154 else
2155 spaces += sizeof (" Reload=sl") - 1;
2157 if (reg_addr[m].scalar_in_vmx_p)
2159 fprintf (stderr, "%*s Upper=y", spaces, "");
2160 spaces = 0;
2162 else
2163 spaces += sizeof (" Upper=y") - 1;
2165 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2166 || reg_addr[m].fused_toc);
2167 if (!fuse_extra_p)
2169 for (rc = 0; rc < N_RELOAD_REG; rc++)
2171 if (rc != RELOAD_REG_ANY)
2173 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2175 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2176 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2177 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2179 fuse_extra_p = true;
2180 break;
2186 if (fuse_extra_p)
2188 fprintf (stderr, "%*s Fuse:", spaces, "");
2189 spaces = 0;
2191 for (rc = 0; rc < N_RELOAD_REG; rc++)
2193 if (rc != RELOAD_REG_ANY)
2195 char load, store;
2197 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2198 load = 'l';
2199 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2200 load = 'L';
2201 else
2202 load = '-';
2204 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2205 store = 's';
2206 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2207 store = 'S';
2208 else
2209 store = '-';
2211 if (load == '-' && store == '-')
2212 spaces += 5;
2213 else
2215 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2216 reload_reg_map[rc].name[0], load, store);
2217 spaces = 0;
2222 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2224 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2225 spaces = 0;
2227 else
2228 spaces += sizeof (" P8gpr") - 1;
2230 if (reg_addr[m].fused_toc)
2232 fprintf (stderr, "%*sToc", (spaces + 1), "");
2233 spaces = 0;
2235 else
2236 spaces += sizeof (" Toc") - 1;
2238 else
2239 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2241 if (rs6000_vector_unit[m] != VECTOR_NONE
2242 || rs6000_vector_mem[m] != VECTOR_NONE)
2244 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2245 spaces, "",
2246 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2247 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2250 fputs ("\n", stderr);
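/* The resulting output is one line per mode, roughly of the shape
   (illustrative only; the exact columns depend on the target flags):

        Mode: DI     <class>: vio++   ...  Reload=sl  Upper=y  Fuse: ...

   where each register-class column is the mask string produced by
   rs6000_debug_addr_mask above.  */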
2253 #define DEBUG_FMT_ID "%-32s= "
2254 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2255 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2256 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2258 /* Print various interesting information with -mdebug=reg. */
2259 static void
2260 rs6000_debug_reg_global (void)
2262 static const char *const tf[2] = { "false", "true" };
2263 const char *nl = (const char *)0;
2264 int m;
2265 size_t m1, m2, v;
2266 char costly_num[20];
2267 char nop_num[20];
2268 char flags_buffer[40];
2269 const char *costly_str;
2270 const char *nop_str;
2271 const char *trace_str;
2272 const char *abi_str;
2273 const char *cmodel_str;
2274 struct cl_target_option cl_opts;
2276 /* Modes we want tieable information on. */
2277 static const machine_mode print_tieable_modes[] = {
2278 QImode,
2279 HImode,
2280 SImode,
2281 DImode,
2282 TImode,
2283 PTImode,
2284 SFmode,
2285 DFmode,
2286 TFmode,
2287 IFmode,
2288 KFmode,
2289 SDmode,
2290 DDmode,
2291 TDmode,
2292 V8QImode,
2293 V4HImode,
2294 V2SImode,
2295 V16QImode,
2296 V8HImode,
2297 V4SImode,
2298 V2DImode,
2299 V1TImode,
2300 V32QImode,
2301 V16HImode,
2302 V8SImode,
2303 V4DImode,
2304 V2TImode,
2305 V2SFmode,
2306 V4SFmode,
2307 V2DFmode,
2308 V8SFmode,
2309 V4DFmode,
2310 CCmode,
2311 CCUNSmode,
2312 CCEQmode,
2315 /* Virtual regs we are interested in. */
2316 const static struct {
2317 int regno; /* register number. */
2318 const char *name; /* register name. */
2319 } virtual_regs[] = {
2320 { STACK_POINTER_REGNUM, "stack pointer:" },
2321 { TOC_REGNUM, "toc: " },
2322 { STATIC_CHAIN_REGNUM, "static chain: " },
2323 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2324 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2325 { ARG_POINTER_REGNUM, "arg pointer: " },
2326 { FRAME_POINTER_REGNUM, "frame pointer:" },
2327 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2328 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2329 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2330 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2331 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2332 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2333 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2334 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2335 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2338 fputs ("\nHard register information:\n", stderr);
2339 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2340 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2341 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2342 LAST_ALTIVEC_REGNO,
2343 "vs");
2344 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2345 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2346 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2347 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2348 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2349 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2350 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2351 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2353 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2354 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2355 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2357 fprintf (stderr,
2358 "\n"
2359 "d reg_class = %s\n"
2360 "f reg_class = %s\n"
2361 "v reg_class = %s\n"
2362 "wa reg_class = %s\n"
2363 "wb reg_class = %s\n"
2364 "wd reg_class = %s\n"
2365 "we reg_class = %s\n"
2366 "wf reg_class = %s\n"
2367 "wg reg_class = %s\n"
2368 "wh reg_class = %s\n"
2369 "wi reg_class = %s\n"
2370 "wj reg_class = %s\n"
2371 "wk reg_class = %s\n"
2372 "wl reg_class = %s\n"
2373 "wm reg_class = %s\n"
2374 "wo reg_class = %s\n"
2375 "wp reg_class = %s\n"
2376 "wq reg_class = %s\n"
2377 "wr reg_class = %s\n"
2378 "ws reg_class = %s\n"
2379 "wt reg_class = %s\n"
2380 "wu reg_class = %s\n"
2381 "wv reg_class = %s\n"
2382 "ww reg_class = %s\n"
2383 "wx reg_class = %s\n"
2384 "wy reg_class = %s\n"
2385 "wz reg_class = %s\n"
2386 "\n",
2387 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2388 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2389 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2390 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2391 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2392 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2393 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2394 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2395 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2396 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2397 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2398 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2399 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2400 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2401 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2402 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2403 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2404 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2405 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2406 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2407 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2408 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2409 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2410 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2411 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2412 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2413 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2415 nl = "\n";
2416 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2417 rs6000_debug_print_mode (m);
2419 fputs ("\n", stderr);
2421 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2423 machine_mode mode1 = print_tieable_modes[m1];
2424 bool first_time = true;
2426 nl = (const char *)0;
2427 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2429 machine_mode mode2 = print_tieable_modes[m2];
2430 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2432 if (first_time)
2434 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2435 nl = "\n";
2436 first_time = false;
2439 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2443 if (!first_time)
2444 fputs ("\n", stderr);
2447 if (nl)
2448 fputs (nl, stderr);
2450 if (rs6000_recip_control)
2452 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2454 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2455 if (rs6000_recip_bits[m])
2457 fprintf (stderr,
2458 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2459 GET_MODE_NAME (m),
2460 (RS6000_RECIP_AUTO_RE_P (m)
2461 ? "auto"
2462 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2463 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2464 ? "auto"
2465 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2468 fputs ("\n", stderr);
2471 if (rs6000_cpu_index >= 0)
2473 const char *name = processor_target_table[rs6000_cpu_index].name;
2474 HOST_WIDE_INT flags
2475 = processor_target_table[rs6000_cpu_index].target_enable;
2477 sprintf (flags_buffer, "-mcpu=%s flags", name);
2478 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2480 else
2481 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2483 if (rs6000_tune_index >= 0)
2485 const char *name = processor_target_table[rs6000_tune_index].name;
2486 HOST_WIDE_INT flags
2487 = processor_target_table[rs6000_tune_index].target_enable;
2489 sprintf (flags_buffer, "-mtune=%s flags", name);
2490 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2492 else
2493 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2495 cl_target_option_save (&cl_opts, &global_options);
2496 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2497 rs6000_isa_flags);
2499 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2500 rs6000_isa_flags_explicit);
2502 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2503 rs6000_builtin_mask);
2505 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2507 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2508 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2510 switch (rs6000_sched_costly_dep)
2512 case max_dep_latency:
2513 costly_str = "max_dep_latency";
2514 break;
2516 case no_dep_costly:
2517 costly_str = "no_dep_costly";
2518 break;
2520 case all_deps_costly:
2521 costly_str = "all_deps_costly";
2522 break;
2524 case true_store_to_load_dep_costly:
2525 costly_str = "true_store_to_load_dep_costly";
2526 break;
2528 case store_to_load_dep_costly:
2529 costly_str = "store_to_load_dep_costly";
2530 break;
2532 default:
2533 costly_str = costly_num;
2534 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2535 break;
2538 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2540 switch (rs6000_sched_insert_nops)
2542 case sched_finish_regroup_exact:
2543 nop_str = "sched_finish_regroup_exact";
2544 break;
2546 case sched_finish_pad_groups:
2547 nop_str = "sched_finish_pad_groups";
2548 break;
2550 case sched_finish_none:
2551 nop_str = "sched_finish_none";
2552 break;
2554 default:
2555 nop_str = nop_num;
2556 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2557 break;
2560 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2562 switch (rs6000_sdata)
2564 default:
2565 case SDATA_NONE:
2566 break;
2568 case SDATA_DATA:
2569 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2570 break;
2572 case SDATA_SYSV:
2573 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2574 break;
2576 case SDATA_EABI:
2577 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2578 break;
2582 switch (rs6000_traceback)
2584 case traceback_default: trace_str = "default"; break;
2585 case traceback_none: trace_str = "none"; break;
2586 case traceback_part: trace_str = "part"; break;
2587 case traceback_full: trace_str = "full"; break;
2588 default: trace_str = "unknown"; break;
2591 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2593 switch (rs6000_current_cmodel)
2595 case CMODEL_SMALL: cmodel_str = "small"; break;
2596 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2597 case CMODEL_LARGE: cmodel_str = "large"; break;
2598 default: cmodel_str = "unknown"; break;
2601 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2603 switch (rs6000_current_abi)
2605 case ABI_NONE: abi_str = "none"; break;
2606 case ABI_AIX: abi_str = "aix"; break;
2607 case ABI_ELFv2: abi_str = "ELFv2"; break;
2608 case ABI_V4: abi_str = "V4"; break;
2609 case ABI_DARWIN: abi_str = "darwin"; break;
2610 default: abi_str = "unknown"; break;
2613 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2615 if (rs6000_altivec_abi)
2616 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2618 if (rs6000_spe_abi)
2619 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2621 if (rs6000_darwin64_abi)
2622 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2624 if (rs6000_float_gprs)
2625 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2627 fprintf (stderr, DEBUG_FMT_S, "fprs",
2628 (TARGET_FPRS ? "true" : "false"));
2630 fprintf (stderr, DEBUG_FMT_S, "single_float",
2631 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2633 fprintf (stderr, DEBUG_FMT_S, "double_float",
2634 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2636 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2637 (TARGET_SOFT_FLOAT ? "true" : "false"));
2639 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2640 (TARGET_E500_SINGLE ? "true" : "false"));
2642 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2643 (TARGET_E500_DOUBLE ? "true" : "false"));
2645 if (TARGET_LINK_STACK)
2646 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2648 if (targetm.lra_p ())
2649 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2651 if (TARGET_P8_FUSION)
2653 char options[80];
2655 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2656 if (TARGET_TOC_FUSION)
2657 strcat (options, ", toc");
2659 if (TARGET_P8_FUSION_SIGN)
2660 strcat (options, ", sign");
2662 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2665 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2666 TARGET_SECURE_PLT ? "secure" : "bss");
2667 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2668 aix_struct_return ? "aix" : "sysv");
2669 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2670 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2671 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2672 tf[!!rs6000_align_branch_targets]);
2673 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2674 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2675 rs6000_long_double_type_size);
2676 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2677 (int)rs6000_sched_restricted_insns_priority);
2678 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2679 (int)END_BUILTINS);
2680 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2681 (int)RS6000_BUILTIN_COUNT);
2683 if (TARGET_VSX)
2684 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2685 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2687 if (TARGET_DIRECT_MOVE_128)
2688 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2689 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2693 /* Update the addr mask bits in reg_addr to help secondary reload and the
2694 legitimate address support figure out the appropriate addressing to
2695 use. */
2697 static void
2698 rs6000_setup_reg_addr_masks (void)
2700 ssize_t rc, reg, m, nregs;
2701 addr_mask_type any_addr_mask, addr_mask;
2703 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2705 machine_mode m2 = (machine_mode) m;
2706 bool complex_p = false;
2707 size_t msize;
2709 if (COMPLEX_MODE_P (m2))
2711 complex_p = true;
2712 m2 = GET_MODE_INNER (m2);
2715 msize = GET_MODE_SIZE (m2);
2717 /* SDmode is special in that we want to access it only via REG+REG
2718 addressing on power7 and above, since we want to use the LFIWZX and
2719 STFIWX instructions to load it. */
2720 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2722 any_addr_mask = 0;
2723 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2725 addr_mask = 0;
2726 reg = reload_reg_map[rc].reg;
2728 /* Can mode values go in the GPR/FPR/Altivec registers? */
2729 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2731 nregs = rs6000_hard_regno_nregs[m][reg];
2732 addr_mask |= RELOAD_REG_VALID;
2734 /* Indicate if the mode takes more than 1 physical register. If
2735 it takes a single register, indicate it can do REG+REG
2736 addressing. */
2737 if (nregs > 1 || m == BLKmode || complex_p)
2738 addr_mask |= RELOAD_REG_MULTIPLE;
2739 else
2740 addr_mask |= RELOAD_REG_INDEXED;
2742 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2743 addressing. Restrict addressing on SPE for 64-bit types
2744 because of the SUBREG hackery used to address 64-bit floats in
2745 '32-bit' GPRs. If we allow scalars into Altivec registers,
2746 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2748 if (TARGET_UPDATE
2749 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2750 && msize <= 8
2751 && !VECTOR_MODE_P (m2)
2752 && !FLOAT128_VECTOR_P (m2)
2753 && !complex_p
2754 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2755 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2756 && !(TARGET_E500_DOUBLE && msize == 8))
2758 addr_mask |= RELOAD_REG_PRE_INCDEC;
2760 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2761 we don't allow PRE_MODIFY for some multi-register
2762 operations. */
2763 switch (m)
2765 default:
2766 addr_mask |= RELOAD_REG_PRE_MODIFY;
2767 break;
2769 case DImode:
2770 if (TARGET_POWERPC64)
2771 addr_mask |= RELOAD_REG_PRE_MODIFY;
2772 break;
2774 case DFmode:
2775 case DDmode:
2776 if (TARGET_DF_INSN)
2777 addr_mask |= RELOAD_REG_PRE_MODIFY;
2778 break;
2783 /* GPR and FPR registers can do REG+OFFSET addressing, except
2784 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form
2785 addressing for scalars to altivec registers. */
2786 if ((addr_mask != 0) && !indexed_only_p
2787 && msize <= 8
2788 && (rc == RELOAD_REG_GPR
2789 || rc == RELOAD_REG_FPR
2790 || (rc == RELOAD_REG_VMX
2791 && TARGET_P9_DFORM
2792 && (m2 == DFmode || m2 == SFmode))))
2793 addr_mask |= RELOAD_REG_OFFSET;
2795 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2796 addressing on 128-bit types. */
2797 if (rc == RELOAD_REG_VMX && msize == 16
2798 && (addr_mask & RELOAD_REG_VALID) != 0)
2799 addr_mask |= RELOAD_REG_AND_M16;
2801 reg_addr[m].addr_mask[rc] = addr_mask;
2802 any_addr_mask |= addr_mask;
2805 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
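/* A sketch of what the loop computes (assuming a 64-bit ISA 2.07 style
   target): for DImode in the GPR class the mask would typically end up as
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY, which
   rs6000_debug_addr_mask renders as "vio++".  */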
2810 /* Initialize the various global tables that are based on register size. */
2811 static void
2812 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2814 ssize_t r, m, c;
2815 int align64;
2816 int align32;
2818 /* Precalculate REGNO_REG_CLASS. */
2819 rs6000_regno_regclass[0] = GENERAL_REGS;
2820 for (r = 1; r < 32; ++r)
2821 rs6000_regno_regclass[r] = BASE_REGS;
2823 for (r = 32; r < 64; ++r)
2824 rs6000_regno_regclass[r] = FLOAT_REGS;
2826 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2827 rs6000_regno_regclass[r] = NO_REGS;
2829 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2830 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2832 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2833 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2834 rs6000_regno_regclass[r] = CR_REGS;
2836 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2837 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2838 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2839 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2840 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2841 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2842 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2843 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2844 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2845 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2846 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2847 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2849 /* Precalculate register class to simpler reload register class. We don't
2850 need all of the register classes that are combinations of different
2851 classes, just the simple ones that have constraint letters. */
2852 for (c = 0; c < N_REG_CLASSES; c++)
2853 reg_class_to_reg_type[c] = NO_REG_TYPE;
2855 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2856 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2857 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2858 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2859 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2860 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2861 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2862 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2863 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2864 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2865 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2866 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2868 if (TARGET_VSX)
2870 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2871 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2873 else
2875 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2876 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2879 /* Precalculate the valid memory formats as well as the vector information,
2880 this must be set up before the rs6000_hard_regno_nregs_internal calls
2881 below. */
2882 gcc_assert ((int)VECTOR_NONE == 0);
2883 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2884 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2886 gcc_assert ((int)CODE_FOR_nothing == 0);
2887 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2889 gcc_assert ((int)NO_REGS == 0);
2890 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2892 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2893 controls whether the compiler can use native alignment or must keep 128-bit alignment. */
2894 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2896 align64 = 64;
2897 align32 = 32;
2899 else
2901 align64 = 128;
2902 align32 = 128;
2905 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2906 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2907 if (TARGET_FLOAT128)
2909 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2910 rs6000_vector_align[KFmode] = 128;
2912 if (FLOAT128_IEEE_P (TFmode))
2914 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2915 rs6000_vector_align[TFmode] = 128;
2919 /* V2DF mode, VSX only. */
2920 if (TARGET_VSX)
2922 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2923 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2924 rs6000_vector_align[V2DFmode] = align64;
2927 /* V4SF mode, either VSX or Altivec. */
2928 if (TARGET_VSX)
2930 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2931 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2932 rs6000_vector_align[V4SFmode] = align32;
2934 else if (TARGET_ALTIVEC)
2936 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2937 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2938 rs6000_vector_align[V4SFmode] = align32;
2941 /* V16QImode, V8HImode, V4SImode have Altivec-only arithmetic, but the loads
2942 and stores may use VSX. */
2943 if (TARGET_ALTIVEC)
2945 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2946 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2947 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2948 rs6000_vector_align[V4SImode] = align32;
2949 rs6000_vector_align[V8HImode] = align32;
2950 rs6000_vector_align[V16QImode] = align32;
2952 if (TARGET_VSX)
2954 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2955 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2956 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2958 else
2960 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2961 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2962 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2966 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode. Allow
2967 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2968 if (TARGET_VSX)
2970 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2971 rs6000_vector_unit[V2DImode]
2972 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2973 rs6000_vector_align[V2DImode] = align64;
2975 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2976 rs6000_vector_unit[V1TImode]
2977 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2978 rs6000_vector_align[V1TImode] = 128;
2981 /* DFmode, see if we want to use the VSX unit. Memory is handled
2982 differently, so don't set rs6000_vector_mem. */
2983 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2985 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2986 rs6000_vector_align[DFmode] = 64;
2989 /* SFmode, see if we want to use the VSX unit. */
2990 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2992 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2993 rs6000_vector_align[SFmode] = 32;
2996 /* Allow TImode in VSX register and set the VSX memory macros. */
2997 if (TARGET_VSX && TARGET_VSX_TIMODE)
2999 rs6000_vector_mem[TImode] = VECTOR_VSX;
3000 rs6000_vector_align[TImode] = align64;
3003 /* TODO add SPE and paired floating point vector support. */
3005 /* Register class constraints for the constraints that depend on compile
3006 switches. When the VSX code was added, different constraints were added
3007 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3008 of the VSX registers are used. The register classes for scalar floating
3009 point types are set based on whether we allow that type into the upper
3010 (Altivec) registers. GCC has register classes that target the Altivec
3011 registers for load/store operations, so that a VSX memory operation can be
3012 selected instead of the traditional floating point operation. The
3013 constraints are:
3015 d - Register class to use with traditional DFmode instructions.
3016 f - Register class to use with traditional SFmode instructions.
3017 v - Altivec register.
3018 wa - Any VSX register.
3019 wc - Reserved to represent individual CR bits (used in LLVM).
3020 wd - Preferred register class for V2DFmode.
3021 wf - Preferred register class for V4SFmode.
3022 wg - Float register for power6x move insns.
3023 wh - FP register for direct move instructions.
3024 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3025 wj - FP or VSX register to hold 64-bit integers for direct moves.
3026 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3027 wl - Float register if we can do 32-bit signed int loads.
3028 wm - VSX register for ISA 2.07 direct move operations.
3029 wn - always NO_REGS.
3030 wr - GPR if 64-bit mode is permitted.
3031 ws - Register class to do ISA 2.06 DF operations.
3032 wt - VSX register for TImode in VSX registers.
3033 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3034 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3035 ww - Register class to do SF conversions in with VSX operations.
3036 wx - Float register if we can do 32-bit int stores.
3037 wy - Register class to do ISA 2.07 SF operations.
3038 wz - Float register if we can do 32-bit unsigned int loads. */
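/* As a hedged illustration of how such constraints surface outside the
   compiler (this is user-level inline asm, not code from this file): "wa"
   selects any VSX register, and the %x output modifier prints its VSX form:

        __vector double
        vsx_add (__vector double a, __vector double b)
        {
          __vector double r;
          __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
          return r;
        }
*/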
3040 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3041 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3043 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3044 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3046 if (TARGET_VSX)
3048 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3049 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3050 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3051 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
3053 if (TARGET_VSX_TIMODE)
3054 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3056 if (TARGET_UPPER_REGS_DF) /* DFmode */
3058 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3059 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3061 else
3062 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3065 /* Add conditional constraints based on various options, to allow us to
3066 collapse multiple insn patterns. */
3067 if (TARGET_ALTIVEC)
3068 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3070 if (TARGET_MFPGPR) /* DFmode */
3071 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3073 if (TARGET_LFIWAX)
3074 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3076 if (TARGET_DIRECT_MOVE)
3078 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3079 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3080 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3081 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3082 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3083 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3086 if (TARGET_POWERPC64)
3087 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3089 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3091 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3092 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3093 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3095 else if (TARGET_P8_VECTOR)
3097 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3098 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3100 else if (TARGET_VSX)
3101 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3103 if (TARGET_STFIWX)
3104 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3106 if (TARGET_LFIWZX)
3107 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3109 if (TARGET_FLOAT128)
3111 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3112 if (FLOAT128_IEEE_P (TFmode))
3113 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3116 /* Support for new D-form instructions. */
3117 if (TARGET_P9_DFORM)
3118 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3120 /* Support for ISA 3.0 (power9) vectors. */
3121 if (TARGET_P9_VECTOR)
3122 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3124 /* Support for new direct moves (ISA 3.0 + 64bit). */
3125 if (TARGET_DIRECT_MOVE_128)
3126 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3128 /* Set up the reload helper and direct move functions. */
3129 if (TARGET_VSX || TARGET_ALTIVEC)
3131 if (TARGET_64BIT)
3133 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3134 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3135 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3136 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3137 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3138 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3139 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3140 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3141 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3142 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3143 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3144 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3145 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3146 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3147 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3148 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3149 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3150 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3151 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3152 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3154 if (FLOAT128_VECTOR_P (KFmode))
3156 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3157 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3160 if (FLOAT128_VECTOR_P (TFmode))
3162 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3163 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3166 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3167 available. */
3168 if (TARGET_NO_SDMODE_STACK)
3170 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3171 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3174 if (TARGET_VSX_TIMODE)
3176 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3177 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3180 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3182 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3183 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3184 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3185 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3186 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3187 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3188 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3189 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3190 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3192 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3193 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3194 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3195 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3196 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3197 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3198 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3199 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3200 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3202 if (FLOAT128_VECTOR_P (KFmode))
3204 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3205 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3208 if (FLOAT128_VECTOR_P (TFmode))
3210 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3211 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3215 else
3217 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3218 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3219 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3220 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3221 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3222 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3223 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3224 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3225 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3226 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3227 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3228 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3229 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3230 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3231 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3232 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3233 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3234 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3235 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3236 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3238 if (FLOAT128_VECTOR_P (KFmode))
3240 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3241 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3244 if (FLOAT128_IEEE_P (TFmode))
3246 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3247 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3250 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3251 available. */
3252 if (TARGET_NO_SDMODE_STACK)
3254 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3255 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3258 if (TARGET_VSX_TIMODE)
3260 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3261 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3264 if (TARGET_DIRECT_MOVE)
3266 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3267 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3268 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3272 if (TARGET_UPPER_REGS_DF)
3273 reg_addr[DFmode].scalar_in_vmx_p = true;
3275 if (TARGET_UPPER_REGS_SF)
3276 reg_addr[SFmode].scalar_in_vmx_p = true;
3279 /* Setup the fusion operations. */
3280 if (TARGET_P8_FUSION)
3282 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3283 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3284 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3285 if (TARGET_64BIT)
3286 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3289 if (TARGET_P9_FUSION)
3291 struct fuse_insns {
3292 enum machine_mode mode; /* mode of the fused type. */
3293 enum machine_mode pmode; /* pointer mode. */
3294 enum rs6000_reload_reg_type rtype; /* register type. */
3295 enum insn_code load; /* load insn. */
3296 enum insn_code store; /* store insn. */
3299 static const struct fuse_insns addis_insns[] = {
3300 { SFmode, DImode, RELOAD_REG_FPR,
3301 CODE_FOR_fusion_fpr_di_sf_load,
3302 CODE_FOR_fusion_fpr_di_sf_store },
3304 { SFmode, SImode, RELOAD_REG_FPR,
3305 CODE_FOR_fusion_fpr_si_sf_load,
3306 CODE_FOR_fusion_fpr_si_sf_store },
3308 { DFmode, DImode, RELOAD_REG_FPR,
3309 CODE_FOR_fusion_fpr_di_df_load,
3310 CODE_FOR_fusion_fpr_di_df_store },
3312 { DFmode, SImode, RELOAD_REG_FPR,
3313 CODE_FOR_fusion_fpr_si_df_load,
3314 CODE_FOR_fusion_fpr_si_df_store },
3316 { DImode, DImode, RELOAD_REG_FPR,
3317 CODE_FOR_fusion_fpr_di_di_load,
3318 CODE_FOR_fusion_fpr_di_di_store },
3320 { DImode, SImode, RELOAD_REG_FPR,
3321 CODE_FOR_fusion_fpr_si_di_load,
3322 CODE_FOR_fusion_fpr_si_di_store },
3324 { QImode, DImode, RELOAD_REG_GPR,
3325 CODE_FOR_fusion_gpr_di_qi_load,
3326 CODE_FOR_fusion_gpr_di_qi_store },
3328 { QImode, SImode, RELOAD_REG_GPR,
3329 CODE_FOR_fusion_gpr_si_qi_load,
3330 CODE_FOR_fusion_gpr_si_qi_store },
3332 { HImode, DImode, RELOAD_REG_GPR,
3333 CODE_FOR_fusion_gpr_di_hi_load,
3334 CODE_FOR_fusion_gpr_di_hi_store },
3336 { HImode, SImode, RELOAD_REG_GPR,
3337 CODE_FOR_fusion_gpr_si_hi_load,
3338 CODE_FOR_fusion_gpr_si_hi_store },
3340 { SImode, DImode, RELOAD_REG_GPR,
3341 CODE_FOR_fusion_gpr_di_si_load,
3342 CODE_FOR_fusion_gpr_di_si_store },
3344 { SImode, SImode, RELOAD_REG_GPR,
3345 CODE_FOR_fusion_gpr_si_si_load,
3346 CODE_FOR_fusion_gpr_si_si_store },
3348 { SFmode, DImode, RELOAD_REG_GPR,
3349 CODE_FOR_fusion_gpr_di_sf_load,
3350 CODE_FOR_fusion_gpr_di_sf_store },
3352 { SFmode, SImode, RELOAD_REG_GPR,
3353 CODE_FOR_fusion_gpr_si_sf_load,
3354 CODE_FOR_fusion_gpr_si_sf_store },
3356 { DImode, DImode, RELOAD_REG_GPR,
3357 CODE_FOR_fusion_gpr_di_di_load,
3358 CODE_FOR_fusion_gpr_di_di_store },
3360 { DFmode, DImode, RELOAD_REG_GPR,
3361 CODE_FOR_fusion_gpr_di_df_load,
3362 CODE_FOR_fusion_gpr_di_df_store },
3365 enum machine_mode cur_pmode = Pmode;
3366 size_t i;
3368 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3370 enum machine_mode xmode = addis_insns[i].mode;
3371 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3373 if (addis_insns[i].pmode != cur_pmode)
3374 continue;
3376 if (rtype == RELOAD_REG_FPR
3377 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3378 continue;
3380 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3381 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
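#if 0
/* Hedged sketch (illustrative only, not part of GCC): the loop above is a
   table-driven dispatch.  Entries whose pointer mode does not match the
   current Pmode are skipped, so one static table serves both 32-bit and
   64-bit pointer modes.  A minimal model of the idiom, with ints standing
   in for the enum types:  */
struct example_fuse { int mode; int pmode; int load; int store; };

static void
example_fill_fusion (int cur_pmode, const struct example_fuse *tab,
                     size_t n, int loads[], int stores[])
{
  size_t i;
  for (i = 0; i < n; i++)
    if (tab[i].pmode == cur_pmode)
      {
        loads[tab[i].mode] = tab[i].load;
        stores[tab[i].mode] = tab[i].store;
      }
}
#endif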
3385 /* Note the types for which we support fusing a TOC setup with a memory insn.
3386 We only do fused TOCs for medium/large code models. */
3387 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3388 && (TARGET_CMODEL != CMODEL_SMALL))
3390 reg_addr[QImode].fused_toc = true;
3391 reg_addr[HImode].fused_toc = true;
3392 reg_addr[SImode].fused_toc = true;
3393 reg_addr[DImode].fused_toc = true;
3394 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3396 if (TARGET_SINGLE_FLOAT)
3397 reg_addr[SFmode].fused_toc = true;
3398 if (TARGET_DOUBLE_FLOAT)
3399 reg_addr[DFmode].fused_toc = true;
3403 /* Precalculate HARD_REGNO_NREGS. */
3404 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3405 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3406 rs6000_hard_regno_nregs[m][r]
3407 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3409 /* Precalculate HARD_REGNO_MODE_OK. */
3410 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3411 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3412 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3413 rs6000_hard_regno_mode_ok_p[m][r] = true;
3415 /* Precalculate CLASS_MAX_NREGS sizes. */
3416 for (c = 0; c < LIM_REG_CLASSES; ++c)
3418 int reg_size;
3420 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3421 reg_size = UNITS_PER_VSX_WORD;
3423 else if (c == ALTIVEC_REGS)
3424 reg_size = UNITS_PER_ALTIVEC_WORD;
3426 else if (c == FLOAT_REGS)
3427 reg_size = UNITS_PER_FP_WORD;
3429 else
3430 reg_size = UNITS_PER_WORD;
3432 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3434 machine_mode m2 = (machine_mode)m;
3435 int reg_size2 = reg_size;
3437 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3438 in VSX. */
3439 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3440 reg_size2 = UNITS_PER_FP_WORD;
3442 rs6000_class_max_nregs[m][c]
3443 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3447 if (TARGET_E500_DOUBLE)
3448 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
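#if 0
/* Hedged sketch (illustrative only): the class-max-nregs computation above
   is a ceiling division of the mode size by the register size.  E.g. a
   16-byte mode held in 8-byte registers needs (16 + 8 - 1) / 8 == 2
   registers, and a 10-byte mode also needs 2.  */
static int
example_class_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}
#endif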
3450 /* Calculate the modes for which to automatically generate code using the
3451 reciprocal divide and square root instructions. In the future, possibly
3452 automatically generate the instructions even if the user did not specify
3453 -mrecip; the double-precision reciprocal sqrt estimate on older machines is
3454 not accurate enough. */
3455 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3456 if (TARGET_FRES)
3457 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3458 if (TARGET_FRE)
3459 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3460 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3461 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3462 if (VECTOR_UNIT_VSX_P (V2DFmode))
3463 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3465 if (TARGET_FRSQRTES)
3466 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3467 if (TARGET_FRSQRTE)
3468 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3469 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3470 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3471 if (VECTOR_UNIT_VSX_P (V2DFmode))
3472 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3474 if (rs6000_recip_control)
3476 if (!flag_finite_math_only)
3477 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3478 if (flag_trapping_math)
3479 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3480 if (!flag_reciprocal_math)
3481 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3482 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3484 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3485 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3486 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3488 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3489 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3490 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3492 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3493 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3494 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3496 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3497 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3498 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3500 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3501 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3502 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3504 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3505 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3506 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3508 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3509 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3510 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3512 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3513 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3514 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
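#if 0
/* Hedged example (illustrative only): the HAVE and AUTO bits set above are
   independent.  A mode uses a reciprocal estimate automatically only when
   both the hardware bit and the auto bit are set, e.g.:  */
static bool
example_auto_recip_re_p (machine_mode mode)
{
  return (rs6000_recip_bits[mode] & RS6000_RECIP_MASK_HAVE_RE) != 0
         && (rs6000_recip_bits[mode] & RS6000_RECIP_MASK_AUTO_RE) != 0;
}
#endif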
3518 /* Update the addr mask bits in reg_addr to help secondary reload and the
3519 legitimate-address support figure out the appropriate addressing to
3520 use. */
3521 rs6000_setup_reg_addr_masks ();
3523 if (global_init_p || TARGET_DEBUG_TARGET)
3525 if (TARGET_DEBUG_REG)
3526 rs6000_debug_reg_global ();
3528 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3529 fprintf (stderr,
3530 "SImode variable mult cost = %d\n"
3531 "SImode constant mult cost = %d\n"
3532 "SImode short constant mult cost = %d\n"
3533 "DImode multipliciation cost = %d\n"
3534 "SImode division cost = %d\n"
3535 "DImode division cost = %d\n"
3536 "Simple fp operation cost = %d\n"
3537 "DFmode multiplication cost = %d\n"
3538 "SFmode division cost = %d\n"
3539 "DFmode division cost = %d\n"
3540 "cache line size = %d\n"
3541 "l1 cache size = %d\n"
3542 "l2 cache size = %d\n"
3543 "simultaneous prefetches = %d\n"
3544 "\n",
3545 rs6000_cost->mulsi,
3546 rs6000_cost->mulsi_const,
3547 rs6000_cost->mulsi_const9,
3548 rs6000_cost->muldi,
3549 rs6000_cost->divsi,
3550 rs6000_cost->divdi,
3551 rs6000_cost->fp,
3552 rs6000_cost->dmul,
3553 rs6000_cost->sdiv,
3554 rs6000_cost->ddiv,
3555 rs6000_cost->cache_line_size,
3556 rs6000_cost->l1_cache_size,
3557 rs6000_cost->l2_cache_size,
3558 rs6000_cost->simultaneous_prefetches);
3562 #if TARGET_MACHO
3563 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3565 static void
3566 darwin_rs6000_override_options (void)
3568 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3569 off. */
3570 rs6000_altivec_abi = 1;
3571 TARGET_ALTIVEC_VRSAVE = 1;
3572 rs6000_current_abi = ABI_DARWIN;
3574 if (DEFAULT_ABI == ABI_DARWIN
3575 && TARGET_64BIT)
3576 darwin_one_byte_bool = 1;
3578 if (TARGET_64BIT && ! TARGET_POWERPC64)
3580 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3581 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3583 if (flag_mkernel)
3585 rs6000_default_long_calls = 1;
3586 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3589 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3590 Altivec. */
3591 if (!flag_mkernel && !flag_apple_kext
3592 && TARGET_64BIT
3593 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3594 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3596 /* Unless the user (not the configurer) has explicitly overridden
3597 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3598 G4 unless targeting the kernel. */
3599 if (!flag_mkernel
3600 && !flag_apple_kext
3601 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3602 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3603 && ! global_options_set.x_rs6000_cpu_index)
3605 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3608 #endif
3610 /* If not otherwise specified by a target, make 'long double' equivalent to
3611 'double'. */
3613 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3614 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3615 #endif
3617 /* Return the builtin mask of the various options used that could affect which
3618 builtins were used. In the past we used target_flags, but we've run out of
3619 bits, and some options like SPE and PAIRED are no longer in
3620 target_flags. */
3622 HOST_WIDE_INT
3623 rs6000_builtin_mask_calculate (void)
3625 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3626 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3627 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3628 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3629 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3630 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3631 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3632 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3633 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3634 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3635 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3636 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3637 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3638 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3639 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3640 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3641 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
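#if 0
/* Hedged example (illustrative only): the result is a plain bit set, so a
   caller can test for a builtin feature with a single AND, e.g.:  */
static bool
example_have_altivec_builtins (void)
{
  return (rs6000_builtin_mask_calculate () & RS6000_BTM_ALTIVEC) != 0;
}
#endif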
3644 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3645 to clobber the XER[CA] bit because clobbering that bit without telling
3646 the compiler worked just fine with versions of GCC before GCC 5, and
3647 breaking a lot of older code in ways that are hard to track down is
3648 not such a great idea. */
3650 static rtx_insn *
3651 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3652 vec<const char *> &/*constraints*/,
3653 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3655 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3656 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3657 return NULL;
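#if 0
/* Hedged example (illustrative only): because of the implicit clobber added
   above, an asm that writes the carry bit, such as addic, stays safe even
   though it does not declare XER[CA] itself:  */
static long
example_addic (long y)
{
  long x;
  asm ("addic %0,%1,-1" : "=r" (x) : "r" (y)); /* writes XER[CA].  */
  return x;
}
#endif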
3660 /* Override command line options. Mostly we process the processor type and
3661 sometimes adjust other TARGET_ options. */
3663 static bool
3664 rs6000_option_override_internal (bool global_init_p)
3666 bool ret = true;
3667 bool have_cpu = false;
3669 /* The default cpu requested at configure time, if any. */
3670 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3672 HOST_WIDE_INT set_masks;
3673 int cpu_index;
3674 int tune_index;
3675 struct cl_target_option *main_target_opt
3676 = ((global_init_p || target_option_default_node == NULL)
3677 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3679 /* Print defaults. */
3680 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3681 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3683 /* Remember the explicit arguments. */
3684 if (global_init_p)
3685 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3687 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3688 library functions, so warn about it. The flag may be useful for
3689 performance studies from time to time though, so don't disable it
3690 entirely. */
3691 if (global_options_set.x_rs6000_alignment_flags
3692 && rs6000_alignment_flags == MASK_ALIGN_POWER
3693 && DEFAULT_ABI == ABI_DARWIN
3694 && TARGET_64BIT)
3695 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3696 " it is incompatible with the installed C and C++ libraries");
3698 /* Numerous experiments show that IRA-based loop pressure
3699 calculation works better for RTL loop invariant motion on targets
3700 with enough (>= 32) registers. It is an expensive optimization,
3701 so it is enabled only for peak performance. */
3702 if (optimize >= 3 && global_init_p
3703 && !global_options_set.x_flag_ira_loop_pressure)
3704 flag_ira_loop_pressure = 1;
3706 /* Set the pointer size. */
3707 if (TARGET_64BIT)
3709 rs6000_pmode = (int)DImode;
3710 rs6000_pointer_size = 64;
3712 else
3714 rs6000_pmode = (int)SImode;
3715 rs6000_pointer_size = 32;
3718 /* Some OSs don't support saving the high part of 64-bit registers on context
3719 switch. Other OSs don't support saving Altivec registers. On those OSs,
3720 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3721 if the user wants either, the user must explicitly specify them and we
3722 won't interfere with the user's specification. */
3724 set_masks = POWERPC_MASKS;
3725 #ifdef OS_MISSING_POWERPC64
3726 if (OS_MISSING_POWERPC64)
3727 set_masks &= ~OPTION_MASK_POWERPC64;
3728 #endif
3729 #ifdef OS_MISSING_ALTIVEC
3730 if (OS_MISSING_ALTIVEC)
3731 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3732 #endif
3734 /* Don't override by the processor default if given explicitly. */
3735 set_masks &= ~rs6000_isa_flags_explicit;
3737 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3738 the cpu in a target attribute or pragma, but did not specify a tuning
3739 option, use the cpu for the tuning option rather than the option specified
3740 with -mtune on the command line. Process a '--with-cpu' configuration
3741 request as an implicit --cpu. */
3742 if (rs6000_cpu_index >= 0)
3744 cpu_index = rs6000_cpu_index;
3745 have_cpu = true;
3747 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3749 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3750 have_cpu = true;
3752 else if (implicit_cpu)
3754 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3755 have_cpu = true;
3757 else
3759 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3760 const char *default_cpu = ((!TARGET_POWERPC64)
3761 ? "powerpc"
3762 : ((BYTES_BIG_ENDIAN)
3763 ? "powerpc64"
3764 : "powerpc64le"));
3766 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3767 have_cpu = false;
3770 gcc_assert (cpu_index >= 0);
3772 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3773 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3774 with those from the cpu, except for options that were explicitly set. If
3775 we don't have a cpu, do not override the target bits set in
3776 TARGET_DEFAULT. */
3777 if (have_cpu)
3779 rs6000_isa_flags &= ~set_masks;
3780 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3781 & set_masks);
3783 else
3785 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3786 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3787 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3788 to using rs6000_isa_flags, we need to do the initialization here.
3790 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3791 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3792 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3793 : processor_target_table[cpu_index].target_enable);
3794 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3797 if (rs6000_tune_index >= 0)
3798 tune_index = rs6000_tune_index;
3799 else if (have_cpu)
3801 /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
3802 if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
3803 rs6000_tune_index = tune_index = cpu_index;
3804 else
3806 size_t i;
3807 tune_index = -1;
3808 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3809 if (processor_target_table[i].processor == PROCESSOR_POWER8)
3811 rs6000_tune_index = tune_index = i;
3812 break;
3816 else
3818 size_t i;
3819 enum processor_type tune_proc
3820 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3822 tune_index = -1;
3823 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3824 if (processor_target_table[i].processor == tune_proc)
3826 rs6000_tune_index = tune_index = i;
3827 break;
3831 gcc_assert (tune_index >= 0);
3832 rs6000_cpu = processor_target_table[tune_index].processor;
3834 /* Pick defaults for SPE-related control flags. Do this early to make sure
3835 that the TARGET_ macros are representative ASAP. */
3837 int spe_capable_cpu =
3838 (rs6000_cpu == PROCESSOR_PPC8540
3839 || rs6000_cpu == PROCESSOR_PPC8548);
3841 if (!global_options_set.x_rs6000_spe_abi)
3842 rs6000_spe_abi = spe_capable_cpu;
3844 if (!global_options_set.x_rs6000_spe)
3845 rs6000_spe = spe_capable_cpu;
3847 if (!global_options_set.x_rs6000_float_gprs)
3848 rs6000_float_gprs =
3849 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3850 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3851 : 0);
3854 if (global_options_set.x_rs6000_spe_abi
3855 && rs6000_spe_abi
3856 && !TARGET_SPE_ABI)
3857 error ("not configured for SPE ABI");
3859 if (global_options_set.x_rs6000_spe
3860 && rs6000_spe
3861 && !TARGET_SPE)
3862 error ("not configured for SPE instruction set");
3864 if (main_target_opt != NULL
3865 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3866 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3867 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3868 error ("target attribute or pragma changes SPE ABI");
3870 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3871 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3872 || rs6000_cpu == PROCESSOR_PPCE5500)
3874 if (TARGET_ALTIVEC)
3875 error ("AltiVec not supported in this target");
3876 if (TARGET_SPE)
3877 error ("SPE not supported in this target");
3879 if (rs6000_cpu == PROCESSOR_PPCE6500)
3881 if (TARGET_SPE)
3882 error ("SPE not supported in this target");
3885 /* Disable Cell microcode if we are optimizing for the Cell
3886 and not optimizing for size. */
3887 if (rs6000_gen_cell_microcode == -1)
3888 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3889 && !optimize_size);
3891 /* If we are optimizing big endian systems for space and it's OK to
3892 use instructions that would be microcoded on the Cell, use the
3893 load/store multiple and string instructions. */
3894 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3895 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3896 | OPTION_MASK_STRING);
3898 /* Don't allow -mmultiple or -mstring on little endian systems
3899 unless the cpu is a 750, because the hardware doesn't support the
3900 instructions used in little endian mode, and they cause an alignment
3901 trap. The 750 does not cause an alignment trap (except when the
3902 target is unaligned). */
3904 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3906 if (TARGET_MULTIPLE)
3908 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3909 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3910 warning (0, "-mmultiple is not supported on little endian systems");
3913 if (TARGET_STRING)
3915 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3916 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3917 warning (0, "-mstring is not supported on little endian systems");
3921 /* If little-endian, default to -mstrict-align on older processors.
3922 Testing for htm matches power8 and later. */
3923 if (!BYTES_BIG_ENDIAN
3924 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3925 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3927 /* -maltivec={le,be} implies -maltivec. */
3928 if (rs6000_altivec_element_order != 0)
3929 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3931 /* Disallow -maltivec=le in big endian mode for now. This is not
3932 known to be useful for anyone. */
3933 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3935 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3936 rs6000_altivec_element_order = 0;
3939 /* Add some warnings for VSX. */
3940 if (TARGET_VSX)
3942 const char *msg = NULL;
3943 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3944 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3946 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3947 msg = N_("-mvsx requires hardware floating point");
3948 else
3950 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3951 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3954 else if (TARGET_PAIRED_FLOAT)
3955 msg = N_("-mvsx and -mpaired are incompatible");
3956 else if (TARGET_AVOID_XFORM > 0)
3957 msg = N_("-mvsx needs indexed addressing");
3958 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3959 & OPTION_MASK_ALTIVEC))
3961 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3962 msg = N_("-mvsx and -mno-altivec are incompatible");
3963 else
3964 msg = N_("-mno-altivec disables vsx");
3967 if (msg)
3969 warning (0, msg);
3970 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3971 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3975 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3976 the -mcpu setting to enable options that conflict. */
3977 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3978 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3979 | OPTION_MASK_ALTIVEC
3980 | OPTION_MASK_VSX)) != 0)
3981 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3982 | OPTION_MASK_DIRECT_MOVE)
3983 & ~rs6000_isa_flags_explicit);
3985 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3986 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3988 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3989 unless the user explicitly used the -mno-<option> to disable the code. */
3990 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM || TARGET_P9_MINMAX)
3991 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3992 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3993 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3994 else if (TARGET_VSX)
3995 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3996 else if (TARGET_POPCNTD)
3997 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3998 else if (TARGET_DFP)
3999 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4000 else if (TARGET_CMPB)
4001 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4002 else if (TARGET_FPRND)
4003 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4004 else if (TARGET_POPCNTB)
4005 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4006 else if (TARGET_ALTIVEC)
4007 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4009 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4011 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4012 error ("-mcrypto requires -maltivec");
4013 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4016 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4018 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4019 error ("-mdirect-move requires -mvsx");
4020 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4023 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4025 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4026 error ("-mpower8-vector requires -maltivec");
4027 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4030 if (TARGET_P8_VECTOR && !TARGET_VSX)
4032 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4033 error ("-mpower8-vector requires -mvsx");
4034 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4037 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4039 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4040 error ("-mvsx-timode requires -mvsx");
4041 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4044 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4046 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4047 error ("-mhard-dfp requires -mhard-float");
4048 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4051 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
4052 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
4053 the individual option. */
4054 if (TARGET_UPPER_REGS > 0)
4056 if (TARGET_VSX
4057 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4059 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4060 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4062 if (TARGET_P8_VECTOR
4063 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4065 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4066 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4069 else if (TARGET_UPPER_REGS == 0)
4071 if (TARGET_VSX
4072 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4074 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4075 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4077 if (TARGET_P8_VECTOR
4078 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4080 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4081 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4085 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4087 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4088 error ("-mupper-regs-df requires -mvsx");
4089 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4092 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4094 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4095 error ("-mupper-regs-sf requires -mpower8-vector");
4096 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4099 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4100 silently turn off quad memory mode. */
4101 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4103 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4104 warning (0, N_("-mquad-memory requires 64-bit mode"));
4106 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4107 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4109 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4110 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4113 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4114 the words are reversed, but atomic operations can still be done by
4115 swapping the words. */
4116 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4118 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4119 warning (0, N_("-mquad-memory is not available in little endian mode"));
4121 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4124 /* Assume that if the user asked for normal quad memory instructions, they want
4125 the atomic versions as well, unless they explicitly told us not to use quad
4126 word atomic instructions. */
4127 if (TARGET_QUAD_MEMORY
4128 && !TARGET_QUAD_MEMORY_ATOMIC
4129 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4130 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4132 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4133 generating power8 instructions. */
4134 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4135 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4136 & OPTION_MASK_P8_FUSION);
4138 /* Setting additional fusion flags turns on base fusion. */
4139 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4141 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4143 if (TARGET_P8_FUSION_SIGN)
4144 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4146 if (TARGET_TOC_FUSION)
4147 error ("-mtoc-fusion requires -mpower8-fusion");
4149 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4151 else
4152 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4155 /* Power9 fusion is a superset of power8 fusion. */
4156 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4158 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4160 error ("-mpower9-fusion requires -mpower8-fusion");
4161 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4163 else
4164 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4167 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4168 generating power9 instructions. */
4169 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4170 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4171 & OPTION_MASK_P9_FUSION);
4173 /* Power8 does not fuse sign-extended loads with the addis. If we are
4174 optimizing at high levels for speed, convert a sign-extended load into a
4175 zero-extending load and an explicit sign extension. */
4176 if (TARGET_P8_FUSION
4177 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4178 && optimize_function_for_speed_p (cfun)
4179 && optimize >= 3)
4180 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4182 /* TOC fusion requires 64-bit and medium/large code model. */
4183 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4185 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4186 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4187 warning (0, N_("-mtoc-fusion requires 64-bit"));
4190 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4192 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4193 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4194 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4197 /* Turn on -mtoc-fusion by default if p8-fusion is enabled and we are using
4198 the 64-bit medium/large code model. */
4199 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4200 && (TARGET_CMODEL != CMODEL_SMALL)
4201 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4202 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4204 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4205 if (TARGET_P9_DFORM && !TARGET_P9_VECTOR)
4207 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4208 error ("-mpower9-dform requires -mpower9-vector");
4209 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4212 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_DF)
4214 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4215 error ("-mpower9-dform requires -mupper-regs-df");
4216 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4219 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_SF)
4221 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4222 error ("-mpower9-dform requires -mupper-regs-sf");
4223 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4226 /* ISA 3.0 vector instructions include ISA 2.07. */
4227 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4229 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4230 error ("-mpower9-vector requires -mpower8-vector");
4231 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4234 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4235 support. If we only have ISA 2.06 support, and the user did not specify
4236 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4237 but we don't enable the full vectorization support. */
4238 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4239 TARGET_ALLOW_MOVMISALIGN = 1;
4241 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4243 if (TARGET_ALLOW_MOVMISALIGN > 0
4244 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4245 error ("-mallow-movmisalign requires -mvsx");
4247 TARGET_ALLOW_MOVMISALIGN = 0;
4250 /* Determine when unaligned vector accesses are permitted, and when
4251 they are preferred over masked Altivec loads. Note that if
4252 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4253 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4254 not true. */
4255 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4257 if (!TARGET_VSX)
4259 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4260 error ("-mefficient-unaligned-vsx requires -mvsx");
4262 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4265 else if (!TARGET_ALLOW_MOVMISALIGN)
4267 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4268 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4270 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4274 /* __float128 requires VSX support. */
4275 if (TARGET_FLOAT128 && !TARGET_VSX)
4277 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4278 error ("-mfloat128 requires VSX support");
4280 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4283 /* IEEE 128-bit floating point hardware instructions imply enabling
4284 __float128. */
4285 if (TARGET_FLOAT128_HW
4286 && (rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
4287 | OPTION_MASK_DIRECT_MOVE
4288 | OPTION_MASK_UPPER_REGS_DF
4289 | OPTION_MASK_UPPER_REGS_SF)) == 0)
4291 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4292 error ("-mfloat128-hardware requires full ISA 3.0 support");
4294 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4297 else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
4298 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
4299 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4301 if (TARGET_FLOAT128_HW
4302 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4303 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4305 /* Print the options after updating the defaults. */
4306 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4307 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4309 /* E500mc does "better" if we inline more aggressively. Respect the
4310 user's opinion, though. */
4311 if (rs6000_block_move_inline_limit == 0
4312 && (rs6000_cpu == PROCESSOR_PPCE500MC
4313 || rs6000_cpu == PROCESSOR_PPCE500MC64
4314 || rs6000_cpu == PROCESSOR_PPCE5500
4315 || rs6000_cpu == PROCESSOR_PPCE6500))
4316 rs6000_block_move_inline_limit = 128;
4318 /* store_one_arg depends on expand_block_move to handle at least the
4319 size of reg_parm_stack_space. */
4320 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4321 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
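#if 0
/* Hedged worked example (illustrative only): on a 64-bit target a user
   value of -mblock-move-inline-limit=16 is silently raised to 64 by the
   clamp above, so expand_block_move always covers reg_parm_stack_space.  */
static int
example_clamp_limit (int user_limit, bool is64)
{
  int floor_limit = is64 ? 64 : 32;
  return user_limit < floor_limit ? floor_limit : user_limit;
}
#endif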
4323 if (global_init_p)
4325 /* If the appropriate debug option is enabled, replace the target hooks
4326 with debug versions that call the real version and then prints
4327 debugging information. */
4328 if (TARGET_DEBUG_COST)
4330 targetm.rtx_costs = rs6000_debug_rtx_costs;
4331 targetm.address_cost = rs6000_debug_address_cost;
4332 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4335 if (TARGET_DEBUG_ADDR)
4337 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4338 targetm.legitimize_address = rs6000_debug_legitimize_address;
4339 rs6000_secondary_reload_class_ptr
4340 = rs6000_debug_secondary_reload_class;
4341 rs6000_secondary_memory_needed_ptr
4342 = rs6000_debug_secondary_memory_needed;
4343 rs6000_cannot_change_mode_class_ptr
4344 = rs6000_debug_cannot_change_mode_class;
4345 rs6000_preferred_reload_class_ptr
4346 = rs6000_debug_preferred_reload_class;
4347 rs6000_legitimize_reload_address_ptr
4348 = rs6000_debug_legitimize_reload_address;
4349 rs6000_mode_dependent_address_ptr
4350 = rs6000_debug_mode_dependent_address;
4353 if (rs6000_veclibabi_name)
4355 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4356 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4357 else
4359 error ("unknown vectorization library ABI type (%s) for "
4360 "-mveclibabi= switch", rs6000_veclibabi_name);
4361 ret = false;
4366 if (!global_options_set.x_rs6000_long_double_type_size)
4368 if (main_target_opt != NULL
4369 && (main_target_opt->x_rs6000_long_double_type_size
4370 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4371 error ("target attribute or pragma changes long double size");
4372 else
4373 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4376 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4377 if (!global_options_set.x_rs6000_ieeequad)
4378 rs6000_ieeequad = 1;
4379 #endif
4381 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4382 target attribute or pragma which automatically enables both options,
4383 unless the altivec ABI was set. This is set by default for 64-bit, but
4384 not for 32-bit. */
4385 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4386 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4387 | OPTION_MASK_FLOAT128)
4388 & ~rs6000_isa_flags_explicit);
4390 /* Enable Altivec ABI for AIX -maltivec. */
4391 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4393 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4394 error ("target attribute or pragma changes AltiVec ABI");
4395 else
4396 rs6000_altivec_abi = 1;
4399 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4400 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4401 be explicitly overridden in either case. */
4402 if (TARGET_ELF)
4404 if (!global_options_set.x_rs6000_altivec_abi
4405 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4407 if (main_target_opt != NULL &&
4408 !main_target_opt->x_rs6000_altivec_abi)
4409 error ("target attribute or pragma changes AltiVec ABI");
4410 else
4411 rs6000_altivec_abi = 1;
4415 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4416 So far, the only darwin64 targets are also MACH-O. */
4417 if (TARGET_MACHO
4418 && DEFAULT_ABI == ABI_DARWIN
4419 && TARGET_64BIT)
4421 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4422 error ("target attribute or pragma changes darwin64 ABI");
4423 else
4425 rs6000_darwin64_abi = 1;
4426 /* Default to natural alignment, for better performance. */
4427 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4431 /* Place FP constants in the constant pool instead of the TOC
4432 if section anchors are enabled. */
4433 if (flag_section_anchors
4434 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4435 TARGET_NO_FP_IN_TOC = 1;
4437 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4438 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4440 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4441 SUBTARGET_OVERRIDE_OPTIONS;
4442 #endif
4443 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4444 SUBSUBTARGET_OVERRIDE_OPTIONS;
4445 #endif
4446 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4447 SUB3TARGET_OVERRIDE_OPTIONS;
4448 #endif
4450 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4451 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4453 /* For the E500 family of cores, reset the single/double FP flags to let us
4454 check that they remain constant across attributes or pragmas. Also,
4455 clear a possible request for string instructions, which are not supported
4456 and which we might have silently enabled above for -Os.
4458 For other families, clear ISEL in case it was set implicitly.
4461 switch (rs6000_cpu)
4463 case PROCESSOR_PPC8540:
4464 case PROCESSOR_PPC8548:
4465 case PROCESSOR_PPCE500MC:
4466 case PROCESSOR_PPCE500MC64:
4467 case PROCESSOR_PPCE5500:
4468 case PROCESSOR_PPCE6500:
4470 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4471 rs6000_double_float = TARGET_E500_DOUBLE;
4473 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4475 break;
4477 default:
4479 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4480 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4482 break;
4485 if (main_target_opt)
4487 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4488 error ("target attribute or pragma changes single precision floating "
4489 "point");
4490 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4491 error ("target attribute or pragma changes double precision floating "
4492 "point");
4495 /* Detect invalid option combinations with E500. */
4496 CHECK_E500_OPTIONS;
4498 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4499 && rs6000_cpu != PROCESSOR_POWER5
4500 && rs6000_cpu != PROCESSOR_POWER6
4501 && rs6000_cpu != PROCESSOR_POWER7
4502 && rs6000_cpu != PROCESSOR_POWER8
4503 && rs6000_cpu != PROCESSOR_POWER9
4504 && rs6000_cpu != PROCESSOR_PPCA2
4505 && rs6000_cpu != PROCESSOR_CELL
4506 && rs6000_cpu != PROCESSOR_PPC476);
4507 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4508 || rs6000_cpu == PROCESSOR_POWER5
4509 || rs6000_cpu == PROCESSOR_POWER7
4510 || rs6000_cpu == PROCESSOR_POWER8
4511 || rs6000_cpu == PROCESSOR_POWER9);
4512 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4513 || rs6000_cpu == PROCESSOR_POWER5
4514 || rs6000_cpu == PROCESSOR_POWER6
4515 || rs6000_cpu == PROCESSOR_POWER7
4516 || rs6000_cpu == PROCESSOR_POWER8
4517 || rs6000_cpu == PROCESSOR_POWER9
4518 || rs6000_cpu == PROCESSOR_PPCE500MC
4519 || rs6000_cpu == PROCESSOR_PPCE500MC64
4520 || rs6000_cpu == PROCESSOR_PPCE5500
4521 || rs6000_cpu == PROCESSOR_PPCE6500);
4523 /* Allow debug switches to override the above settings. These are set to -1
4524 in rs6000.opt to indicate the user hasn't directly set the switch. */
4525 if (TARGET_ALWAYS_HINT >= 0)
4526 rs6000_always_hint = TARGET_ALWAYS_HINT;
4528 if (TARGET_SCHED_GROUPS >= 0)
4529 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4531 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4532 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4534 rs6000_sched_restricted_insns_priority
4535 = (rs6000_sched_groups ? 1 : 0);
4537 /* Handle -msched-costly-dep option. */
4538 rs6000_sched_costly_dep
4539 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4541 if (rs6000_sched_costly_dep_str)
4543 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4544 rs6000_sched_costly_dep = no_dep_costly;
4545 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4546 rs6000_sched_costly_dep = all_deps_costly;
4547 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4548 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4549 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4550 rs6000_sched_costly_dep = store_to_load_dep_costly;
4551 else
4552 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4553 atoi (rs6000_sched_costly_dep_str));
4556 /* Handle -minsert-sched-nops option. */
4557 rs6000_sched_insert_nops
4558 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4560 if (rs6000_sched_insert_nops_str)
4562 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4563 rs6000_sched_insert_nops = sched_finish_none;
4564 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4565 rs6000_sched_insert_nops = sched_finish_pad_groups;
4566 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4567 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4568 else
4569 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4570 atoi (rs6000_sched_insert_nops_str));
4573 if (global_init_p)
4575 #ifdef TARGET_REGNAMES
4576 /* If the user desires alternate register names, copy in the
4577 alternate names now. */
4578 if (TARGET_REGNAMES)
4579 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4580 #endif
4582 /* Set aix_struct_return last, after the ABI is determined.
4583 If -maix-struct-return or -msvr4-struct-return was explicitly
4584 used, don't override with the ABI default. */
4585 if (!global_options_set.x_aix_struct_return)
4586 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4588 #if 0
4589 /* IBM XL compiler defaults to unsigned bitfields. */
4590 if (TARGET_XL_COMPAT)
4591 flag_signed_bitfields = 0;
4592 #endif
4594 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4595 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4597 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4599 /* We can only guarantee the availability of DI pseudo-ops when
4600 assembling for 64-bit targets. */
4601 if (!TARGET_64BIT)
4603 targetm.asm_out.aligned_op.di = NULL;
4604 targetm.asm_out.unaligned_op.di = NULL;
4608 /* Set branch target alignment, if not optimizing for size. */
4609 if (!optimize_size)
4611 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4612 8-byte aligned to avoid misprediction by the branch predictor. */
4613 if (rs6000_cpu == PROCESSOR_TITAN
4614 || rs6000_cpu == PROCESSOR_CELL)
4616 if (align_functions <= 0)
4617 align_functions = 8;
4618 if (align_jumps <= 0)
4619 align_jumps = 8;
4620 if (align_loops <= 0)
4621 align_loops = 8;
4623 if (rs6000_align_branch_targets)
4625 if (align_functions <= 0)
4626 align_functions = 16;
4627 if (align_jumps <= 0)
4628 align_jumps = 16;
4629 if (align_loops <= 0)
4631 can_override_loop_align = 1;
4632 align_loops = 16;
4635 if (align_jumps_max_skip <= 0)
4636 align_jumps_max_skip = 15;
4637 if (align_loops_max_skip <= 0)
4638 align_loops_max_skip = 15;
4641 /* Arrange to save and restore machine status around nested functions. */
4642 init_machine_status = rs6000_init_machine_status;
4644 /* We should always be splitting complex arguments, but we can't break
4645 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4646 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4647 targetm.calls.split_complex_arg = NULL;
4650 /* Initialize rs6000_cost with the appropriate target costs. */
4651 if (optimize_size)
4652 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4653 else
4654 switch (rs6000_cpu)
4656 case PROCESSOR_RS64A:
4657 rs6000_cost = &rs64a_cost;
4658 break;
4660 case PROCESSOR_MPCCORE:
4661 rs6000_cost = &mpccore_cost;
4662 break;
4664 case PROCESSOR_PPC403:
4665 rs6000_cost = &ppc403_cost;
4666 break;
4668 case PROCESSOR_PPC405:
4669 rs6000_cost = &ppc405_cost;
4670 break;
4672 case PROCESSOR_PPC440:
4673 rs6000_cost = &ppc440_cost;
4674 break;
4676 case PROCESSOR_PPC476:
4677 rs6000_cost = &ppc476_cost;
4678 break;
4680 case PROCESSOR_PPC601:
4681 rs6000_cost = &ppc601_cost;
4682 break;
4684 case PROCESSOR_PPC603:
4685 rs6000_cost = &ppc603_cost;
4686 break;
4688 case PROCESSOR_PPC604:
4689 rs6000_cost = &ppc604_cost;
4690 break;
4692 case PROCESSOR_PPC604e:
4693 rs6000_cost = &ppc604e_cost;
4694 break;
4696 case PROCESSOR_PPC620:
4697 rs6000_cost = &ppc620_cost;
4698 break;
4700 case PROCESSOR_PPC630:
4701 rs6000_cost = &ppc630_cost;
4702 break;
4704 case PROCESSOR_CELL:
4705 rs6000_cost = &ppccell_cost;
4706 break;
4708 case PROCESSOR_PPC750:
4709 case PROCESSOR_PPC7400:
4710 rs6000_cost = &ppc750_cost;
4711 break;
4713 case PROCESSOR_PPC7450:
4714 rs6000_cost = &ppc7450_cost;
4715 break;
4717 case PROCESSOR_PPC8540:
4718 case PROCESSOR_PPC8548:
4719 rs6000_cost = &ppc8540_cost;
4720 break;
4722 case PROCESSOR_PPCE300C2:
4723 case PROCESSOR_PPCE300C3:
4724 rs6000_cost = &ppce300c2c3_cost;
4725 break;
4727 case PROCESSOR_PPCE500MC:
4728 rs6000_cost = &ppce500mc_cost;
4729 break;
4731 case PROCESSOR_PPCE500MC64:
4732 rs6000_cost = &ppce500mc64_cost;
4733 break;
4735 case PROCESSOR_PPCE5500:
4736 rs6000_cost = &ppce5500_cost;
4737 break;
4739 case PROCESSOR_PPCE6500:
4740 rs6000_cost = &ppce6500_cost;
4741 break;
4743 case PROCESSOR_TITAN:
4744 rs6000_cost = &titan_cost;
4745 break;
4747 case PROCESSOR_POWER4:
4748 case PROCESSOR_POWER5:
4749 rs6000_cost = &power4_cost;
4750 break;
4752 case PROCESSOR_POWER6:
4753 rs6000_cost = &power6_cost;
4754 break;
4756 case PROCESSOR_POWER7:
4757 rs6000_cost = &power7_cost;
4758 break;
4760 case PROCESSOR_POWER8:
4761 rs6000_cost = &power8_cost;
4762 break;
4764 case PROCESSOR_POWER9:
4765 rs6000_cost = &power9_cost;
4766 break;
4768 case PROCESSOR_PPCA2:
4769 rs6000_cost = &ppca2_cost;
4770 break;
4772 default:
4773 gcc_unreachable ();
4776 if (global_init_p)
4778 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4779 rs6000_cost->simultaneous_prefetches,
4780 global_options.x_param_values,
4781 global_options_set.x_param_values);
4782 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4783 global_options.x_param_values,
4784 global_options_set.x_param_values);
4785 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4786 rs6000_cost->cache_line_size,
4787 global_options.x_param_values,
4788 global_options_set.x_param_values);
4789 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4790 global_options.x_param_values,
4791 global_options_set.x_param_values);
4793 /* Increase loop peeling limits based on performance analysis. */
4794 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4795 global_options.x_param_values,
4796 global_options_set.x_param_values);
4797 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4798 global_options.x_param_values,
4799 global_options_set.x_param_values);
4801 /* If using typedef char *va_list, signal that
4802 __builtin_va_start (&ap, 0) can be optimized to
4803 ap = __builtin_next_arg (0). */
4804 if (DEFAULT_ABI != ABI_V4)
4805 targetm.expand_builtin_va_start = NULL;
4808 /* Set up single/double float flags.
4809 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4810 then set both flags. */
4811 if (TARGET_HARD_FLOAT && TARGET_FPRS
4812 && rs6000_single_float == 0 && rs6000_double_float == 0)
4813 rs6000_single_float = rs6000_double_float = 1;
4815 /* If not explicitly specified via option, decide whether to generate indexed
4816 load/store instructions. */
4817 if (TARGET_AVOID_XFORM == -1)
4818 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4819 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4820 need indexed accesses and the type used is the scalar type of the element
4821 being loaded or stored. */
4822 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4823 && !TARGET_ALTIVEC);
4825 /* Set the -mrecip options. */
4826 if (rs6000_recip_name)
4828 char *p = ASTRDUP (rs6000_recip_name);
4829 char *q;
4830 unsigned int mask, i;
4831 bool invert;
4833 while ((q = strtok (p, ",")) != NULL)
4835 p = NULL;
4836 if (*q == '!')
4838 invert = true;
4839 q++;
4841 else
4842 invert = false;
4844 if (!strcmp (q, "default"))
4845 mask = ((TARGET_RECIP_PRECISION)
4846 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4847 else
4849 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4850 if (!strcmp (q, recip_options[i].string))
4852 mask = recip_options[i].mask;
4853 break;
4856 if (i == ARRAY_SIZE (recip_options))
4858 error ("unknown option for -mrecip=%s", q);
4859 invert = false;
4860 mask = 0;
4861 ret = false;
4865 if (invert)
4866 rs6000_recip_control &= ~mask;
4867 else
4868 rs6000_recip_control |= mask;
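#if 0
/* Hedged, self-contained sketch (illustrative only) of the option grammar
   parsed above: a comma-separated list where a '!' prefix inverts an item,
   so "-mrecip=all,!divd" would enable every estimate and then clear the
   DFmode-divide bit.  The parameters all_mask/divd_mask are stand-ins for
   entries of the real recip_options table:  */
static unsigned int
example_parse_recip (char *p, unsigned int all_mask, unsigned int divd_mask)
{
  unsigned int control = 0;
  char *q;
  while ((q = strtok (p, ",")) != NULL)
    {
      bool invert = (*q == '!');
      if (invert)
        q++;
      unsigned int mask = (strcmp (q, "all") == 0 ? all_mask
                           : strcmp (q, "divd") == 0 ? divd_mask : 0);
      if (invert)
        control &= ~mask;
      else
        control |= mask;
      p = NULL; /* subsequent strtok calls continue the same string.  */
    }
  return control;
}
#endif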
4872 /* Set the builtin mask of the various options used that could affect which
4873 builtins were used. In the past we used target_flags, but we've run out
4874 of bits, and some options like SPE and PAIRED are no longer in
4875 target_flags. */
4876 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4877 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4878 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4879 rs6000_builtin_mask);
4881 /* Initialize all of the registers. */
4882 rs6000_init_hard_regno_mode_ok (global_init_p);
4884 /* Save the initial options in case the user does function-specific options. */
4885 if (global_init_p)
4886 target_option_default_node = target_option_current_node
4887 = build_target_option_node (&global_options);
4889 /* If not explicitly specified via option, decide whether to generate the
4890 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
4891 if (TARGET_LINK_STACK == -1)
4892 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4894 return ret;
4897 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4898 define the target cpu type. */
4900 static void
4901 rs6000_option_override (void)
4903 (void) rs6000_option_override_internal (true);
4905 /* Register machine-specific passes. This needs to be done at start-up.
4906 It's convenient to do it here (like i386 does). */
4907 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4909 struct register_pass_info analyze_swaps_info
4910 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4912 register_pass (&analyze_swaps_info);
4916 /* Implement targetm.vectorize.builtin_mask_for_load. */
4917 static tree
4918 rs6000_builtin_mask_for_load (void)
4920 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4921 if ((TARGET_ALTIVEC && !TARGET_VSX)
4922 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4923 return altivec_builtin_mask_for_load;
4924 else
4925 return 0;
4928 /* Implement LOOP_ALIGN. */
4930 rs6000_loop_align (rtx label)
4932 basic_block bb;
4933 int ninsns;
4935 /* Don't override loop alignment if -falign-loops was specified. */
4936 if (!can_override_loop_align)
4937 return align_loops_log;
4939 bb = BLOCK_FOR_INSN (label);
4940 ninsns = num_loop_insns(bb->loop_father);
4942 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4943 if (ninsns > 4 && ninsns <= 8
4944 && (rs6000_cpu == PROCESSOR_POWER4
4945 || rs6000_cpu == PROCESSOR_POWER5
4946 || rs6000_cpu == PROCESSOR_POWER6
4947 || rs6000_cpu == PROCESSOR_POWER7
4948 || rs6000_cpu == PROCESSOR_POWER8
4949 || rs6000_cpu == PROCESSOR_POWER9))
4950 return 5;
4951 else
4952 return align_loops_log;
4955 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4956 static int
4957 rs6000_loop_align_max_skip (rtx_insn *label)
4959 return (1 << rs6000_loop_align (label)) - 1;
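#if 0
/* Hedged example (illustrative only): rs6000_loop_align returns a log2
   alignment, so the value 5 above means a 1 << 5 == 32-byte boundary, and
   rs6000_loop_align_max_skip allows up to (1 << 5) - 1 == 31 bytes of
   padding in front of it.  */
static int
example_align_bytes (int log2_align)
{
  return 1 << log2_align;
}
#endif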
4962 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4963 after applying N iterations. This routine does not determine
4964 how many iterations are required to reach the desired alignment. */
4966 static bool
4967 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4969 if (is_packed)
4970 return false;
4972 if (TARGET_32BIT)
4974 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4975 return true;
4977 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4978 return true;
4980 return false;
4982 else
4984 if (TARGET_MACHO)
4985 return false;
4987 /* Assume that all other types are naturally aligned. CHECKME! */
4988 return true;
4992 /* Return true if the vector misalignment factor is supported by the
4993 target. */
4994 static bool
4995 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4996 const_tree type,
4997 int misalignment,
4998 bool is_packed)
5000 if (TARGET_VSX)
5002 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5003 return true;
5005 /* Return false if the movmisalign pattern is not supported for this mode. */
5006 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5007 return false;
5009 if (misalignment == -1)
5011 /* The misalignment factor is unknown at compile time, but we know
5012 it's word aligned. */
5013 if (rs6000_vector_alignment_reachable (type, is_packed))
5015 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5017 if (element_size == 64 || element_size == 32)
5018 return true;
5021 return false;
5024 /* VSX supports word-aligned vectors. */
5025 if (misalignment % 4 == 0)
5026 return true;
5028 return false;
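#if 0
/* Hedged worked example (illustrative only): with VSX but without the
   efficient-unaligned support, a known misalignment of 8 bytes satisfies
   the misalignment % 4 == 0 test above and is accepted, while a
   misalignment of 3 bytes is rejected.  */
static bool
example_vsx_misalign_ok (int misalignment)
{
  return misalignment % 4 == 0;
}
#endif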
5031 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5032 static int
5033 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5034 tree vectype, int misalign)
5036 unsigned elements;
5037 tree elem_type;
5039 switch (type_of_cost)
5041 case scalar_stmt:
5042 case scalar_load:
5043 case scalar_store:
5044 case vector_stmt:
5045 case vector_load:
5046 case vector_store:
5047 case vec_to_scalar:
5048 case scalar_to_vec:
5049 case cond_branch_not_taken:
5050 return 1;
5052 case vec_perm:
5053 if (TARGET_VSX)
5054 return 3;
5055 else
5056 return 1;
5058 case vec_promote_demote:
5059 if (TARGET_VSX)
5060 return 4;
5061 else
5062 return 1;
5064 case cond_branch_taken:
5065 return 3;
5067 case unaligned_load:
5068 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5069 return 1;
5071 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5073 elements = TYPE_VECTOR_SUBPARTS (vectype);
5074 if (elements == 2)
5075 /* Double word aligned. */
5076 return 2;
5078 if (elements == 4)
5080 switch (misalign)
5082 case 8:
5083 /* Double word aligned. */
5084 return 2;
5086 case -1:
5087 /* Unknown misalignment. */
5088 case 4:
5089 case 12:
5090 /* Word aligned. */
5091 return 22;
5093 default:
5094 gcc_unreachable ();
5099 if (TARGET_ALTIVEC)
5100 /* Misaligned loads are not supported. */
5101 gcc_unreachable ();
5103 return 2;
5105 case unaligned_store:
5106 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5107 return 1;
5109 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5111 elements = TYPE_VECTOR_SUBPARTS (vectype);
5112 if (elements == 2)
5113 /* Double word aligned. */
5114 return 2;
5116 if (elements == 4)
5118 switch (misalign)
5120 case 8:
5121 /* Double word aligned. */
5122 return 2;
5124 case -1:
5125 /* Unknown misalignment. */
5126 case 4:
5127 case 12:
5128 /* Word aligned. */
5129 return 23;
5131 default:
5132 gcc_unreachable ();
5137 if (TARGET_ALTIVEC)
5138 /* Misaligned stores are not supported. */
5139 gcc_unreachable ();
5141 return 2;
5143 case vec_construct:
5144 elements = TYPE_VECTOR_SUBPARTS (vectype);
5145 elem_type = TREE_TYPE (vectype);
5146 /* 32-bit vectors loaded into registers are stored as double
5147 precision, so we need n/2 converts in addition to the usual
5148 n/2 merges to construct a vector of short floats from them. */
5149 if (SCALAR_FLOAT_TYPE_P (elem_type)
5150 && TYPE_PRECISION (elem_type) == 32)
5151 return elements + 1;
5152 else
5153 return elements / 2 + 1;
5155 default:
5156 gcc_unreachable ();
5160 /* Implement targetm.vectorize.preferred_simd_mode. */
5162 static machine_mode
5163 rs6000_preferred_simd_mode (machine_mode mode)
5165 if (TARGET_VSX)
5166 switch (mode)
5168 case DFmode:
5169 return V2DFmode;
5170 default:;
5172 if (TARGET_ALTIVEC || TARGET_VSX)
5173 switch (mode)
5175 case SFmode:
5176 return V4SFmode;
5177 case TImode:
5178 return V1TImode;
5179 case DImode:
5180 return V2DImode;
5181 case SImode:
5182 return V4SImode;
5183 case HImode:
5184 return V8HImode;
5185 case QImode:
5186 return V16QImode;
5187 default:;
5189 if (TARGET_SPE)
5190 switch (mode)
5192 case SFmode:
5193 return V2SFmode;
5194 case SImode:
5195 return V2SImode;
5196 default:;
5198 if (TARGET_PAIRED_FLOAT
5199 && mode == SFmode)
5200 return V2SFmode;
5201 return word_mode;
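/* E.g., vectorizing over "double" prefers V2DFmode when VSX is
   enabled; "float" prefers V4SFmode under AltiVec/VSX and V2SFmode
   under SPE or paired float; anything unlisted stays in word_mode.  */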
5204 typedef struct _rs6000_cost_data
5206 struct loop *loop_info;
5207 unsigned cost[3];
5208 } rs6000_cost_data;
5210 /* Test for likely overcommitment of vector hardware resources. If a
5211 loop iteration is relatively large, and too large a percentage of
5212 instructions in the loop are vectorized, the cost model may not
5213 adequately reflect delays from unavailable vector resources.
5214 Penalize the loop body cost for this case. */
5216 static void
5217 rs6000_density_test (rs6000_cost_data *data)
5219 const int DENSITY_PCT_THRESHOLD = 85;
5220 const int DENSITY_SIZE_THRESHOLD = 70;
5221 const int DENSITY_PENALTY = 10;
5222 struct loop *loop = data->loop_info;
5223 basic_block *bbs = get_loop_body (loop);
5224 int nbbs = loop->num_nodes;
5225 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5226 int i, density_pct;
5228 for (i = 0; i < nbbs; i++)
5230 basic_block bb = bbs[i];
5231 gimple_stmt_iterator gsi;
5233 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5235 gimple *stmt = gsi_stmt (gsi);
5236 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5239 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5240 not_vec_cost++;
5244 free (bbs);
5245 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5247 if (density_pct > DENSITY_PCT_THRESHOLD
5248 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5250 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5251 if (dump_enabled_p ())
5252 dump_printf_loc (MSG_NOTE, vect_location,
5253 "density %d%%, cost %d exceeds threshold, penalizing "
5254 "loop body cost by %d%%", density_pct,
5255 vec_cost + not_vec_cost, DENSITY_PENALTY);
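/* Numeric example: with vec_cost == 90 and not_vec_cost == 10 the
   density is 90% (> 85) and the loop size is 100 (> 70), so the body
   cost is scaled to 90 * 110 / 100 == 99.  */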
5259 /* Implement targetm.vectorize.init_cost. */
5261 static void *
5262 rs6000_init_cost (struct loop *loop_info)
5264 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5265 data->loop_info = loop_info;
5266 data->cost[vect_prologue] = 0;
5267 data->cost[vect_body] = 0;
5268 data->cost[vect_epilogue] = 0;
5269 return data;
5272 /* Implement targetm.vectorize.add_stmt_cost. */
5274 static unsigned
5275 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5276 struct _stmt_vec_info *stmt_info, int misalign,
5277 enum vect_cost_model_location where)
5279 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5280 unsigned retval = 0;
5282 if (flag_vect_cost_model)
5284 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5285 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5286 misalign);
5287 /* Statements in an inner loop relative to the loop being
5288 vectorized are weighted more heavily. The value here is
5289 arbitrary and could potentially be improved with analysis. */
5290 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5291 count *= 50; /* FIXME. */
5293 retval = (unsigned) (count * stmt_cost);
5294 cost_data->cost[where] += retval;
5297 return retval;
5300 /* Implement targetm.vectorize.finish_cost. */
5302 static void
5303 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5304 unsigned *body_cost, unsigned *epilogue_cost)
5306 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5308 if (cost_data->loop_info)
5309 rs6000_density_test (cost_data);
5311 *prologue_cost = cost_data->cost[vect_prologue];
5312 *body_cost = cost_data->cost[vect_body];
5313 *epilogue_cost = cost_data->cost[vect_epilogue];
5316 /* Implement targetm.vectorize.destroy_cost_data. */
5318 static void
5319 rs6000_destroy_cost_data (void *data)
5321 free (data);
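/* Together the four hooks above implement the vectorizer's cost-model
   protocol: init_cost allocates the accumulator, add_stmt_cost is
   invoked per statement group with a repeat COUNT, finish_cost applies
   the density heuristic and reports the three phase totals, and
   destroy_cost_data frees the accumulator.  */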
5324 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5325 library with vectorized intrinsics. */
5327 static tree
5328 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5329 tree type_in)
5331 char name[32];
5332 const char *suffix = NULL;
5333 tree fntype, new_fndecl, bdecl = NULL_TREE;
5334 int n_args = 1;
5335 const char *bname;
5336 machine_mode el_mode, in_mode;
5337 int n, in_n;
5339 /* Libmass is suitable only for unsafe math, as it does not correctly
5340 support parts of IEEE (such as denormals) with the required precision.
5341 Only support it if we have VSX to use the simd d2 or f4 functions.
5342 XXX: Add variable length support. */
5343 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5344 return NULL_TREE;
5346 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5347 n = TYPE_VECTOR_SUBPARTS (type_out);
5348 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5349 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5350 if (el_mode != in_mode
5351 || n != in_n)
5352 return NULL_TREE;
5354 switch (fn)
5356 CASE_CFN_ATAN2:
5357 CASE_CFN_HYPOT:
5358 CASE_CFN_POW:
5359 n_args = 2;
5360 /* fall through */
5362 CASE_CFN_ACOS:
5363 CASE_CFN_ACOSH:
5364 CASE_CFN_ASIN:
5365 CASE_CFN_ASINH:
5366 CASE_CFN_ATAN:
5367 CASE_CFN_ATANH:
5368 CASE_CFN_CBRT:
5369 CASE_CFN_COS:
5370 CASE_CFN_COSH:
5371 CASE_CFN_ERF:
5372 CASE_CFN_ERFC:
5373 CASE_CFN_EXP2:
5374 CASE_CFN_EXP:
5375 CASE_CFN_EXPM1:
5376 CASE_CFN_LGAMMA:
5377 CASE_CFN_LOG10:
5378 CASE_CFN_LOG1P:
5379 CASE_CFN_LOG2:
5380 CASE_CFN_LOG:
5381 CASE_CFN_SIN:
5382 CASE_CFN_SINH:
5383 CASE_CFN_SQRT:
5384 CASE_CFN_TAN:
5385 CASE_CFN_TANH:
5386 if (el_mode == DFmode && n == 2)
5388 bdecl = mathfn_built_in (double_type_node, fn);
5389 suffix = "d2"; /* pow -> powd2 */
5391 else if (el_mode == SFmode && n == 4)
5393 bdecl = mathfn_built_in (float_type_node, fn);
5394 suffix = "4"; /* powf -> powf4 */
5396 else
5397 return NULL_TREE;
5398 if (!bdecl)
5399 return NULL_TREE;
5400 break;
5402 default:
5403 return NULL_TREE;
5406 gcc_assert (suffix != NULL);
5407 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5408 if (!bname)
5409 return NULL_TREE;
5411 strcpy (name, bname + sizeof ("__builtin_") - 1);
5412 strcat (name, suffix);
5414 if (n_args == 1)
5415 fntype = build_function_type_list (type_out, type_in, NULL);
5416 else if (n_args == 2)
5417 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5418 else
5419 gcc_unreachable ();
5421 /* Build a function declaration for the vectorized function. */
5422 new_fndecl = build_decl (BUILTINS_LOCATION,
5423 FUNCTION_DECL, get_identifier (name), fntype);
5424 TREE_PUBLIC (new_fndecl) = 1;
5425 DECL_EXTERNAL (new_fndecl) = 1;
5426 DECL_IS_NOVOPS (new_fndecl) = 1;
5427 TREE_READONLY (new_fndecl) = 1;
5429 return new_fndecl;
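/* E.g., for a V2DF pow this strips "__builtin_" from "__builtin_pow"
   and appends "d2", declaring an external two-argument "powd2"; a V4SF
   powf likewise becomes "powf4".  */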
5432 /* Returns a function decl for a vectorized version of the builtin function
5433 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5434 if it is not available. */
5436 static tree
5437 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5438 tree type_in)
5440 machine_mode in_mode, out_mode;
5441 int in_n, out_n;
5443 if (TARGET_DEBUG_BUILTIN)
5444 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5445 combined_fn_name (combined_fn (fn)),
5446 GET_MODE_NAME (TYPE_MODE (type_out)),
5447 GET_MODE_NAME (TYPE_MODE (type_in)));
5449 if (TREE_CODE (type_out) != VECTOR_TYPE
5450 || TREE_CODE (type_in) != VECTOR_TYPE
5451 || !TARGET_VECTORIZE_BUILTINS)
5452 return NULL_TREE;
5454 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5455 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5456 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5457 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5459 switch (fn)
5461 CASE_CFN_COPYSIGN:
5462 if (VECTOR_UNIT_VSX_P (V2DFmode)
5463 && out_mode == DFmode && out_n == 2
5464 && in_mode == DFmode && in_n == 2)
5465 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5466 if (VECTOR_UNIT_VSX_P (V4SFmode)
5467 && out_mode == SFmode && out_n == 4
5468 && in_mode == SFmode && in_n == 4)
5469 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5470 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5471 && out_mode == SFmode && out_n == 4
5472 && in_mode == SFmode && in_n == 4)
5473 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5474 break;
5475 CASE_CFN_CEIL:
5476 if (VECTOR_UNIT_VSX_P (V2DFmode)
5477 && out_mode == DFmode && out_n == 2
5478 && in_mode == DFmode && in_n == 2)
5479 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5480 if (VECTOR_UNIT_VSX_P (V4SFmode)
5481 && out_mode == SFmode && out_n == 4
5482 && in_mode == SFmode && in_n == 4)
5483 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5484 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5485 && out_mode == SFmode && out_n == 4
5486 && in_mode == SFmode && in_n == 4)
5487 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5488 break;
5489 CASE_CFN_FLOOR:
5490 if (VECTOR_UNIT_VSX_P (V2DFmode)
5491 && out_mode == DFmode && out_n == 2
5492 && in_mode == DFmode && in_n == 2)
5493 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5494 if (VECTOR_UNIT_VSX_P (V4SFmode)
5495 && out_mode == SFmode && out_n == 4
5496 && in_mode == SFmode && in_n == 4)
5497 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5498 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5499 && out_mode == SFmode && out_n == 4
5500 && in_mode == SFmode && in_n == 4)
5501 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5502 break;
5503 CASE_CFN_FMA:
5504 if (VECTOR_UNIT_VSX_P (V2DFmode)
5505 && out_mode == DFmode && out_n == 2
5506 && in_mode == DFmode && in_n == 2)
5507 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5508 if (VECTOR_UNIT_VSX_P (V4SFmode)
5509 && out_mode == SFmode && out_n == 4
5510 && in_mode == SFmode && in_n == 4)
5511 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5512 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5513 && out_mode == SFmode && out_n == 4
5514 && in_mode == SFmode && in_n == 4)
5515 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5516 break;
5517 CASE_CFN_TRUNC:
5518 if (VECTOR_UNIT_VSX_P (V2DFmode)
5519 && out_mode == DFmode && out_n == 2
5520 && in_mode == DFmode && in_n == 2)
5521 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5522 if (VECTOR_UNIT_VSX_P (V4SFmode)
5523 && out_mode == SFmode && out_n == 4
5524 && in_mode == SFmode && in_n == 4)
5525 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5526 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5527 && out_mode == SFmode && out_n == 4
5528 && in_mode == SFmode && in_n == 4)
5529 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5530 break;
5531 CASE_CFN_NEARBYINT:
5532 if (VECTOR_UNIT_VSX_P (V2DFmode)
5533 && flag_unsafe_math_optimizations
5534 && out_mode == DFmode && out_n == 2
5535 && in_mode == DFmode && in_n == 2)
5536 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5537 if (VECTOR_UNIT_VSX_P (V4SFmode)
5538 && flag_unsafe_math_optimizations
5539 && out_mode == SFmode && out_n == 4
5540 && in_mode == SFmode && in_n == 4)
5541 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5542 break;
5543 CASE_CFN_RINT:
5544 if (VECTOR_UNIT_VSX_P (V2DFmode)
5545 && !flag_trapping_math
5546 && out_mode == DFmode && out_n == 2
5547 && in_mode == DFmode && in_n == 2)
5548 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5549 if (VECTOR_UNIT_VSX_P (V4SFmode)
5550 && !flag_trapping_math
5551 && out_mode == SFmode && out_n == 4
5552 && in_mode == SFmode && in_n == 4)
5553 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5554 break;
5555 default:
5556 break;
5559 /* Generate calls to libmass if appropriate. */
5560 if (rs6000_veclib_handler)
5561 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5563 return NULL_TREE;
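/* E.g., copysign in a loop vectorized for V2DF maps to
   VSX_BUILTIN_CPSGNDP above; when no table entry matches, the libmass
   handler (installed by -mveclibabi=mass) gets a chance before we
   give up.  */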
5566 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5568 static tree
5569 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5570 tree type_in)
5572 machine_mode in_mode, out_mode;
5573 int in_n, out_n;
5575 if (TARGET_DEBUG_BUILTIN)
5576 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5577 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5578 GET_MODE_NAME (TYPE_MODE (type_out)),
5579 GET_MODE_NAME (TYPE_MODE (type_in)));
5581 if (TREE_CODE (type_out) != VECTOR_TYPE
5582 || TREE_CODE (type_in) != VECTOR_TYPE
5583 || !TARGET_VECTORIZE_BUILTINS)
5584 return NULL_TREE;
5586 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5587 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5588 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5589 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5591 enum rs6000_builtins fn
5592 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5593 switch (fn)
5595 case RS6000_BUILTIN_RSQRTF:
5596 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5597 && out_mode == SFmode && out_n == 4
5598 && in_mode == SFmode && in_n == 4)
5599 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5600 break;
5601 case RS6000_BUILTIN_RSQRT:
5602 if (VECTOR_UNIT_VSX_P (V2DFmode)
5603 && out_mode == DFmode && out_n == 2
5604 && in_mode == DFmode && in_n == 2)
5605 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5606 break;
5607 case RS6000_BUILTIN_RECIPF:
5608 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5609 && out_mode == SFmode && out_n == 4
5610 && in_mode == SFmode && in_n == 4)
5611 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5612 break;
5613 case RS6000_BUILTIN_RECIP:
5614 if (VECTOR_UNIT_VSX_P (V2DFmode)
5615 && out_mode == DFmode && out_n == 2
5616 && in_mode == DFmode && in_n == 2)
5617 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5618 break;
5619 default:
5620 break;
5622 return NULL_TREE;
5625 /* Default CPU string for rs6000*_file_start functions. */
5626 static const char *rs6000_default_cpu;
5628 /* Do anything needed at the start of the asm file. */
5630 static void
5631 rs6000_file_start (void)
5633 char buffer[80];
5634 const char *start = buffer;
5635 FILE *file = asm_out_file;
5637 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5639 default_file_start ();
5641 if (flag_verbose_asm)
5643 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5645 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5647 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5648 start = "";
5651 if (global_options_set.x_rs6000_cpu_index)
5653 fprintf (file, "%s -mcpu=%s", start,
5654 processor_target_table[rs6000_cpu_index].name);
5655 start = "";
5658 if (global_options_set.x_rs6000_tune_index)
5660 fprintf (file, "%s -mtune=%s", start,
5661 processor_target_table[rs6000_tune_index].name);
5662 start = "";
5665 if (PPC405_ERRATUM77)
5667 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5668 start = "";
5671 #ifdef USING_ELFOS_H
5672 switch (rs6000_sdata)
5674 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5675 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5676 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5677 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5680 if (rs6000_sdata && g_switch_value)
5682 fprintf (file, "%s -G %d", start,
5683 g_switch_value);
5684 start = "";
5686 #endif
5688 if (*start == '\0')
5689 putc ('\n', file);
5692 #ifdef USING_ELFOS_H
5693 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5694 || !global_options_set.x_rs6000_cpu_index)
5696 fputs ("\t.machine ", asm_out_file);
5697 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5698 fputs ("power9\n", asm_out_file);
5699 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5700 fputs ("power8\n", asm_out_file);
5701 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5702 fputs ("power7\n", asm_out_file);
5703 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5704 fputs ("power6\n", asm_out_file);
5705 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5706 fputs ("power5\n", asm_out_file);
5707 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5708 fputs ("power4\n", asm_out_file);
5709 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5710 fputs ("ppc64\n", asm_out_file);
5711 else
5712 fputs ("ppc\n", asm_out_file);
5714 #endif
5716 if (DEFAULT_ABI == ABI_ELFv2)
5717 fprintf (file, "\t.abiversion 2\n");
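/* With -fverbose-asm the function above emits something like
       # rs6000/powerpc options: -mcpu=power8 -mtune=power8
   (assuming "#" as ASM_COMMENT_START), and on ELF targets a matching
   ".machine power8" directive derived from the ISA flag bits.  */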
5721 /* Return nonzero if this function is known to have a null epilogue. */
5723 int
5724 direct_return (void)
5726 if (reload_completed)
5728 rs6000_stack_t *info = rs6000_stack_info ();
5730 if (info->first_gp_reg_save == 32
5731 && info->first_fp_reg_save == 64
5732 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5733 && ! info->lr_save_p
5734 && ! info->cr_save_p
5735 && info->vrsave_size == 0
5736 && ! info->push_p)
5737 return 1;
5740 return 0;
5743 /* Return the number of instructions it takes to form a constant in an
5744 integer register. */
5746 int
5747 num_insns_constant_wide (HOST_WIDE_INT value)
5749 /* signed constant loadable with addi */
5750 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5751 return 1;
5753 /* constant loadable with addis */
5754 else if ((value & 0xffff) == 0
5755 && (value >> 31 == -1 || value >> 31 == 0))
5756 return 1;
5758 else if (TARGET_POWERPC64)
5760 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5761 HOST_WIDE_INT high = value >> 31;
5763 if (high == 0 || high == -1)
5764 return 2;
5766 high >>= 1;
5768 if (low == 0)
5769 return num_insns_constant_wide (high) + 1;
5770 else if (high == 0)
5771 return num_insns_constant_wide (low) + 1;
5772 else
5773 return (num_insns_constant_wide (high)
5774 + num_insns_constant_wide (low) + 1);
5777 else
5778 return 2;
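/* Examples: 0x7fff loads with one addi; 0x12340000 with one addis;
   0x12345 needs two (addis + addi).  On 64-bit, 0x123456789 counts as
   num (high) + num (low) + 1 == 4, the +1 being the shift/or step that
   glues the two halves together.  */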
5781 int
5782 num_insns_constant (rtx op, machine_mode mode)
5784 HOST_WIDE_INT low, high;
5786 switch (GET_CODE (op))
5788 case CONST_INT:
5789 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5790 && rs6000_is_valid_and_mask (op, mode))
5791 return 2;
5792 else
5793 return num_insns_constant_wide (INTVAL (op));
5795 case CONST_WIDE_INT:
5797 int i;
5798 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5799 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5800 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5801 return ins;
5804 case CONST_DOUBLE:
5805 if (mode == SFmode || mode == SDmode)
5807 long l;
5809 if (DECIMAL_FLOAT_MODE_P (mode))
5810 REAL_VALUE_TO_TARGET_DECIMAL32
5811 (*CONST_DOUBLE_REAL_VALUE (op), l);
5812 else
5813 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5814 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5817 long l[2];
5818 if (DECIMAL_FLOAT_MODE_P (mode))
5819 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5820 else
5821 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5822 high = l[WORDS_BIG_ENDIAN == 0];
5823 low = l[WORDS_BIG_ENDIAN != 0];
5825 if (TARGET_32BIT)
5826 return (num_insns_constant_wide (low)
5827 + num_insns_constant_wide (high));
5828 else
5830 if ((high == 0 && low >= 0)
5831 || (high == -1 && low < 0))
5832 return num_insns_constant_wide (low);
5834 else if (rs6000_is_valid_and_mask (op, mode))
5835 return 2;
5837 else if (low == 0)
5838 return num_insns_constant_wide (high) + 1;
5840 else
5841 return (num_insns_constant_wide (high)
5842 + num_insns_constant_wide (low) + 1);
5845 default:
5846 gcc_unreachable ();
5850 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5851 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5852 corresponding element of the vector, but for V4SFmode and V2SFmode,
5853 the corresponding "float" is interpreted as an SImode integer. */
5855 HOST_WIDE_INT
5856 const_vector_elt_as_int (rtx op, unsigned int elt)
5858 rtx tmp;
5860 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5861 gcc_assert (GET_MODE (op) != V2DImode
5862 && GET_MODE (op) != V2DFmode);
5864 tmp = CONST_VECTOR_ELT (op, elt);
5865 if (GET_MODE (op) == V4SFmode
5866 || GET_MODE (op) == V2SFmode)
5867 tmp = gen_lowpart (SImode, tmp);
5868 return INTVAL (tmp);
5871 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5872 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5873 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5874 all items are set to the same value and contain COPIES replicas of the
5875 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5876 operand and the others are set to the value of the operand's msb. */
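/* For instance, the V8HImode vector {5,5,5,5,5,5,5,5} matches with
   STEP 1 and COPIES 1 (a single vspltish 5), and an all-ones V4SImode
   vector also matches vspltisb -1 with COPIES 4, each word holding
   four replicas of the byte operand.  */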
5878 static bool
5879 vspltis_constant (rtx op, unsigned step, unsigned copies)
5881 machine_mode mode = GET_MODE (op);
5882 machine_mode inner = GET_MODE_INNER (mode);
5884 unsigned i;
5885 unsigned nunits;
5886 unsigned bitsize;
5887 unsigned mask;
5889 HOST_WIDE_INT val;
5890 HOST_WIDE_INT splat_val;
5891 HOST_WIDE_INT msb_val;
5893 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5894 return false;
5896 nunits = GET_MODE_NUNITS (mode);
5897 bitsize = GET_MODE_BITSIZE (inner);
5898 mask = GET_MODE_MASK (inner);
5900 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5901 splat_val = val;
5902 msb_val = val >= 0 ? 0 : -1;
5904 /* Construct the value to be splatted, if possible. If not, return false. */
5905 for (i = 2; i <= copies; i *= 2)
5907 HOST_WIDE_INT small_val;
5908 bitsize /= 2;
5909 small_val = splat_val >> bitsize;
5910 mask >>= bitsize;
5911 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5912 return false;
5913 splat_val = small_val;
5916 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5917 if (EASY_VECTOR_15 (splat_val))
5918 ;
5920 /* Also check if we can splat, and then add the result to itself. Do so if
5921 the value is positive, or if the splat instruction is using OP's mode;
5922 for splat_val < 0, the splat and the add should use the same mode. */
5923 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5924 && (splat_val >= 0 || (step == 1 && copies == 1)))
5925 ;
5927 /* Also check if we are loading up the most significant bit, which can be
5928 done by loading up -1 and shifting the value left by -1. */
5929 else if (EASY_VECTOR_MSB (splat_val, inner))
5930 ;
5932 else
5933 return false;
5935 /* Check if VAL is present in every STEP-th element, and the
5936 other elements are filled with its most significant bit. */
5937 for (i = 1; i < nunits; ++i)
5939 HOST_WIDE_INT desired_val;
5940 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5941 if ((i & (step - 1)) == 0)
5942 desired_val = val;
5943 else
5944 desired_val = msb_val;
5946 if (desired_val != const_vector_elt_as_int (op, elt))
5947 return false;
5950 return true;
5953 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5954 instruction, filling in the bottom elements with 0 or -1.
5956 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5957 for the number of zeroes to shift in, or negative for the number of 0xff
5958 bytes to shift in.
5960 OP is a CONST_VECTOR. */
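/* For example (big endian), the V4SImode vector {5,0,0,0} is a
   vspltisw 5 followed by a VSLDOI shifting in 12 zero bytes, so the
   function returns 12; {5,-1,-1,-1} instead returns -12, twelve 0xff
   bytes to shift in.  */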
5962 int
5963 vspltis_shifted (rtx op)
5965 machine_mode mode = GET_MODE (op);
5966 machine_mode inner = GET_MODE_INNER (mode);
5968 unsigned i, j;
5969 unsigned nunits;
5970 unsigned mask;
5972 HOST_WIDE_INT val;
5974 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5975 return 0;
5977 /* We need to create pseudo registers to do the shift, so don't recognize
5978 shift vector constants after reload. */
5979 if (!can_create_pseudo_p ())
5980 return 0;
5982 nunits = GET_MODE_NUNITS (mode);
5983 mask = GET_MODE_MASK (inner);
5985 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5987 /* Check if the value can really be the operand of a vspltis[bhw]. */
5988 if (EASY_VECTOR_15 (val))
5989 ;
5991 /* Also check if we are loading up the most significant bit, which can be
5992 done by loading up -1 and shifting the value left by -1. */
5993 else if (EASY_VECTOR_MSB (val, inner))
5994 ;
5996 else
5997 return 0;
5999 /* Check if VAL is present in every element until we find elements
6000 that are 0 or all 1 bits. */
6001 for (i = 1; i < nunits; ++i)
6003 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6004 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6006 /* If the value isn't the splat value, check for the remaining elements
6007 being 0/-1. */
6008 if (val != elt_val)
6010 if (elt_val == 0)
6012 for (j = i+1; j < nunits; ++j)
6014 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6015 if (const_vector_elt_as_int (op, elt2) != 0)
6016 return 0;
6019 return (nunits - i) * GET_MODE_SIZE (inner);
6022 else if ((elt_val & mask) == mask)
6024 for (j = i+1; j < nunits; ++j)
6026 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6027 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6028 return 0;
6031 return -((nunits - i) * GET_MODE_SIZE (inner));
6034 else
6035 return 0;
6039 /* If all elements are equal, we don't need to do VSLDOI. */
6040 return 0;
6044 /* Return true if OP is of the given MODE and can be synthesized
6045 with a vspltisb, vspltish or vspltisw. */
6047 bool
6048 easy_altivec_constant (rtx op, machine_mode mode)
6050 unsigned step, copies;
6052 if (mode == VOIDmode)
6053 mode = GET_MODE (op);
6054 else if (mode != GET_MODE (op))
6055 return false;
6057 /* V2DI/V2DF were added with VSX. Only allow 0 and all 1's as easy
6058 constants. */
6059 if (mode == V2DFmode)
6060 return zero_constant (op, mode);
6062 else if (mode == V2DImode)
6064 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6065 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6066 return false;
6068 if (zero_constant (op, mode))
6069 return true;
6071 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6072 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6073 return true;
6075 return false;
6078 /* V1TImode is a special container for TImode. Ignore for now. */
6079 else if (mode == V1TImode)
6080 return false;
6082 /* Start with a vspltisw. */
6083 step = GET_MODE_NUNITS (mode) / 4;
6084 copies = 1;
6086 if (vspltis_constant (op, step, copies))
6087 return true;
6089 /* Then try with a vspltish. */
6090 if (step == 1)
6091 copies <<= 1;
6092 else
6093 step >>= 1;
6095 if (vspltis_constant (op, step, copies))
6096 return true;
6098 /* And finally a vspltisb. */
6099 if (step == 1)
6100 copies <<= 1;
6101 else
6102 step >>= 1;
6104 if (vspltis_constant (op, step, copies))
6105 return true;
6107 if (vspltis_shifted (op) != 0)
6108 return true;
6110 return false;
6113 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6114 result is OP. Abort if it is not possible. */
6116 rtx
6117 gen_easy_altivec_constant (rtx op)
6119 machine_mode mode = GET_MODE (op);
6120 int nunits = GET_MODE_NUNITS (mode);
6121 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6122 unsigned step = nunits / 4;
6123 unsigned copies = 1;
6125 /* Start with a vspltisw. */
6126 if (vspltis_constant (op, step, copies))
6127 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6129 /* Then try with a vspltish. */
6130 if (step == 1)
6131 copies <<= 1;
6132 else
6133 step >>= 1;
6135 if (vspltis_constant (op, step, copies))
6136 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6138 /* And finally a vspltisb. */
6139 if (step == 1)
6140 copies <<= 1;
6141 else
6142 step >>= 1;
6144 if (vspltis_constant (op, step, copies))
6145 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6147 gcc_unreachable ();
6150 const char *
6151 output_vec_const_move (rtx *operands)
6153 int cst, cst2, shift;
6154 machine_mode mode;
6155 rtx dest, vec;
6157 dest = operands[0];
6158 vec = operands[1];
6159 mode = GET_MODE (dest);
6161 if (TARGET_VSX)
6163 if (zero_constant (vec, mode))
6164 return "xxlxor %x0,%x0,%x0";
6166 if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
6167 return "xxlorc %x0,%x0,%x0";
6169 if ((mode == V2DImode || mode == V1TImode)
6170 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
6171 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
6172 return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
6175 if (TARGET_ALTIVEC)
6177 rtx splat_vec;
6178 if (zero_constant (vec, mode))
6179 return "vxor %0,%0,%0";
6181 /* Do we need to construct a value using VSLDOI? */
6182 shift = vspltis_shifted (vec);
6183 if (shift != 0)
6184 return "#";
6186 splat_vec = gen_easy_altivec_constant (vec);
6187 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6188 operands[1] = XEXP (splat_vec, 0);
6189 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6190 return "#";
6192 switch (GET_MODE (splat_vec))
6194 case V4SImode:
6195 return "vspltisw %0,%1";
6197 case V8HImode:
6198 return "vspltish %0,%1";
6200 case V16QImode:
6201 return "vspltisb %0,%1";
6203 default:
6204 gcc_unreachable ();
6208 gcc_assert (TARGET_SPE);
6210 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6211 pattern of V1DI, V4HI, and V2SF.
6213 FIXME: We should probably return # and add post reload
6214 splitters for these, but this way is so easy ;-). */
6215 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6216 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6217 operands[1] = CONST_VECTOR_ELT (vec, 0);
6218 operands[2] = CONST_VECTOR_ELT (vec, 1);
6219 if (cst == cst2)
6220 return "li %0,%1\n\tevmergelo %0,%0,%0";
6221 else if (WORDS_BIG_ENDIAN)
6222 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6223 else
6224 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6227 /* Initialize paired-float vector TARGET to VALS. */
6229 void
6230 paired_expand_vector_init (rtx target, rtx vals)
6232 machine_mode mode = GET_MODE (target);
6233 int n_elts = GET_MODE_NUNITS (mode);
6234 int n_var = 0;
6235 rtx x, new_rtx, tmp, constant_op, op1, op2;
6236 int i;
6238 for (i = 0; i < n_elts; ++i)
6240 x = XVECEXP (vals, 0, i);
6241 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6242 ++n_var;
6244 if (n_var == 0)
6246 /* Load from constant pool. */
6247 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6248 return;
6251 if (n_var == 2)
6253 /* The vector is initialized only with non-constants. */
6254 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6255 XVECEXP (vals, 0, 1));
6257 emit_move_insn (target, new_rtx);
6258 return;
6261 /* One field is non-constant and the other one is a constant. Load the
6262 constant from the constant pool and use the ps_merge instruction to
6263 construct the whole vector. */
6264 op1 = XVECEXP (vals, 0, 0);
6265 op2 = XVECEXP (vals, 0, 1);
6267 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6269 tmp = gen_reg_rtx (GET_MODE (constant_op));
6270 emit_move_insn (tmp, constant_op);
6272 if (CONSTANT_P (op1))
6273 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6274 else
6275 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6277 emit_move_insn (target, new_rtx);
6280 void
6281 paired_expand_vector_move (rtx operands[])
6283 rtx op0 = operands[0], op1 = operands[1];
6285 emit_move_insn (op0, op1);
6288 /* Emit vector compare for code RCODE. DEST is the destination, OP0 and
6289 OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
6290 two operands of the relational operation RCODE. This is a recursive
6291 function. */
6293 static void
6294 paired_emit_vector_compare (enum rtx_code rcode,
6295 rtx dest, rtx op0, rtx op1,
6296 rtx cc_op0, rtx cc_op1)
6298 rtx tmp = gen_reg_rtx (V2SFmode);
6299 rtx tmp1, max, min;
6301 gcc_assert (TARGET_PAIRED_FLOAT);
6302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6304 switch (rcode)
6306 case LT:
6307 case LTU:
6308 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6309 return;
6310 case GE:
6311 case GEU:
6312 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6313 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6314 return;
6315 case LE:
6316 case LEU:
6317 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6318 return;
6319 case GT:
6320 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6321 return;
6322 case EQ:
6323 tmp1 = gen_reg_rtx (V2SFmode);
6324 max = gen_reg_rtx (V2SFmode);
6325 min = gen_reg_rtx (V2SFmode);
6326 gen_reg_rtx (V2SFmode);
6328 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6329 emit_insn (gen_selv2sf4
6330 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6331 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6332 emit_insn (gen_selv2sf4
6333 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6334 emit_insn (gen_subv2sf3 (tmp1, min, max));
6335 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6336 return;
6337 case NE:
6338 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6339 return;
6340 case UNLE:
6341 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6342 return;
6343 case UNLT:
6344 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6345 return;
6346 case UNGE:
6347 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6348 return;
6349 case UNGT:
6350 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6351 return;
6352 default:
6353 gcc_unreachable ();
6356 return;
6359 /* Emit vector conditional expression.
6360 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6361 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6363 int
6364 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6365 rtx cond, rtx cc_op0, rtx cc_op1)
6367 enum rtx_code rcode = GET_CODE (cond);
6369 if (!TARGET_PAIRED_FLOAT)
6370 return 0;
6372 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6374 return 1;
6377 /* Initialize vector TARGET to VALS. */
6379 void
6380 rs6000_expand_vector_init (rtx target, rtx vals)
6382 machine_mode mode = GET_MODE (target);
6383 machine_mode inner_mode = GET_MODE_INNER (mode);
6384 int n_elts = GET_MODE_NUNITS (mode);
6385 int n_var = 0, one_var = -1;
6386 bool all_same = true, all_const_zero = true;
6387 rtx x, mem;
6388 int i;
6390 for (i = 0; i < n_elts; ++i)
6392 x = XVECEXP (vals, 0, i);
6393 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6394 ++n_var, one_var = i;
6395 else if (x != CONST0_RTX (inner_mode))
6396 all_const_zero = false;
6398 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6399 all_same = false;
6402 if (n_var == 0)
6404 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6405 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6406 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6408 /* Zero register. */
6409 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6410 return;
6412 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6414 /* Splat immediate. */
6415 emit_insn (gen_rtx_SET (target, const_vec));
6416 return;
6418 else
6420 /* Load from constant pool. */
6421 emit_move_insn (target, const_vec);
6422 return;
6426 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6427 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6429 rtx op0 = XVECEXP (vals, 0, 0);
6430 rtx op1 = XVECEXP (vals, 0, 1);
6431 if (all_same)
6433 if (!MEM_P (op0) && !REG_P (op0))
6434 op0 = force_reg (inner_mode, op0);
6435 if (mode == V2DFmode)
6436 emit_insn (gen_vsx_splat_v2df (target, op0));
6437 else
6438 emit_insn (gen_vsx_splat_v2di (target, op0));
6440 else
6442 op0 = force_reg (inner_mode, op0);
6443 op1 = force_reg (inner_mode, op1);
6444 if (mode == V2DFmode)
6445 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6446 else
6447 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6449 return;
6452 /* With single-precision floating point on VSX, we know that internally
6453 single precision is actually represented as a double, so either make two
6454 V2DF vectors and convert those to single precision, or do one
6455 conversion and splat the result to the other elements. */
6456 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6458 if (all_same)
6460 rtx freg = gen_reg_rtx (V4SFmode);
6461 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
6462 rtx cvt = ((TARGET_XSCVDPSPN)
6463 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6464 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6466 emit_insn (cvt);
6467 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
6469 else
6471 rtx dbl_even = gen_reg_rtx (V2DFmode);
6472 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6473 rtx flt_even = gen_reg_rtx (V4SFmode);
6474 rtx flt_odd = gen_reg_rtx (V4SFmode);
6475 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6476 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6477 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6478 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6480 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6481 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6482 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6483 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6484 rs6000_expand_extract_even (target, flt_even, flt_odd);
6486 return;
6489 /* Store value to stack temp. Load vector element. Splat. However, splat
6490 of 64-bit items is not supported on AltiVec. */
6491 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6493 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6494 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6495 XVECEXP (vals, 0, 0));
6496 x = gen_rtx_UNSPEC (VOIDmode,
6497 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6498 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6499 gen_rtvec (2,
6500 gen_rtx_SET (target, mem),
6501 x)));
6502 x = gen_rtx_VEC_SELECT (inner_mode, target,
6503 gen_rtx_PARALLEL (VOIDmode,
6504 gen_rtvec (1, const0_rtx)));
6505 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6506 return;
6509 /* One field is non-constant. Load the constant, then overwrite the
6510 varying field. */
6511 if (n_var == 1)
6513 rtx copy = copy_rtx (vals);
6515 /* Load constant part of vector, substitute neighboring value for
6516 varying element. */
6517 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6518 rs6000_expand_vector_init (target, copy);
6520 /* Insert variable. */
6521 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6522 return;
6525 /* Construct the vector in memory one field at a time
6526 and load the whole vector. */
6527 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6528 for (i = 0; i < n_elts; i++)
6529 emit_move_insn (adjust_address_nv (mem, inner_mode,
6530 i * GET_MODE_SIZE (inner_mode)),
6531 XVECEXP (vals, 0, i));
6532 emit_move_insn (target, mem);
6535 /* Set field ELT of TARGET to VAL. */
6537 void
6538 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6540 machine_mode mode = GET_MODE (target);
6541 machine_mode inner_mode = GET_MODE_INNER (mode);
6542 rtx reg = gen_reg_rtx (mode);
6543 rtx mask, mem, x;
6544 int width = GET_MODE_SIZE (inner_mode);
6545 int i;
6547 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6549 rtx (*set_func) (rtx, rtx, rtx, rtx)
6550 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6551 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6552 return;
6555 /* Simplify setting single element vectors like V1TImode. */
6556 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6558 emit_move_insn (target, gen_lowpart (mode, val));
6559 return;
6562 /* Load single variable value. */
6563 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6564 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6565 x = gen_rtx_UNSPEC (VOIDmode,
6566 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6567 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6568 gen_rtvec (2,
6569 gen_rtx_SET (reg, mem),
6570 x)));
6572 /* Linear sequence. */
6573 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6574 for (i = 0; i < 16; ++i)
6575 XVECEXP (mask, 0, i) = GEN_INT (i);
6577 /* Set permute mask to insert element into target. */
6578 for (i = 0; i < width; ++i)
6579 XVECEXP (mask, 0, elt*width + i)
6580 = GEN_INT (i + 0x10);
6581 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6583 if (BYTES_BIG_ENDIAN)
6584 x = gen_rtx_UNSPEC (mode,
6585 gen_rtvec (3, target, reg,
6586 force_reg (V16QImode, x)),
6587 UNSPEC_VPERM);
6588 else
6590 /* Invert selector. We prefer to generate VNAND on P8 so
6591 that future fusion opportunities can kick in, but must
6592 generate VNOR elsewhere. */
6593 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6594 rtx iorx = (TARGET_P8_VECTOR
6595 ? gen_rtx_IOR (V16QImode, notx, notx)
6596 : gen_rtx_AND (V16QImode, notx, notx));
6597 rtx tmp = gen_reg_rtx (V16QImode);
6598 emit_insn (gen_rtx_SET (tmp, iorx));
6600 /* Permute with operands reversed and adjusted selector. */
6601 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6602 UNSPEC_VPERM);
6605 emit_insn (gen_rtx_SET (target, x));
6608 /* Extract field ELT from VEC into TARGET. */
6610 void
6611 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6613 machine_mode mode = GET_MODE (vec);
6614 machine_mode inner_mode = GET_MODE_INNER (mode);
6615 rtx mem;
6617 if (VECTOR_MEM_VSX_P (mode))
6619 switch (mode)
6621 default:
6622 break;
6623 case V1TImode:
6624 gcc_assert (elt == 0 && inner_mode == TImode);
6625 emit_move_insn (target, gen_lowpart (TImode, vec));
6626 break;
6627 case V2DFmode:
6628 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6629 return;
6630 case V2DImode:
6631 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6632 return;
6633 case V4SFmode:
6634 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6635 return;
6639 /* Allocate mode-sized buffer. */
6640 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6642 emit_move_insn (mem, vec);
6644 /* Add offset to field within buffer matching vector element. */
6645 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6647 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6650 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6652 bool
6653 invalid_e500_subreg (rtx op, machine_mode mode)
6655 if (TARGET_E500_DOUBLE)
6657 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6658 subreg:TI and reg:TF. Decimal float modes are like integer
6659 modes (only low part of each register used) for this
6660 purpose. */
6661 if (GET_CODE (op) == SUBREG
6662 && (mode == SImode || mode == DImode || mode == TImode
6663 || mode == DDmode || mode == TDmode || mode == PTImode)
6664 && REG_P (SUBREG_REG (op))
6665 && (GET_MODE (SUBREG_REG (op)) == DFmode
6666 || GET_MODE (SUBREG_REG (op)) == TFmode
6667 || GET_MODE (SUBREG_REG (op)) == IFmode
6668 || GET_MODE (SUBREG_REG (op)) == KFmode))
6669 return true;
6671 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6672 reg:TI. */
6673 if (GET_CODE (op) == SUBREG
6674 && (mode == DFmode || mode == TFmode || mode == IFmode
6675 || mode == KFmode)
6676 && REG_P (SUBREG_REG (op))
6677 && (GET_MODE (SUBREG_REG (op)) == DImode
6678 || GET_MODE (SUBREG_REG (op)) == TImode
6679 || GET_MODE (SUBREG_REG (op)) == PTImode
6680 || GET_MODE (SUBREG_REG (op)) == DDmode
6681 || GET_MODE (SUBREG_REG (op)) == TDmode))
6682 return true;
6685 if (TARGET_SPE
6686 && GET_CODE (op) == SUBREG
6687 && mode == SImode
6688 && REG_P (SUBREG_REG (op))
6689 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6690 return true;
6692 return false;
6695 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6696 selects whether the alignment is ABI-mandated, optional, or
6697 both ABI and optional alignment. */
6699 unsigned int
6700 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6702 if (how != align_opt)
6704 if (TREE_CODE (type) == VECTOR_TYPE)
6706 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6707 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6709 if (align < 64)
6710 align = 64;
6712 else if (align < 128)
6713 align = 128;
6715 else if (TARGET_E500_DOUBLE
6716 && TREE_CODE (type) == REAL_TYPE
6717 && TYPE_MODE (type) == DFmode)
6719 if (align < 64)
6720 align = 64;
6724 if (how != align_abi)
6726 if (TREE_CODE (type) == ARRAY_TYPE
6727 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6729 if (align < BITS_PER_WORD)
6730 align = BITS_PER_WORD;
6734 return align;
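/* E.g., an AltiVec vector type gets the ABI-mandated 128-bit minimum
   via the how != align_opt block above, while a plain char array is
   bumped to word alignment only as an optimization, in the
   how != align_abi block.  */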
6737 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6739 bool
6740 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6742 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6744 if (computed != 128)
6746 static bool warned;
6747 if (!warned && warn_psabi)
6749 warned = true;
6750 inform (input_location,
6751 "the layout of aggregates containing vectors with"
6752 " %d-byte alignment has changed in GCC 5",
6753 computed / BITS_PER_UNIT);
6756 /* In current GCC there is no special case. */
6757 return false;
6760 return false;
6763 /* AIX increases natural record alignment to doubleword if the first
6764 field is an FP double while the FP fields remain word aligned. */
6766 unsigned int
6767 rs6000_special_round_type_align (tree type, unsigned int computed,
6768 unsigned int specified)
6770 unsigned int align = MAX (computed, specified);
6771 tree field = TYPE_FIELDS (type);
6773 /* Skip all non-field decls. */
6774 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6775 field = DECL_CHAIN (field);
6777 if (field != NULL && field != type)
6779 type = TREE_TYPE (field);
6780 while (TREE_CODE (type) == ARRAY_TYPE)
6781 type = TREE_TYPE (type);
6783 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6784 align = MAX (align, 64);
6787 return align;
6790 /* Darwin increases record alignment to the natural alignment of
6791 the first field. */
6793 unsigned int
6794 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6795 unsigned int specified)
6797 unsigned int align = MAX (computed, specified);
6799 if (TYPE_PACKED (type))
6800 return align;
6802 /* Find the first field, looking down into aggregates. */
6803 do {
6804 tree field = TYPE_FIELDS (type);
6805 /* Skip all non-field decls. */
6806 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6807 field = DECL_CHAIN (field);
6808 if (! field)
6809 break;
6810 /* A packed field does not contribute any extra alignment. */
6811 if (DECL_PACKED (field))
6812 return align;
6813 type = TREE_TYPE (field);
6814 while (TREE_CODE (type) == ARRAY_TYPE)
6815 type = TREE_TYPE (type);
6816 } while (AGGREGATE_TYPE_P (type));
6818 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6819 align = MAX (align, TYPE_ALIGN (type));
6821 return align;
6824 /* Return 1 for an operand in small memory on V.4/eabi. */
6826 int
6827 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6828 machine_mode mode ATTRIBUTE_UNUSED)
6830 #if TARGET_ELF
6831 rtx sym_ref;
6833 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6834 return 0;
6836 if (DEFAULT_ABI != ABI_V4)
6837 return 0;
6839 /* Vector and float memory instructions have a limited offset on the
6840 SPE, so using a vector or float variable directly as an operand is
6841 not useful. */
6842 if (TARGET_SPE
6843 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6844 return 0;
6846 if (GET_CODE (op) == SYMBOL_REF)
6847 sym_ref = op;
6849 else if (GET_CODE (op) != CONST
6850 || GET_CODE (XEXP (op, 0)) != PLUS
6851 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6852 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6853 return 0;
6855 else
6857 rtx sum = XEXP (op, 0);
6858 HOST_WIDE_INT summand;
6860 /* We have to be careful here, because it is the referenced address
6861 that must be 32k from _SDA_BASE_, not just the symbol. */
6862 summand = INTVAL (XEXP (sum, 1));
6863 if (summand < 0 || summand > g_switch_value)
6864 return 0;
6866 sym_ref = XEXP (sum, 0);
6869 return SYMBOL_REF_SMALL_P (sym_ref);
6870 #else
6871 return 0;
6872 #endif
6875 /* Return true if either operand is a general purpose register. */
6877 bool
6878 gpr_or_gpr_p (rtx op0, rtx op1)
6880 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6881 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6884 /* Return true if this is a move direct operation between GPR registers and
6885 floating point/VSX registers. */
6887 bool
6888 direct_move_p (rtx op0, rtx op1)
6890 int regno0, regno1;
6892 if (!REG_P (op0) || !REG_P (op1))
6893 return false;
6895 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6896 return false;
6898 regno0 = REGNO (op0);
6899 regno1 = REGNO (op1);
6900 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6901 return false;
6903 if (INT_REGNO_P (regno0))
6904 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6906 else if (INT_REGNO_P (regno1))
6908 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6909 return true;
6911 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6912 return true;
6915 return false;
6918 /* Return true if this is a load or store quad operation. This function does
6919 not handle the atomic quad memory instructions. */
6921 bool
6922 quad_load_store_p (rtx op0, rtx op1)
6924 bool ret;
6926 if (!TARGET_QUAD_MEMORY)
6927 ret = false;
6929 else if (REG_P (op0) && MEM_P (op1))
6930 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6931 && quad_memory_operand (op1, GET_MODE (op1))
6932 && !reg_overlap_mentioned_p (op0, op1));
6934 else if (MEM_P (op0) && REG_P (op1))
6935 ret = (quad_memory_operand (op0, GET_MODE (op0))
6936 && quad_int_reg_operand (op1, GET_MODE (op1)));
6938 else
6939 ret = false;
6941 if (TARGET_DEBUG_ADDR)
6943 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6944 ret ? "true" : "false");
6945 debug_rtx (gen_rtx_SET (op0, op1));
6948 return ret;
6951 /* Given an address, return a constant offset term if one exists. */
6953 static rtx
6954 address_offset (rtx op)
6956 if (GET_CODE (op) == PRE_INC
6957 || GET_CODE (op) == PRE_DEC)
6958 op = XEXP (op, 0);
6959 else if (GET_CODE (op) == PRE_MODIFY
6960 || GET_CODE (op) == LO_SUM)
6961 op = XEXP (op, 1);
6963 if (GET_CODE (op) == CONST)
6964 op = XEXP (op, 0);
6966 if (GET_CODE (op) == PLUS)
6967 op = XEXP (op, 1);
6969 if (CONST_INT_P (op))
6970 return op;
6972 return NULL_RTX;
6975 /* Return true if the MEM operand is a memory operand suitable for use
6976 with a (full width, possibly multiple) gpr load/store. On
6977 powerpc64 this means the offset must be divisible by 4.
6978 Implements 'Y' constraint.
6980 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6981 a constraint function we know the operand has satisfied a suitable
6982 memory predicate. Also accept some odd rtl generated by reload
6983 (see rs6000_legitimize_reload_address for various forms). It is
6984 important that reload rtl be accepted by appropriate constraints
6985 but not by the operand predicate.
6987 Offsetting a lo_sum should not be allowed, except where we know by
6988 alignment that a 32k boundary is not crossed, but see the ???
6989 comment in rs6000_legitimize_reload_address. Note that by
6990 "offsetting" here we mean a further offset to access parts of the
6991 MEM. It's fine to have a lo_sum where the inner address is offset
6992 from a sym, since the same sym+offset will appear in the high part
6993 of the address calculation. */
6995 bool
6996 mem_operand_gpr (rtx op, machine_mode mode)
6998 unsigned HOST_WIDE_INT offset;
6999 int extra;
7000 rtx addr = XEXP (op, 0);
7002 op = address_offset (addr);
7003 if (op == NULL_RTX)
7004 return true;
7006 offset = INTVAL (op);
7007 if (TARGET_POWERPC64 && (offset & 3) != 0)
7008 return false;
7010 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7011 if (extra < 0)
7012 extra = 0;
7014 if (GET_CODE (addr) == LO_SUM)
7015 /* For lo_sum addresses, we must allow any offset except one that
7016 causes a wrap, so test only the low 16 bits. */
7017 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7019 return offset + 0x8000 < 0x10000u - extra;
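/* E.g., a DImode access on powerpc64 needs a multiple-of-4 offset in
   [-32768, 32767]; for a 16-byte access (extra == 8) the test above
   additionally rejects offsets of 32760 and up, since the displacement
   of the second doubleword must also fit.  */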
7022 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7024 static bool
7025 reg_offset_addressing_ok_p (machine_mode mode)
7027 switch (mode)
7029 case V16QImode:
7030 case V8HImode:
7031 case V4SFmode:
7032 case V4SImode:
7033 case V2DFmode:
7034 case V2DImode:
7035 case V1TImode:
7036 case TImode:
7037 case TFmode:
7038 case KFmode:
7039 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
7040 TImode is not a vector mode, if we want to use the VSX registers to
7041 move it around, we need to restrict ourselves to reg+reg addressing.
7042 Similarly for IEEE 128-bit floating point that is passed in a single
7043 vector register. */
7044 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7045 return false;
7046 break;
7048 case V4HImode:
7049 case V2SImode:
7050 case V1DImode:
7051 case V2SFmode:
7052 /* Paired vector modes. Only reg+reg addressing is valid. */
7053 if (TARGET_PAIRED_FLOAT)
7054 return false;
7055 break;
7057 case SDmode:
7058 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7059 addressing for the LFIWZX and STFIWX instructions. */
7060 if (TARGET_NO_SDMODE_STACK)
7061 return false;
7062 break;
7064 default:
7065 break;
7068 return true;
7071 static bool
7072 virtual_stack_registers_memory_p (rtx op)
7074 int regnum;
7076 if (GET_CODE (op) == REG)
7077 regnum = REGNO (op);
7079 else if (GET_CODE (op) == PLUS
7080 && GET_CODE (XEXP (op, 0)) == REG
7081 && GET_CODE (XEXP (op, 1)) == CONST_INT)
7082 regnum = REGNO (XEXP (op, 0));
7084 else
7085 return false;
7087 return (regnum >= FIRST_VIRTUAL_REGISTER
7088 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7091 /* Return true if a MODE-sized memory access to OP plus OFFSET
7092 is known not to straddle a 32k boundary. This function is used
7093 to determine whether -mcmodel=medium code can use TOC pointer
7094 relative addressing for OP. This means the alignment of the TOC
7095 pointer must also be taken into account, and unfortunately that is
7096 only 8 bytes. */
7098 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7099 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7100 #endif
7102 static bool
7103 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7104 machine_mode mode)
7106 tree decl;
7107 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7109 if (GET_CODE (op) != SYMBOL_REF)
7110 return false;
7112 dsize = GET_MODE_SIZE (mode);
7113 decl = SYMBOL_REF_DECL (op);
7114 if (!decl)
7116 if (dsize == 0)
7117 return false;
7119 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7120 replacing memory addresses with an anchor plus offset. We
7121 could find the decl by rummaging around in the block->objects
7122 VEC for the given offset but that seems like too much work. */
7123 dalign = BITS_PER_UNIT;
7124 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7125 && SYMBOL_REF_ANCHOR_P (op)
7126 && SYMBOL_REF_BLOCK (op) != NULL)
7128 struct object_block *block = SYMBOL_REF_BLOCK (op);
7130 dalign = block->alignment;
7131 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7133 else if (CONSTANT_POOL_ADDRESS_P (op))
7135 /* It would be nice to have get_pool_align (). */
7136 machine_mode cmode = get_pool_mode (op);
7138 dalign = GET_MODE_ALIGNMENT (cmode);
7141 else if (DECL_P (decl))
7143 dalign = DECL_ALIGN (decl);
7145 if (dsize == 0)
7147 /* Allow BLKmode when the entire object is known to not
7148 cross a 32k boundary. */
7149 if (!DECL_SIZE_UNIT (decl))
7150 return false;
7152 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7153 return false;
7155 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7156 if (dsize > 32768)
7157 return false;
7159 dalign /= BITS_PER_UNIT;
7160 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7161 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7162 return dalign >= dsize;
7165 else
7166 gcc_unreachable ();
7168 /* Find how many bits of the alignment we know for this access. */
7169 dalign /= BITS_PER_UNIT;
7170 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7171 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7172 mask = dalign - 1;
7173 lsb = offset & -offset;
7174 mask &= lsb - 1;
7175 dalign = mask + 1;
7177 return dalign >= dsize;
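/* Worked example: a decl aligned to 8 bytes accessed at offset 0x8004
   has lsb == 4, so only 4 bytes of alignment are provable and an
   8-byte access might still straddle a 32k boundary; at offset 0x8008
   all 8 bytes are provable and dsize <= 8 passes.  */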
7180 static bool
7181 constant_pool_expr_p (rtx op)
7183 rtx base, offset;
7185 split_const (op, &base, &offset);
7186 return (GET_CODE (base) == SYMBOL_REF
7187 && CONSTANT_POOL_ADDRESS_P (base)
7188 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7191 static const_rtx tocrel_base, tocrel_offset;
7193 /* Return true if OP is a toc pointer relative address (the output
7194 of create_TOC_reference). If STRICT, do not match high part or
7195 non-split -mcmodel=large/medium toc pointer relative addresses. */
7197 bool
7198 toc_relative_expr_p (const_rtx op, bool strict)
7200 if (!TARGET_TOC)
7201 return false;
7203 if (TARGET_CMODEL != CMODEL_SMALL)
7205 /* Only match the low part. */
7206 if (GET_CODE (op) == LO_SUM
7207 && REG_P (XEXP (op, 0))
7208 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7209 op = XEXP (op, 1);
7210 else if (strict)
7211 return false;
7214 tocrel_base = op;
7215 tocrel_offset = const0_rtx;
7216 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7218 tocrel_base = XEXP (op, 0);
7219 tocrel_offset = XEXP (op, 1);
7222 return (GET_CODE (tocrel_base) == UNSPEC
7223 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
7226 /* Return true if X is a constant pool address, and also for cmodel=medium
7227 if X is a toc-relative address known to be offsettable within MODE. */
7229 bool
7230 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7231 bool strict)
7233 return (toc_relative_expr_p (x, strict)
7234 && (TARGET_CMODEL != CMODEL_MEDIUM
7235 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7236 || mode == QImode
7237 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7238 INTVAL (tocrel_offset), mode)));
7241 static bool
7242 legitimate_small_data_p (machine_mode mode, rtx x)
7244 return (DEFAULT_ABI == ABI_V4
7245 && !flag_pic && !TARGET_TOC
7246 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7247 && small_data_operand (x, mode));
7250 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
7251 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
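/* Note (illustration only): the mask test accepts exactly the values
   whose set bits lie within bits 3..7, i.e. the doubleword-aligned
   offsets 0, 8, 16, ..., 248 -- five bits of offset where each unit
   is one doubleword.  For instance SPE_CONST_OFFSET_OK (0xf8) holds,
   while SPE_CONST_OFFSET_OK (4) and SPE_CONST_OFFSET_OK (0x100)
   do not.  */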
7253 bool
7254 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7255 bool strict, bool worst_case)
7257 unsigned HOST_WIDE_INT offset;
7258 unsigned int extra;
7260 if (GET_CODE (x) != PLUS)
7261 return false;
7262 if (!REG_P (XEXP (x, 0)))
7263 return false;
7264 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7265 return false;
7266 if (!reg_offset_addressing_ok_p (mode))
7267 return virtual_stack_registers_memory_p (x);
7268 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7269 return true;
7270 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7271 return false;
7273 offset = INTVAL (XEXP (x, 1));
7274 extra = 0;
7275 switch (mode)
7277 case V4HImode:
7278 case V2SImode:
7279 case V1DImode:
7280 case V2SFmode:
7281 /* SPE vector modes. */
7282 return SPE_CONST_OFFSET_OK (offset);
7284 case DFmode:
7285 case DDmode:
7286 case DImode:
7287 /* On e500v2, we may have:
7289 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
7291 which gets addressed with evldd instructions. */
7292 if (TARGET_E500_DOUBLE)
7293 return SPE_CONST_OFFSET_OK (offset);
7295 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7296 addressing. */
7297 if (VECTOR_MEM_VSX_P (mode))
7298 return false;
7300 if (!worst_case)
7301 break;
7302 if (!TARGET_POWERPC64)
7303 extra = 4;
7304 else if (offset & 3)
7305 return false;
7306 break;
7308 case TFmode:
7309 case IFmode:
7310 case KFmode:
7311 if (TARGET_E500_DOUBLE)
7312 return (SPE_CONST_OFFSET_OK (offset)
7313 && SPE_CONST_OFFSET_OK (offset + 8));
7314 /* fall through */
7316 case TDmode:
7317 case TImode:
7318 case PTImode:
7319 extra = 8;
7320 if (!worst_case)
7321 break;
7322 if (!TARGET_POWERPC64)
7323 extra = 12;
7324 else if (offset & 3)
7325 return false;
7326 break;
7328 default:
7329 break;
7332 offset += 0x8000;
7333 return offset < 0x10000 - extra;
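/* Illustration: the final test is the usual unsigned-wraparound
   idiom.  After adding 0x8000, the unsigned comparison
   offset < 0x10000 - extra holds exactly when the original signed
   offset lay in [-0x8000, 0x8000 - extra), i.e. when both the first
   and the last word touched still fit the signed 16-bit displacement
   field.  A hypothetical signed-form equivalent (d_form_offset_ok is
   illustrative only):

     static bool
     d_form_offset_ok (HOST_WIDE_INT offset, unsigned int extra)
     {
       return (offset >= -0x8000
               && offset < 0x8000 - (HOST_WIDE_INT) extra);
     }

   E.g. in the 64-bit worst case for TImode, extra = 8, so the second
   doubleword at offset + 8 remains addressable.  */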
7336 bool
7337 legitimate_indexed_address_p (rtx x, int strict)
7339 rtx op0, op1;
7341 if (GET_CODE (x) != PLUS)
7342 return false;
7344 op0 = XEXP (x, 0);
7345 op1 = XEXP (x, 1);
7347 /* Recognize the rtl generated by reload which we know will later be
7348 replaced with proper base and index regs. */
7349 if (!strict
7350 && reload_in_progress
7351 && (REG_P (op0) || GET_CODE (op0) == PLUS)
7352 && REG_P (op1))
7353 return true;
7355 return (REG_P (op0) && REG_P (op1)
7356 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7357 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7358 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7359 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7362 bool
7363 avoiding_indexed_address_p (machine_mode mode)
7365 /* Avoid indexed addressing for modes that have non-indexed
7366 load/store instruction forms. */
7367 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7370 bool
7371 legitimate_indirect_address_p (rtx x, int strict)
7373 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
7376 bool
7377 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7379 if (!TARGET_MACHO || !flag_pic
7380 || mode != SImode || GET_CODE (x) != MEM)
7381 return false;
7382 x = XEXP (x, 0);
7384 if (GET_CODE (x) != LO_SUM)
7385 return false;
7386 if (GET_CODE (XEXP (x, 0)) != REG)
7387 return false;
7388 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7389 return false;
7390 x = XEXP (x, 1);
7392 return CONSTANT_P (x);
7395 static bool
7396 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7398 if (GET_CODE (x) != LO_SUM)
7399 return false;
7400 if (GET_CODE (XEXP (x, 0)) != REG)
7401 return false;
7402 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7403 return false;
7404 /* Restrict addressing for DI because of our SUBREG hackery. */
7405 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7406 return false;
7407 x = XEXP (x, 1);
7409 if (TARGET_ELF || TARGET_MACHO)
7411 bool large_toc_ok;
7413 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7414 return false;
7415 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook usually
7416 calls push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7417 recognizes some LO_SUM addresses as valid although this
7418 function says the opposite. In most cases LRA can generate
7419 correct code for address reloads through its own transformations;
7420 only some LO_SUM cases elude it. So we need code here, analogous
7421 to that in rs6000_legitimize_reload_address for LO_SUM, saying
7422 that some addresses are still valid. */
7423 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7424 && small_toc_ref (x, VOIDmode));
7425 if (TARGET_TOC && ! large_toc_ok)
7426 return false;
7427 if (GET_MODE_NUNITS (mode) != 1)
7428 return false;
7429 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7430 && !(/* ??? Assume floating point reg based on mode? */
7431 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
7432 && (mode == DFmode || mode == DDmode)))
7433 return false;
7435 return CONSTANT_P (x) || large_toc_ok;
7438 return false;
7442 /* Try machine-dependent ways of modifying an illegitimate address
7443 to be legitimate. If we find one, return the new, valid address.
7444 This is used from only one place: `memory_address' in explow.c.
7446 OLDX is the address as it was before break_out_memory_refs was
7447 called. In some cases it is useful to look at this to decide what
7448 needs to be done.
7450 It is always safe for this function to do nothing. It exists to
7451 recognize opportunities to optimize the output.
7453 On RS/6000, first check for the sum of a register with a constant
7454 integer that is out of range. If so, generate code to add the
7455 constant with the low-order 16 bits masked to the register and force
7456 this result into another register (this can be done with `cau').
7457 Then generate an address of REG+(CONST&0xffff), allowing for the
7458 possibility of bit 16 being a one.
7460 Then check for the sum of a register and something not constant; try to
7461 load the non-constant part into a register and return the sum. */
7463 static rtx
7464 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7465 machine_mode mode)
7467 unsigned int extra;
7469 if (!reg_offset_addressing_ok_p (mode))
7471 if (virtual_stack_registers_memory_p (x))
7472 return x;
7474 /* In theory we should not be seeing addresses of the form reg+0,
7475 but just in case one is generated, optimize it away. */
7476 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7477 return force_reg (Pmode, XEXP (x, 0));
7479 /* For TImode with load/store quad, restrict addresses to just a single
7480 pointer, so it works with both GPRs and VSX registers. */
7481 /* Make sure both operands are registers. */
7482 else if (GET_CODE (x) == PLUS
7483 && (mode != TImode || !TARGET_QUAD_MEMORY))
7484 return gen_rtx_PLUS (Pmode,
7485 force_reg (Pmode, XEXP (x, 0)),
7486 force_reg (Pmode, XEXP (x, 1)));
7487 else
7488 return force_reg (Pmode, x);
7490 if (GET_CODE (x) == SYMBOL_REF)
7492 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7493 if (model != 0)
7494 return rs6000_legitimize_tls_address (x, model);
7497 extra = 0;
7498 switch (mode)
7500 case TFmode:
7501 case TDmode:
7502 case TImode:
7503 case PTImode:
7504 case IFmode:
7505 case KFmode:
7506 /* As in legitimate_offset_address_p we do not assume
7507 worst-case. The mode here is just a hint as to the registers
7508 used. A TImode is usually in gprs, but may actually be in
7509 fprs. Leave worst-case scenario for reload to handle via
7510 insn constraints. PTImode is only GPRs. */
7511 extra = 8;
7512 break;
7513 default:
7514 break;
7517 if (GET_CODE (x) == PLUS
7518 && GET_CODE (XEXP (x, 0)) == REG
7519 && GET_CODE (XEXP (x, 1)) == CONST_INT
7520 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7521 >= 0x10000 - extra)
7522 && !(SPE_VECTOR_MODE (mode)
7523 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7525 HOST_WIDE_INT high_int, low_int;
7526 rtx sum;
7527 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7528 if (low_int >= 0x8000 - extra)
7529 low_int = 0;
7530 high_int = INTVAL (XEXP (x, 1)) - low_int;
7531 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7532 GEN_INT (high_int)), 0);
7533 return plus_constant (Pmode, sum, low_int);
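/* Illustration: the expression ((v & 0xffff) ^ 0x8000) - 0x8000
   sign-extends the low 16 bits of v, so high_int = v - low_int
   always has its low 16 bits clear and can be added with a single
   addis.  For INTVAL = 0x1234abcd:

     low_int  = sign-extended 0xabcd     = -0x5433
     high_int = 0x1234abcd - (-0x5433)   =  0x12350000

   so an addis of 0x1235 materializes the high part, and -0x5433
   remains as the 16-bit memory displacement.  */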
7535 else if (GET_CODE (x) == PLUS
7536 && GET_CODE (XEXP (x, 0)) == REG
7537 && GET_CODE (XEXP (x, 1)) != CONST_INT
7538 && GET_MODE_NUNITS (mode) == 1
7539 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7540 || (/* ??? Assume floating point reg based on mode? */
7541 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7542 && (mode == DFmode || mode == DDmode)))
7543 && !avoiding_indexed_address_p (mode))
7545 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7546 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7548 else if (SPE_VECTOR_MODE (mode)
7549 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7551 if (mode == DImode)
7552 return x;
7553 /* We accept [reg + reg] and [reg + OFFSET]. */
7555 if (GET_CODE (x) == PLUS)
7557 rtx op1 = XEXP (x, 0);
7558 rtx op2 = XEXP (x, 1);
7559 rtx y;
7561 op1 = force_reg (Pmode, op1);
7563 if (GET_CODE (op2) != REG
7564 && (GET_CODE (op2) != CONST_INT
7565 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7566 || (GET_MODE_SIZE (mode) > 8
7567 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7568 op2 = force_reg (Pmode, op2);
7570 /* We can't always do [reg + reg] for these, because [reg +
7571 reg + offset] is not a legitimate addressing mode. */
7572 y = gen_rtx_PLUS (Pmode, op1, op2);
7574 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7575 return force_reg (Pmode, y);
7576 else
7577 return y;
7580 return force_reg (Pmode, x);
7582 else if ((TARGET_ELF
7583 #if TARGET_MACHO
7584 || !MACHO_DYNAMIC_NO_PIC_P
7585 #endif
7587 && TARGET_32BIT
7588 && TARGET_NO_TOC
7589 && ! flag_pic
7590 && GET_CODE (x) != CONST_INT
7591 && GET_CODE (x) != CONST_WIDE_INT
7592 && GET_CODE (x) != CONST_DOUBLE
7593 && CONSTANT_P (x)
7594 && GET_MODE_NUNITS (mode) == 1
7595 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7596 || (/* ??? Assume floating point reg based on mode? */
7597 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7598 && (mode == DFmode || mode == DDmode))))
7600 rtx reg = gen_reg_rtx (Pmode);
7601 if (TARGET_ELF)
7602 emit_insn (gen_elf_high (reg, x));
7603 else
7604 emit_insn (gen_macho_high (reg, x));
7605 return gen_rtx_LO_SUM (Pmode, reg, x);
7607 else if (TARGET_TOC
7608 && GET_CODE (x) == SYMBOL_REF
7609 && constant_pool_expr_p (x)
7610 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7611 return create_TOC_reference (x, NULL_RTX);
7612 else
7613 return x;
7616 /* Debug version of rs6000_legitimize_address. */
7617 static rtx
7618 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7620 rtx ret;
7621 rtx_insn *insns;
7623 start_sequence ();
7624 ret = rs6000_legitimize_address (x, oldx, mode);
7625 insns = get_insns ();
7626 end_sequence ();
7628 if (ret != x)
7630 fprintf (stderr,
7631 "\nrs6000_legitimize_address: mode %s, old code %s, "
7632 "new code %s, modified\n",
7633 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7634 GET_RTX_NAME (GET_CODE (ret)));
7636 fprintf (stderr, "Original address:\n");
7637 debug_rtx (x);
7639 fprintf (stderr, "oldx:\n");
7640 debug_rtx (oldx);
7642 fprintf (stderr, "New address:\n");
7643 debug_rtx (ret);
7645 if (insns)
7647 fprintf (stderr, "Insns added:\n");
7648 debug_rtx_list (insns, 20);
7651 else
7653 fprintf (stderr,
7654 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7655 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7657 debug_rtx (x);
7660 if (insns)
7661 emit_insn (insns);
7663 return ret;
7666 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7667 We need to emit DTP-relative relocations. */
7669 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7670 static void
7671 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7673 switch (size)
7675 case 4:
7676 fputs ("\t.long\t", file);
7677 break;
7678 case 8:
7679 fputs (DOUBLE_INT_ASM_OP, file);
7680 break;
7681 default:
7682 gcc_unreachable ();
7684 output_addr_const (file, x);
7685 if (TARGET_ELF)
7686 fputs ("@dtprel+0x8000", file);
7687 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
7689 switch (SYMBOL_REF_TLS_MODEL (x))
7691 case 0:
7692 break;
7693 case TLS_MODEL_LOCAL_EXEC:
7694 fputs ("@le", file);
7695 break;
7696 case TLS_MODEL_INITIAL_EXEC:
7697 fputs ("@ie", file);
7698 break;
7699 case TLS_MODEL_GLOBAL_DYNAMIC:
7700 case TLS_MODEL_LOCAL_DYNAMIC:
7701 fputs ("@m", file);
7702 break;
7703 default:
7704 gcc_unreachable ();
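/* Sketch (assembler output is illustrative): on 64-bit ELF, where
   DOUBLE_INT_ASM_OP is "\t.quad\t", this emits for a variable x
   something like

     .quad x@dtprel+0x8000

   the constant reflecting the 0x8000 bias PowerPC applies to
   DTP-relative offsets.  */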
7709 /* Return true if X is a symbol that refers to real (rather than emulated)
7710 TLS. */
7712 static bool
7713 rs6000_real_tls_symbol_ref_p (rtx x)
7715 return (GET_CODE (x) == SYMBOL_REF
7716 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7719 /* In the name of slightly smaller debug output, and to cater to
7720 general assembler lossage, recognize various UNSPEC sequences
7721 and turn them back into a direct symbol reference. */
7723 static rtx
7724 rs6000_delegitimize_address (rtx orig_x)
7726 rtx x, y, offset;
7728 orig_x = delegitimize_mem_from_attrs (orig_x);
7729 x = orig_x;
7730 if (MEM_P (x))
7731 x = XEXP (x, 0);
7733 y = x;
7734 if (TARGET_CMODEL != CMODEL_SMALL
7735 && GET_CODE (y) == LO_SUM)
7736 y = XEXP (y, 1);
7738 offset = NULL_RTX;
7739 if (GET_CODE (y) == PLUS
7740 && GET_MODE (y) == Pmode
7741 && CONST_INT_P (XEXP (y, 1)))
7743 offset = XEXP (y, 1);
7744 y = XEXP (y, 0);
7747 if (GET_CODE (y) == UNSPEC
7748 && XINT (y, 1) == UNSPEC_TOCREL)
7750 y = XVECEXP (y, 0, 0);
7752 #ifdef HAVE_AS_TLS
7753 /* Do not associate thread-local symbols with the original
7754 constant pool symbol. */
7755 if (TARGET_XCOFF
7756 && GET_CODE (y) == SYMBOL_REF
7757 && CONSTANT_POOL_ADDRESS_P (y)
7758 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7759 return orig_x;
7760 #endif
7762 if (offset != NULL_RTX)
7763 y = gen_rtx_PLUS (Pmode, y, offset);
7764 if (!MEM_P (orig_x))
7765 return y;
7766 else
7767 return replace_equiv_address_nv (orig_x, y);
7770 if (TARGET_MACHO
7771 && GET_CODE (orig_x) == LO_SUM
7772 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7774 y = XEXP (XEXP (orig_x, 1), 0);
7775 if (GET_CODE (y) == UNSPEC
7776 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7777 return XVECEXP (y, 0, 0);
7780 return orig_x;
7783 /* Return true if X shouldn't be emitted into the debug info.
7784 The linker doesn't like .toc section references from
7785 .debug_* sections, so reject .toc section symbols. */
7787 static bool
7788 rs6000_const_not_ok_for_debug_p (rtx x)
7790 if (GET_CODE (x) == SYMBOL_REF
7791 && CONSTANT_POOL_ADDRESS_P (x))
7793 rtx c = get_pool_constant (x);
7794 machine_mode cmode = get_pool_mode (x);
7795 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7796 return true;
7799 return false;
7802 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7804 static GTY(()) rtx rs6000_tls_symbol;
7805 static rtx
7806 rs6000_tls_get_addr (void)
7808 if (!rs6000_tls_symbol)
7809 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7811 return rs6000_tls_symbol;
7814 /* Construct the SYMBOL_REF for TLS GOT references. */
7816 static GTY(()) rtx rs6000_got_symbol;
7817 static rtx
7818 rs6000_got_sym (void)
7820 if (!rs6000_got_symbol)
7822 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7823 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7824 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7827 return rs6000_got_symbol;
7830 /* AIX Thread-Local Address support. */
7832 static rtx
7833 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7835 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7836 const char *name;
7837 char *tlsname;
7839 name = XSTR (addr, 0);
7840 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
7841 or the symbol will be in the TLS private data section. */
7842 if (name[strlen (name) - 1] != ']'
7843 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7844 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7846 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7847 strcpy (tlsname, name);
7848 strcat (tlsname,
7849 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7850 tlsaddr = copy_rtx (addr);
7851 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7853 else
7854 tlsaddr = addr;
7856 /* Place addr into TOC constant pool. */
7857 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7859 /* Output the TOC entry and create the MEM referencing the value. */
7860 if (constant_pool_expr_p (XEXP (sym, 0))
7861 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7863 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7864 mem = gen_const_mem (Pmode, tocref);
7865 set_mem_alias_set (mem, get_TOC_alias_set ());
7867 else
7868 return sym;
7870 /* Use global-dynamic for local-dynamic. */
7871 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7872 || model == TLS_MODEL_LOCAL_DYNAMIC)
7874 /* Create new TOC reference for @m symbol. */
7875 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7876 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7877 strcpy (tlsname, "*LCM");
7878 strcat (tlsname, name + 3);
7879 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7880 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7881 tocref = create_TOC_reference (modaddr, NULL_RTX);
7882 rtx modmem = gen_const_mem (Pmode, tocref);
7883 set_mem_alias_set (modmem, get_TOC_alias_set ());
7885 rtx modreg = gen_reg_rtx (Pmode);
7886 emit_insn (gen_rtx_SET (modreg, modmem));
7888 tmpreg = gen_reg_rtx (Pmode);
7889 emit_insn (gen_rtx_SET (tmpreg, mem));
7891 dest = gen_reg_rtx (Pmode);
7892 if (TARGET_32BIT)
7893 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7894 else
7895 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7896 return dest;
7898 /* Obtain the TLS pointer: a call on 32-bit, GPR 13 on 64-bit. */
7899 else if (TARGET_32BIT)
7901 tlsreg = gen_reg_rtx (SImode);
7902 emit_insn (gen_tls_get_tpointer (tlsreg));
7904 else
7905 tlsreg = gen_rtx_REG (DImode, 13);
7907 /* Load the TOC value into temporary register. */
7908 tmpreg = gen_reg_rtx (Pmode);
7909 emit_insn (gen_rtx_SET (tmpreg, mem));
7910 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7911 gen_rtx_MINUS (Pmode, addr, tlsreg));
7913 /* Add TOC symbol value to TLS pointer. */
7914 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7916 return dest;
7919 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7920 this (thread-local) address. */
7922 static rtx
7923 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7925 rtx dest, insn;
7927 if (TARGET_XCOFF)
7928 return rs6000_legitimize_tls_address_aix (addr, model);
7930 dest = gen_reg_rtx (Pmode);
7931 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7933 rtx tlsreg;
7935 if (TARGET_64BIT)
7937 tlsreg = gen_rtx_REG (Pmode, 13);
7938 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7940 else
7942 tlsreg = gen_rtx_REG (Pmode, 2);
7943 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7945 emit_insn (insn);
7947 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7949 rtx tlsreg, tmp;
7951 tmp = gen_reg_rtx (Pmode);
7952 if (TARGET_64BIT)
7954 tlsreg = gen_rtx_REG (Pmode, 13);
7955 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7957 else
7959 tlsreg = gen_rtx_REG (Pmode, 2);
7960 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7962 emit_insn (insn);
7963 if (TARGET_64BIT)
7964 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7965 else
7966 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7967 emit_insn (insn);
7969 else
7971 rtx r3, got, tga, tmp1, tmp2, call_insn;
7973 /* We currently use relocations like @got@tlsgd for tls, which
7974 means the linker will handle allocation of tls entries, placing
7975 them in the .got section. So use a pointer to the .got section,
7976 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7977 or to secondary GOT sections used by 32-bit -fPIC. */
7978 if (TARGET_64BIT)
7979 got = gen_rtx_REG (Pmode, 2);
7980 else
7982 if (flag_pic == 1)
7983 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7984 else
7986 rtx gsym = rs6000_got_sym ();
7987 got = gen_reg_rtx (Pmode);
7988 if (flag_pic == 0)
7989 rs6000_emit_move (got, gsym, Pmode);
7990 else
7992 rtx mem, lab, last;
7994 tmp1 = gen_reg_rtx (Pmode);
7995 tmp2 = gen_reg_rtx (Pmode);
7996 mem = gen_const_mem (Pmode, tmp1);
7997 lab = gen_label_rtx ();
7998 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7999 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8000 if (TARGET_LINK_STACK)
8001 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8002 emit_move_insn (tmp2, mem);
8003 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8004 set_unique_reg_note (last, REG_EQUAL, gsym);
8009 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8011 tga = rs6000_tls_get_addr ();
8012 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8013 1, const0_rtx, Pmode);
8015 r3 = gen_rtx_REG (Pmode, 3);
8016 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8018 if (TARGET_64BIT)
8019 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8020 else
8021 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8023 else if (DEFAULT_ABI == ABI_V4)
8024 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8025 else
8026 gcc_unreachable ();
8027 call_insn = last_call_insn ();
8028 PATTERN (call_insn) = insn;
8029 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8030 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8031 pic_offset_table_rtx);
8033 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8035 tga = rs6000_tls_get_addr ();
8036 tmp1 = gen_reg_rtx (Pmode);
8037 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8038 1, const0_rtx, Pmode);
8040 r3 = gen_rtx_REG (Pmode, 3);
8041 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8043 if (TARGET_64BIT)
8044 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8045 else
8046 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8048 else if (DEFAULT_ABI == ABI_V4)
8049 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8050 else
8051 gcc_unreachable ();
8052 call_insn = last_call_insn ();
8053 PATTERN (call_insn) = insn;
8054 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8055 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8056 pic_offset_table_rtx);
8058 if (rs6000_tls_size == 16)
8060 if (TARGET_64BIT)
8061 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8062 else
8063 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8065 else if (rs6000_tls_size == 32)
8067 tmp2 = gen_reg_rtx (Pmode);
8068 if (TARGET_64BIT)
8069 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8070 else
8071 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8072 emit_insn (insn);
8073 if (TARGET_64BIT)
8074 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8075 else
8076 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8078 else
8080 tmp2 = gen_reg_rtx (Pmode);
8081 if (TARGET_64BIT)
8082 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8083 else
8084 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8085 emit_insn (insn);
8086 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8088 emit_insn (insn);
8090 else
8092 /* IE, or 64-bit offset LE. */
8093 tmp2 = gen_reg_rtx (Pmode);
8094 if (TARGET_64BIT)
8095 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8096 else
8097 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8098 emit_insn (insn);
8099 if (TARGET_64BIT)
8100 insn = gen_tls_tls_64 (dest, tmp2, addr);
8101 else
8102 insn = gen_tls_tls_32 (dest, tmp2, addr);
8103 emit_insn (insn);
8107 return dest;
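/* Sketch (assembler output is illustrative; exact sequences depend
   on code model and assembler support): on 64-bit ELF with the small
   code model, the global-dynamic path above typically assembles to
   something like

     addi 3,2,x@got@tlsgd
     bl   __tls_get_addr(x@tlsgd)
     nop

   while the initial-exec path becomes a GOT load plus an add against
   the thread pointer in r13:

     ld  9,x@got@tprel(2)
     add 9,9,x@tls

   This is only meant to make the generated patterns concrete.  */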
8110 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8112 static bool
8113 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8115 if (GET_CODE (x) == HIGH
8116 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8117 return true;
8119 /* A TLS symbol in the TOC cannot contain a sum. */
8120 if (GET_CODE (x) == CONST
8121 && GET_CODE (XEXP (x, 0)) == PLUS
8122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8123 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8124 return true;
8126 /* Do not place an ELF TLS symbol in the constant pool. */
8127 return TARGET_ELF && tls_referenced_p (x);
8130 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8131 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8132 can be addressed relative to the toc pointer. */
8134 static bool
8135 use_toc_relative_ref (rtx sym, machine_mode mode)
8137 return ((constant_pool_expr_p (sym)
8138 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8139 get_pool_mode (sym)))
8140 || (TARGET_CMODEL == CMODEL_MEDIUM
8141 && SYMBOL_REF_LOCAL_P (sym)
8142 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8145 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8146 replace the input X, or the original X if no replacement is called for.
8147 The output parameter *WIN is 1 if the calling macro should goto WIN,
8148 0 if it should not.
8150 For RS/6000, we wish to handle large displacements off a base
8151 register by splitting the addend across an addi/addis and the mem insn.
8152 This cuts the number of extra insns needed from 3 to 1.
8154 On Darwin, we use this to generate code for floating point constants.
8155 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8156 The Darwin code is inside #if TARGET_MACHO because only then are the
8157 machopic_* functions defined. */
8158 static rtx
8159 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8160 int opnum, int type,
8161 int ind_levels ATTRIBUTE_UNUSED, int *win)
8163 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8165 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
8166 DFmode/DImode MEM. */
8167 if (reg_offset_p
8168 && opnum == 1
8169 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8170 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
8171 reg_offset_p = false;
8173 /* We must recognize output that we have already generated ourselves. */
8174 if (GET_CODE (x) == PLUS
8175 && GET_CODE (XEXP (x, 0)) == PLUS
8176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8177 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8178 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8180 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8181 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8182 opnum, (enum reload_type) type);
8183 *win = 1;
8184 return x;
8187 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8188 if (GET_CODE (x) == LO_SUM
8189 && GET_CODE (XEXP (x, 0)) == HIGH)
8191 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8192 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8193 opnum, (enum reload_type) type);
8194 *win = 1;
8195 return x;
8198 #if TARGET_MACHO
8199 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8200 && GET_CODE (x) == LO_SUM
8201 && GET_CODE (XEXP (x, 0)) == PLUS
8202 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8203 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8204 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8205 && machopic_operand_p (XEXP (x, 1)))
8207 /* Result of previous invocation of this function on Darwin
8208 floating point constant. */
8209 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8210 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8211 opnum, (enum reload_type) type);
8212 *win = 1;
8213 return x;
8215 #endif
8217 if (TARGET_CMODEL != CMODEL_SMALL
8218 && reg_offset_p
8219 && small_toc_ref (x, VOIDmode))
8221 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8222 x = gen_rtx_LO_SUM (Pmode, hi, x);
8223 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8224 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8225 opnum, (enum reload_type) type);
8226 *win = 1;
8227 return x;
8230 if (GET_CODE (x) == PLUS
8231 && GET_CODE (XEXP (x, 0)) == REG
8232 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8233 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8234 && GET_CODE (XEXP (x, 1)) == CONST_INT
8235 && reg_offset_p
8236 && !SPE_VECTOR_MODE (mode)
8237 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8238 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8240 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8241 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8242 HOST_WIDE_INT high
8243 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8245 /* Check for 32-bit overflow. */
8246 if (high + low != val)
8248 *win = 0;
8249 return x;
8252 /* Reload the high part into a base reg; leave the low part
8253 in the mem directly. */
8255 x = gen_rtx_PLUS (GET_MODE (x),
8256 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8257 GEN_INT (high)),
8258 GEN_INT (low));
8260 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8261 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8262 opnum, (enum reload_type) type);
8263 *win = 1;
8264 return x;
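/* Illustration: the overflow test above catches the one case the
   addis/addi split cannot represent.  E.g. val = 0x7fff8000 gives
   low = -0x8000 and val - low = 0x80000000, which sign-extends to
   high = -0x80000000, so high + low = -0x80008000 != val and the
   address is left unchanged with *win = 0.  */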
8267 if (GET_CODE (x) == SYMBOL_REF
8268 && reg_offset_p
8269 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
8270 && !SPE_VECTOR_MODE (mode)
8271 #if TARGET_MACHO
8272 && DEFAULT_ABI == ABI_DARWIN
8273 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
8274 && machopic_symbol_defined_p (x)
8275 #else
8276 && DEFAULT_ABI == ABI_V4
8277 && !flag_pic
8278 #endif
8279 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
8280 The same goes for DImode without 64-bit gprs and DFmode and DDmode
8281 without fprs.
8282 ??? Assume floating point reg based on mode? This assumption is
8283 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
8284 where reload ends up doing a DFmode load of a constant from
8285 mem using two gprs. Unfortunately, at this point reload
8286 hasn't yet selected regs so poking around in reload data
8287 won't help and even if we could figure out the regs reliably,
8288 we'd still want to allow this transformation when the mem is
8289 naturally aligned. Since we say the address is good here, we
8290 can't disable offsets from LO_SUMs in mem_operand_gpr.
8291 FIXME: Allow offset from lo_sum for other modes too, when
8292 mem is sufficiently aligned.
8294 Also disallow this if the type can go in VMX/Altivec registers, since
8295 those registers do not have d-form (reg+offset) address modes. */
8296 && !reg_addr[mode].scalar_in_vmx_p
8297 && mode != TFmode
8298 && mode != TDmode
8299 && mode != IFmode
8300 && mode != KFmode
8301 && (mode != TImode || !TARGET_VSX_TIMODE)
8302 && mode != PTImode
8303 && (mode != DImode || TARGET_POWERPC64)
8304 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
8305 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
8307 #if TARGET_MACHO
8308 if (flag_pic)
8310 rtx offset = machopic_gen_offset (x);
8311 x = gen_rtx_LO_SUM (GET_MODE (x),
8312 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
8313 gen_rtx_HIGH (Pmode, offset)), offset);
8315 else
8316 #endif
8317 x = gen_rtx_LO_SUM (GET_MODE (x),
8318 gen_rtx_HIGH (Pmode, x), x);
8320 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8321 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8322 opnum, (enum reload_type) type);
8323 *win = 1;
8324 return x;
8327 /* Reload an offset address wrapped by an AND that represents the
8328 masking of the lower bits. Strip the outer AND and let reload
8329 convert the offset address into an indirect address. For VSX,
8330 force reload to create the address with an AND in a separate
8331 register, because we can't guarantee an altivec register will
8332 be used. */
8333 if (VECTOR_MEM_ALTIVEC_P (mode)
8334 && GET_CODE (x) == AND
8335 && GET_CODE (XEXP (x, 0)) == PLUS
8336 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8337 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8338 && GET_CODE (XEXP (x, 1)) == CONST_INT
8339 && INTVAL (XEXP (x, 1)) == -16)
8341 x = XEXP (x, 0);
8342 *win = 1;
8343 return x;
8346 if (TARGET_TOC
8347 && reg_offset_p
8348 && GET_CODE (x) == SYMBOL_REF
8349 && use_toc_relative_ref (x, mode))
8351 x = create_TOC_reference (x, NULL_RTX);
8352 if (TARGET_CMODEL != CMODEL_SMALL)
8353 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8354 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8355 opnum, (enum reload_type) type);
8356 *win = 1;
8357 return x;
8359 *win = 0;
8360 return x;
8363 /* Debug version of rs6000_legitimize_reload_address. */
8364 static rtx
8365 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
8366 int opnum, int type,
8367 int ind_levels, int *win)
8369 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
8370 ind_levels, win);
8371 fprintf (stderr,
8372 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
8373 "type = %d, ind_levels = %d, win = %d, original addr:\n",
8374 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
8375 debug_rtx (x);
8377 if (x == ret)
8378 fprintf (stderr, "Same address returned\n");
8379 else if (!ret)
8380 fprintf (stderr, "NULL returned\n");
8381 else
8383 fprintf (stderr, "New address:\n");
8384 debug_rtx (ret);
8387 return ret;
8390 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8391 that is a valid memory address for an instruction.
8392 The MODE argument is the machine mode for the MEM expression
8393 that wants to use this address.
8395 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8396 refers to a constant pool entry of an address (or the sum of it
8397 plus a constant), a short (16-bit signed) constant plus a register,
8398 the sum of two registers, or a register indirect, possibly with an
8399 auto-increment. For DFmode, DDmode and DImode with a constant plus
8400 register, we must ensure that both words are addressable or PowerPC64
8401 with offset word aligned.
8403 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8404 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8405 because adjacent memory cells are accessed by adding word-sized offsets
8406 during assembly output. */
8407 static bool
8408 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8410 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8412 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8413 if (VECTOR_MEM_ALTIVEC_P (mode)
8414 && GET_CODE (x) == AND
8415 && GET_CODE (XEXP (x, 1)) == CONST_INT
8416 && INTVAL (XEXP (x, 1)) == -16)
8417 x = XEXP (x, 0);
8419 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8420 return 0;
8421 if (legitimate_indirect_address_p (x, reg_ok_strict))
8422 return 1;
8423 if (TARGET_UPDATE
8424 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8425 && mode_supports_pre_incdec_p (mode)
8426 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8427 return 1;
8428 if (virtual_stack_registers_memory_p (x))
8429 return 1;
8430 if (reg_offset_p && legitimate_small_data_p (mode, x))
8431 return 1;
8432 if (reg_offset_p
8433 && legitimate_constant_pool_address_p (x, mode,
8434 reg_ok_strict || lra_in_progress))
8435 return 1;
8436 if (reg_offset_p && reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
8437 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
8438 return 1;
8439 /* For TImode, if we have load/store quad and TImode in VSX registers, only
8440 allow register indirect addresses. This will allow the values to go in
8441 either GPRs or VSX registers without reloading. The vector types would
8442 tend to go into VSX registers, so we allow REG+REG, while TImode seems
8443 somewhat split, in that some uses are GPR based, and some VSX based. */
8444 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
8445 return 0;
8446 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8447 if (! reg_ok_strict
8448 && reg_offset_p
8449 && GET_CODE (x) == PLUS
8450 && GET_CODE (XEXP (x, 0)) == REG
8451 && (XEXP (x, 0) == virtual_stack_vars_rtx
8452 || XEXP (x, 0) == arg_pointer_rtx)
8453 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8454 return 1;
8455 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8456 return 1;
8457 if (!FLOAT128_2REG_P (mode)
8458 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8459 || TARGET_POWERPC64
8460 || (mode != DFmode && mode != DDmode)
8461 || (TARGET_E500_DOUBLE && mode != DDmode))
8462 && (TARGET_POWERPC64 || mode != DImode)
8463 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8464 && mode != PTImode
8465 && !avoiding_indexed_address_p (mode)
8466 && legitimate_indexed_address_p (x, reg_ok_strict))
8467 return 1;
8468 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8469 && mode_supports_pre_modify_p (mode)
8470 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8471 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8472 reg_ok_strict, false)
8473 || (!avoiding_indexed_address_p (mode)
8474 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8475 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8476 return 1;
8477 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8478 return 1;
8479 return 0;
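/* Illustration: the canonical shapes this predicate can accept
   include, e.g. for SImode,

     (reg 3)                              register indirect
     (plus (reg 3) (const_int 16))        d-form, signed 16-bit offset
     (plus (reg 3) (reg 4))               x-form, indexed
     (pre_inc (reg 3))                    with TARGET_UPDATE
     (lo_sum (reg 2) (symbol_ref "x"))    low part of a high/lo pair

   each further gated by the mode and target checks above.  */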
8482 /* Debug version of rs6000_legitimate_address_p. */
8483 static bool
8484 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8485 bool reg_ok_strict)
8487 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8488 fprintf (stderr,
8489 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8490 "strict = %d, reload = %s, code = %s\n",
8491 ret ? "true" : "false",
8492 GET_MODE_NAME (mode),
8493 reg_ok_strict,
8494 (reload_completed
8495 ? "after"
8496 : (reload_in_progress ? "progress" : "before")),
8497 GET_RTX_NAME (GET_CODE (x)));
8498 debug_rtx (x);
8500 return ret;
8503 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8505 static bool
8506 rs6000_mode_dependent_address_p (const_rtx addr,
8507 addr_space_t as ATTRIBUTE_UNUSED)
8509 return rs6000_mode_dependent_address_ptr (addr);
8512 /* Go to LABEL if ADDR (a legitimate address expression)
8513 has an effect that depends on the machine mode it is used for.
8515 On the RS/6000 this is true of all integral offsets (since AltiVec
8516 and VSX modes don't allow them) and of pre-increment or decrement addresses.
8518 ??? Except that due to conceptual problems in offsettable_address_p
8519 we can't really report the problems of integral offsets. So leave
8520 this assuming that the adjustable offset must be valid for the
8521 sub-words of a TFmode operand, which is what we had before. */
8523 static bool
8524 rs6000_mode_dependent_address (const_rtx addr)
8526 switch (GET_CODE (addr))
8528 case PLUS:
8529 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8530 is considered a legitimate address before reload, so there
8531 are no offset restrictions in that case. Note that this
8532 condition is safe in strict mode because any address involving
8533 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8534 been rejected as illegitimate. */
8535 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8536 && XEXP (addr, 0) != arg_pointer_rtx
8537 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8539 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8540 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8542 break;
8544 case LO_SUM:
8545 /* Anything in the constant pool is sufficiently aligned that
8546 all bytes have the same high part address. */
8547 return !legitimate_constant_pool_address_p (addr, QImode, false);
8549 /* Auto-increment cases are now treated generically in recog.c. */
8550 case PRE_MODIFY:
8551 return TARGET_UPDATE;
8553 /* AND is only allowed in Altivec loads. */
8554 case AND:
8555 return true;
8557 default:
8558 break;
8561 return false;
8564 /* Debug version of rs6000_mode_dependent_address. */
8565 static bool
8566 rs6000_debug_mode_dependent_address (const_rtx addr)
8568 bool ret = rs6000_mode_dependent_address (addr);
8570 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8571 ret ? "true" : "false");
8572 debug_rtx (addr);
8574 return ret;
8577 /* Implement FIND_BASE_TERM. */
8580 rs6000_find_base_term (rtx op)
8582 rtx base;
8584 base = op;
8585 if (GET_CODE (base) == CONST)
8586 base = XEXP (base, 0);
8587 if (GET_CODE (base) == PLUS)
8588 base = XEXP (base, 0);
8589 if (GET_CODE (base) == UNSPEC)
8590 switch (XINT (base, 1))
8592 case UNSPEC_TOCREL:
8593 case UNSPEC_MACHOPIC_OFFSET:
8594 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8595 for aliasing purposes. */
8596 return XVECEXP (base, 0, 0);
8599 return op;
8602 /* More elaborate version of recog's offsettable_memref_p predicate
8603 that works around the ??? note of rs6000_mode_dependent_address.
8604 In particular it accepts
8606 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8608 in 32-bit mode, which the recog predicate rejects. */
8610 static bool
8611 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8613 bool worst_case;
8615 if (!MEM_P (op))
8616 return false;
8618 /* First mimic offsettable_memref_p. */
8619 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8620 return true;
8622 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8623 the latter predicate knows nothing about the mode of the memory
8624 reference and, therefore, assumes that it is the largest supported
8625 mode (TFmode). As a consequence, legitimate offsettable memory
8626 references are rejected. rs6000_legitimate_offset_address_p contains
8627 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8628 at least with a little bit of help here given that we know the
8629 actual registers used. */
8630 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8631 || GET_MODE_SIZE (reg_mode) == 4);
8632 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8633 true, worst_case);
8636 /* Change register usage conditional on target flags. */
8637 static void
8638 rs6000_conditional_register_usage (void)
8640 int i;
8642 if (TARGET_DEBUG_TARGET)
8643 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8645 /* Set MQ register fixed (already call_used) so that it will not be
8646 allocated. */
8647 fixed_regs[64] = 1;
8649 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8650 if (TARGET_64BIT)
8651 fixed_regs[13] = call_used_regs[13]
8652 = call_really_used_regs[13] = 1;
8654 /* Conditionally disable FPRs. */
8655 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8656 for (i = 32; i < 64; i++)
8657 fixed_regs[i] = call_used_regs[i]
8658 = call_really_used_regs[i] = 1;
8660 /* The TOC register is not killed across calls in a way that is
8661 visible to the compiler. */
8662 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8663 call_really_used_regs[2] = 0;
8665 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
8666 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8668 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
8669 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8670 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8671 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8673 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
8674 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8675 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8676 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8678 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8679 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8680 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8682 if (TARGET_SPE)
8684 global_regs[SPEFSCR_REGNO] = 1;
8685 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8686 registers in prologues and epilogues. We no longer use r14
8687 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8688 pool for link-compatibility with older versions of GCC. Once
8689 "old" code has died out, we can return r14 to the allocation
8690 pool. */
8691 fixed_regs[14]
8692 = call_used_regs[14]
8693 = call_really_used_regs[14] = 1;
8696 if (!TARGET_ALTIVEC && !TARGET_VSX)
8698 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8699 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8700 call_really_used_regs[VRSAVE_REGNO] = 1;
8703 if (TARGET_ALTIVEC || TARGET_VSX)
8704 global_regs[VSCR_REGNO] = 1;
8706 if (TARGET_ALTIVEC_ABI)
8708 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8709 call_used_regs[i] = call_really_used_regs[i] = 1;
8711 /* AIX reserves VR20:31 in non-extended ABI mode. */
8712 if (TARGET_XCOFF)
8713 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8714 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8719 /* Output insns to set DEST equal to the constant SOURCE as a series of
8720 lis, ori and shift instructions and return TRUE. */
8722 bool
8723 rs6000_emit_set_const (rtx dest, rtx source)
8725 machine_mode mode = GET_MODE (dest);
8726 rtx temp, set;
8727 rtx_insn *insn;
8728 HOST_WIDE_INT c;
8730 gcc_checking_assert (CONST_INT_P (source));
8731 c = INTVAL (source);
8732 switch (mode)
8734 case QImode:
8735 case HImode:
8736 emit_insn (gen_rtx_SET (dest, source));
8737 return true;
8739 case SImode:
8740 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8742 emit_insn (gen_rtx_SET (copy_rtx (temp),
8743 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8744 emit_insn (gen_rtx_SET (dest,
8745 gen_rtx_IOR (SImode, copy_rtx (temp),
8746 GEN_INT (c & 0xffff))));
8747 break;
8749 case DImode:
8750 if (!TARGET_POWERPC64)
8752 rtx hi, lo;
8754 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8755 DImode);
8756 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8757 DImode);
8758 emit_move_insn (hi, GEN_INT (c >> 32));
8759 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8760 emit_move_insn (lo, GEN_INT (c));
8762 else
8763 rs6000_emit_set_long_const (dest, c);
8764 break;
8766 default:
8767 gcc_unreachable ();
8770 insn = get_last_insn ();
8771 set = single_set (insn);
8772 if (! CONSTANT_P (SET_SRC (set)))
8773 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8775 return true;
8778 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8779 Output insns to set DEST equal to the constant C as a series of
8780 lis, ori and shift instructions. */
8782 static void
8783 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8785 rtx temp;
8786 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8788 ud1 = c & 0xffff;
8789 c = c >> 16;
8790 ud2 = c & 0xffff;
8791 c = c >> 16;
8792 ud3 = c & 0xffff;
8793 c = c >> 16;
8794 ud4 = c & 0xffff;
8796 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8797 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8798 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8800 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8801 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8803 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8805 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8806 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8807 if (ud1 != 0)
8808 emit_move_insn (dest,
8809 gen_rtx_IOR (DImode, copy_rtx (temp),
8810 GEN_INT (ud1)));
8812 else if (ud3 == 0 && ud4 == 0)
8814 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8816 gcc_assert (ud2 & 0x8000);
8817 emit_move_insn (copy_rtx (temp),
8818 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8819 if (ud1 != 0)
8820 emit_move_insn (copy_rtx (temp),
8821 gen_rtx_IOR (DImode, copy_rtx (temp),
8822 GEN_INT (ud1)));
8823 emit_move_insn (dest,
8824 gen_rtx_ZERO_EXTEND (DImode,
8825 gen_lowpart (SImode,
8826 copy_rtx (temp))));
8828 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8829 || (ud4 == 0 && ! (ud3 & 0x8000)))
8831 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8833 emit_move_insn (copy_rtx (temp),
8834 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8835 if (ud2 != 0)
8836 emit_move_insn (copy_rtx (temp),
8837 gen_rtx_IOR (DImode, copy_rtx (temp),
8838 GEN_INT (ud2)));
8839 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8840 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8841 GEN_INT (16)));
8842 if (ud1 != 0)
8843 emit_move_insn (dest,
8844 gen_rtx_IOR (DImode, copy_rtx (temp),
8845 GEN_INT (ud1)));
8847 else
8849 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8851 emit_move_insn (copy_rtx (temp),
8852 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8853 if (ud3 != 0)
8854 emit_move_insn (copy_rtx (temp),
8855 gen_rtx_IOR (DImode, copy_rtx (temp),
8856 GEN_INT (ud3)));
8858 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8859 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8860 GEN_INT (32)));
8861 if (ud2 != 0)
8862 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8863 gen_rtx_IOR (DImode, copy_rtx (temp),
8864 GEN_INT (ud2 << 16)));
8865 if (ud1 != 0)
8866 emit_move_insn (dest,
8867 gen_rtx_IOR (DImode, copy_rtx (temp),
8868 GEN_INT (ud1)));
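/* Illustration (r stands for the temporary GPR): in the general case
   above, c = 0x123456789abcdef0 splits into ud4:ud3:ud2:ud1 =
   0x1234:0x5678:0x9abc:0xdef0 and is built as

     lis  r, 0x1234        # r = 0x12340000
     ori  r, r, 0x5678     # r = 0x12345678
     sldi r, r, 32         # r = 0x1234567800000000
     oris r, r, 0x9abc     # r = 0x123456789abc0000
     ori  r, r, 0xdef0     # r = 0x123456789abcdef0

   five instructions, with the earlier special cases shaving off
   whichever steps would only insert zero or sign-extension bits.  */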
8872 /* Helper for the following. Get rid of [r+r] memory refs
8873 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8875 static void
8876 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8878 if (reload_in_progress)
8879 return;
8881 if (GET_CODE (operands[0]) == MEM
8882 && GET_CODE (XEXP (operands[0], 0)) != REG
8883 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8884 GET_MODE (operands[0]), false))
8885 operands[0]
8886 = replace_equiv_address (operands[0],
8887 copy_addr_to_reg (XEXP (operands[0], 0)));
8889 if (GET_CODE (operands[1]) == MEM
8890 && GET_CODE (XEXP (operands[1], 0)) != REG
8891 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8892 GET_MODE (operands[1]), false))
8893 operands[1]
8894 = replace_equiv_address (operands[1],
8895 copy_addr_to_reg (XEXP (operands[1], 0)));
8898 /* Generate a vector of constants to permute MODE for a little-endian
8899 storage operation by swapping the two halves of a vector. */
8900 static rtvec
8901 rs6000_const_vec (machine_mode mode)
8903 int i, subparts;
8904 rtvec v;
8906 switch (mode)
8908 case V1TImode:
8909 subparts = 1;
8910 break;
8911 case V2DFmode:
8912 case V2DImode:
8913 subparts = 2;
8914 break;
8915 case V4SFmode:
8916 case V4SImode:
8917 subparts = 4;
8918 break;
8919 case V8HImode:
8920 subparts = 8;
8921 break;
8922 case V16QImode:
8923 subparts = 16;
8924 break;
8925 default:
8926 gcc_unreachable();
8929 v = rtvec_alloc (subparts);
8931 for (i = 0; i < subparts / 2; ++i)
8932 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8933 for (i = subparts / 2; i < subparts; ++i)
8934 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8936 return v;
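/* Illustration: for V4SImode this yields the selector { 2, 3, 0, 1 },
   and for V16QImode { 8, ..., 15, 0, ..., 7 } -- in every case the
   permutation exchanging the two 64-bit halves of the vector, which
   is what lxvd2x/stxvd2x do to the element order on little-endian.  */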
8939 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8940 for a VSX load or store operation. */
8942 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8944 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
8945 128-bit integers if they are allowed in VSX registers. */
8946 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
8947 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
8948 else
8950 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8951 return gen_rtx_VEC_SELECT (mode, source, par);
8955 /* Emit a little-endian load from vector memory location SOURCE to VSX
8956 register DEST in mode MODE. The load is done with two permuting
8957 insns that represent an lxvd2x and an xxpermdi. */
8958 void
8959 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8961 rtx tmp, permute_mem, permute_reg;
8963 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8964 V1TImode). */
8965 if (mode == TImode || mode == V1TImode)
8967 mode = V2DImode;
8968 dest = gen_lowpart (V2DImode, dest);
8969 source = adjust_address (source, V2DImode, 0);
8972 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8973 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8974 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8975 emit_insn (gen_rtx_SET (tmp, permute_mem));
8976 emit_insn (gen_rtx_SET (dest, permute_reg));
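/* Note: the two SETs above correspond to the lxvd2x (a permuting
   load) and the xxpermdi (a register permute); each swaps the 64-bit
   halves, so their composition leaves DEST holding the correct
   little-endian value.  Keeping both as explicit permutes in the RTL
   lets later passes cancel adjacent swap pairs.
   rs6000_emit_le_vsx_store below mirrors this for stores.  */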
8979 /* Emit a little-endian store to vector memory location DEST from VSX
8980 register SOURCE in mode MODE. The store is done with two permuting
8981 insns that represent an xxpermdi and an stxvd2x. */
8982 void
8983 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8985 rtx tmp, permute_src, permute_tmp;
8987 /* This should never be called during or after reload, because it does
8988 not re-permute the source register. It is intended only for use
8989 during expand. */
8990 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8992 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8993 V1TImode). */
8994 if (mode == TImode || mode == V1TImode)
8996 mode = V2DImode;
8997 dest = adjust_address (dest, V2DImode, 0);
8998 source = gen_lowpart (V2DImode, source);
9001 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9002 permute_src = rs6000_gen_le_vsx_permute (source, mode);
9003 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
9004 emit_insn (gen_rtx_SET (tmp, permute_src));
9005 emit_insn (gen_rtx_SET (dest, permute_tmp));
9008 /* Emit a sequence representing a little-endian VSX load or store,
9009 moving data from SOURCE to DEST in mode MODE. This is done
9010 separately from rs6000_emit_move to ensure it is called only
9011 during expand. LE VSX loads and stores introduced later are
9012 handled with a split. The expand-time RTL generation allows
9013 us to optimize away redundant pairs of register-permutes. */
9014 void
9015 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9017 gcc_assert (!BYTES_BIG_ENDIAN
9018 && VECTOR_MEM_VSX_P (mode)
9019 && !TARGET_P9_VECTOR
9020 && !gpr_or_gpr_p (dest, source)
9021 && (MEM_P (source) ^ MEM_P (dest)));
9023 if (MEM_P (source))
9025 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
9026 rs6000_emit_le_vsx_load (dest, source, mode);
9028 else
9030 if (!REG_P (source))
9031 source = force_reg (mode, source);
9032 rs6000_emit_le_vsx_store (dest, source, mode);
9036 /* Emit a move from SOURCE to DEST in mode MODE. */
9037 void
9038 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9040 rtx operands[2];
9041 operands[0] = dest;
9042 operands[1] = source;
9044 if (TARGET_DEBUG_ADDR)
9046 fprintf (stderr,
9047 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
9048 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9049 GET_MODE_NAME (mode),
9050 reload_in_progress,
9051 reload_completed,
9052 can_create_pseudo_p ());
9053 debug_rtx (dest);
9054 fprintf (stderr, "source:\n");
9055 debug_rtx (source);
9058 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
9059 if (CONST_WIDE_INT_P (operands[1])
9060 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9062 /* This should be fixed with the introduction of CONST_WIDE_INT. */
9063 gcc_unreachable ();
9066 /* Check if GCC is setting up a block move that will end up using FP
9067 registers as temporaries. We must make sure this is acceptable. */
9068 if (GET_CODE (operands[0]) == MEM
9069 && GET_CODE (operands[1]) == MEM
9070 && mode == DImode
9071 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
9072 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
9073 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
9074 ? 32 : MEM_ALIGN (operands[0])))
9075 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
9076 ? 32
9077 : MEM_ALIGN (operands[1]))))
9078 && ! MEM_VOLATILE_P (operands [0])
9079 && ! MEM_VOLATILE_P (operands [1]))
9081 emit_move_insn (adjust_address (operands[0], SImode, 0),
9082 adjust_address (operands[1], SImode, 0));
9083 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9084 adjust_address (copy_rtx (operands[1]), SImode, 4));
9085 return;
9088 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9089 && !gpc_reg_operand (operands[1], mode))
9090 operands[1] = force_reg (mode, operands[1]);
9092 /* Recognize the case where operand[1] is a reference to thread-local
9093 data and load its address to a register. */
9094 if (tls_referenced_p (operands[1]))
9096 enum tls_model model;
9097 rtx tmp = operands[1];
9098 rtx addend = NULL;
9100 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9102 addend = XEXP (XEXP (tmp, 0), 1);
9103 tmp = XEXP (XEXP (tmp, 0), 0);
9106 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9107 model = SYMBOL_REF_TLS_MODEL (tmp);
9108 gcc_assert (model != 0);
9110 tmp = rs6000_legitimize_tls_address (tmp, model);
9111 if (addend)
9113 tmp = gen_rtx_PLUS (mode, tmp, addend);
9114 tmp = force_operand (tmp, operands[0]);
9116 operands[1] = tmp;
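/* Illustrative example (hypothetical source, not from this file): for

       __thread int t;
       int *p = &t + 1;

   operands[1] arrives as (const (plus (symbol_ref "t") (const_int 4))),
   so the code above peels TMP down to the SYMBOL_REF, legitimizes it
   according to its TLS model, and re-applies the addend afterwards.  */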
9119 /* Handle the case where reload calls us with an invalid address. */
9120 if (reload_in_progress && mode == Pmode
9121 && (! general_operand (operands[1], mode)
9122 || ! nonimmediate_operand (operands[0], mode)))
9123 goto emit_set;
9125 /* 128-bit constant floating-point values on Darwin should really be loaded
9126 as two parts. However, this premature splitting is a problem when DFmode
9127 values can go into Altivec registers. */
9128 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9129 && GET_CODE (operands[1]) == CONST_DOUBLE)
9131 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9132 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9133 DFmode);
9134 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9135 GET_MODE_SIZE (DFmode)),
9136 simplify_gen_subreg (DFmode, operands[1], mode,
9137 GET_MODE_SIZE (DFmode)),
9138 DFmode);
9139 return;
9142 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9143 cfun->machine->sdmode_stack_slot =
9144 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9147 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9148 p1:SD) if p1 is not of floating point class and p0 is spilled as
9149 we can have no analogous movsd_store for this. */
9150 if (lra_in_progress && mode == DDmode
9151 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9152 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9153 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9154 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9156 enum reg_class cl;
9157 int regno = REGNO (SUBREG_REG (operands[1]));
9159 if (regno >= FIRST_PSEUDO_REGISTER)
9161 cl = reg_preferred_class (regno);
9162 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9164 if (regno >= 0 && ! FP_REGNO_P (regno))
9166 mode = SDmode;
9167 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9168 operands[1] = SUBREG_REG (operands[1]);
9171 if (lra_in_progress
9172 && mode == SDmode
9173 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9174 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9175 && (REG_P (operands[1])
9176 || (GET_CODE (operands[1]) == SUBREG
9177 && REG_P (SUBREG_REG (operands[1])))))
9179 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9180 ? SUBREG_REG (operands[1]) : operands[1]);
9181 enum reg_class cl;
9183 if (regno >= FIRST_PSEUDO_REGISTER)
9185 cl = reg_preferred_class (regno);
9186 gcc_assert (cl != NO_REGS);
9187 regno = ira_class_hard_regs[cl][0];
9189 if (FP_REGNO_P (regno))
9191 if (GET_MODE (operands[0]) != DDmode)
9192 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9193 emit_insn (gen_movsd_store (operands[0], operands[1]));
9195 else if (INT_REGNO_P (regno))
9196 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9197 else
9198 gcc_unreachable ();
9199 return;
9201 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9202 p:DD)) if p0 is not of floating point class and p1 is spilled as
9203 we can have no analogous movsd_load for this. */
9204 if (lra_in_progress && mode == DDmode
9205 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
9206 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9207 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9208 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9210 enum reg_class cl;
9211 int regno = REGNO (SUBREG_REG (operands[0]));
9213 if (regno >= FIRST_PSEUDO_REGISTER)
9215 cl = reg_preferred_class (regno);
9216 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9218 if (regno >= 0 && ! FP_REGNO_P (regno))
9220 mode = SDmode;
9221 operands[0] = SUBREG_REG (operands[0]);
9222 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9225 if (lra_in_progress
9226 && mode == SDmode
9227 && (REG_P (operands[0])
9228 || (GET_CODE (operands[0]) == SUBREG
9229 && REG_P (SUBREG_REG (operands[0]))))
9230 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9231 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9233 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
9234 ? SUBREG_REG (operands[0]) : operands[0]);
9235 enum reg_class cl;
9237 if (regno >= FIRST_PSEUDO_REGISTER)
9239 cl = reg_preferred_class (regno);
9240 gcc_assert (cl != NO_REGS);
9241 regno = ira_class_hard_regs[cl][0];
9243 if (FP_REGNO_P (regno))
9245 if (GET_MODE (operands[1]) != DDmode)
9246 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9247 emit_insn (gen_movsd_load (operands[0], operands[1]));
9249 else if (INT_REGNO_P (regno))
9250 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9251 else
9252 gcc_unreachable ();
9253 return;
9256 if (reload_in_progress
9257 && mode == SDmode
9258 && cfun->machine->sdmode_stack_slot != NULL_RTX
9259 && MEM_P (operands[0])
9260 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
9261 && REG_P (operands[1]))
9263 if (FP_REGNO_P (REGNO (operands[1])))
9265 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
9266 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9267 emit_insn (gen_movsd_store (mem, operands[1]));
9269 else if (INT_REGNO_P (REGNO (operands[1])))
9271 rtx mem = operands[0];
9272 if (BYTES_BIG_ENDIAN)
9273 mem = adjust_address_nv (mem, mode, 4);
9274 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9275 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
9277 else
9278 gcc_unreachable ();
9279 return;
9281 if (reload_in_progress
9282 && mode == SDmode
9283 && REG_P (operands[0])
9284 && MEM_P (operands[1])
9285 && cfun->machine->sdmode_stack_slot != NULL_RTX
9286 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
9288 if (FP_REGNO_P (REGNO (operands[0])))
9290 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
9291 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9292 emit_insn (gen_movsd_load (operands[0], mem));
9294 else if (INT_REGNO_P (REGNO (operands[0])))
9296 rtx mem = operands[1];
9297 if (BYTES_BIG_ENDIAN)
9298 mem = adjust_address_nv (mem, mode, 4);
9299 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9300 emit_insn (gen_movsd_hardfloat (operands[0], mem));
9302 else
9303 gcc_unreachable ();
9304 return;
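/* Note on the +4 adjustment above (explanatory, not from the original
   source): the SDmode secondary-reload stack slot is wide enough for
   DDmode so the FPR side can use 8-byte loads and stores at offset 0,
   while a 4-byte SDmode value accessed through GPRs lives in the
   second word of that slot on big-endian targets.  */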
9307 /* FIXME: In the long term, this switch statement should go away
9308 and be replaced by a sequence of tests based on things like
9309 mode == Pmode. */
9310 switch (mode)
9312 case HImode:
9313 case QImode:
9314 if (CONSTANT_P (operands[1])
9315 && GET_CODE (operands[1]) != CONST_INT)
9316 operands[1] = force_const_mem (mode, operands[1]);
9317 break;
9319 case TFmode:
9320 case TDmode:
9321 case IFmode:
9322 case KFmode:
9323 if (FLOAT128_2REG_P (mode))
9324 rs6000_eliminate_indexed_memrefs (operands);
9325 /* fall through */
9327 case DFmode:
9328 case DDmode:
9329 case SFmode:
9330 case SDmode:
9331 if (CONSTANT_P (operands[1])
9332 && ! easy_fp_constant (operands[1], mode))
9333 operands[1] = force_const_mem (mode, operands[1]);
9334 break;
9336 case V16QImode:
9337 case V8HImode:
9338 case V4SFmode:
9339 case V4SImode:
9340 case V4HImode:
9341 case V2SFmode:
9342 case V2SImode:
9343 case V1DImode:
9344 case V2DFmode:
9345 case V2DImode:
9346 case V1TImode:
9347 if (CONSTANT_P (operands[1])
9348 && !easy_vector_constant (operands[1], mode))
9349 operands[1] = force_const_mem (mode, operands[1]);
9350 break;
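/* Illustrative example: easy_vector_constant accepts constants the
   hardware can synthesize directly, e.g. a V4SImode splat of 5, which
   becomes a single vspltisw; an arbitrary constant such as
   { 1, 2, 3, 4 } fails the test and is forced into the constant pool
   by the code above.  */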
9352 case SImode:
9353 case DImode:
9354 /* Use default pattern for address of ELF small data. */
9355 if (TARGET_ELF
9356 && mode == Pmode
9357 && DEFAULT_ABI == ABI_V4
9358 && (GET_CODE (operands[1]) == SYMBOL_REF
9359 || GET_CODE (operands[1]) == CONST)
9360 && small_data_operand (operands[1], mode))
9362 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9363 return;
9366 if (DEFAULT_ABI == ABI_V4
9367 && mode == Pmode && mode == SImode
9368 && flag_pic == 1 && got_operand (operands[1], mode))
9370 emit_insn (gen_movsi_got (operands[0], operands[1]));
9371 return;
9374 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9375 && TARGET_NO_TOC
9376 && ! flag_pic
9377 && mode == Pmode
9378 && CONSTANT_P (operands[1])
9379 && GET_CODE (operands[1]) != HIGH
9380 && GET_CODE (operands[1]) != CONST_INT)
9382 rtx target = (!can_create_pseudo_p ()
9383 ? operands[0]
9384 : gen_reg_rtx (mode));
9386 /* If this is a function address on -mcall-aixdesc,
9387 convert it to the address of the descriptor. */
9388 if (DEFAULT_ABI == ABI_AIX
9389 && GET_CODE (operands[1]) == SYMBOL_REF
9390 && XSTR (operands[1], 0)[0] == '.')
9392 const char *name = XSTR (operands[1], 0);
9393 rtx new_ref;
9394 while (*name == '.')
9395 name++;
9396 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9397 CONSTANT_POOL_ADDRESS_P (new_ref)
9398 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9399 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9400 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9401 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9402 operands[1] = new_ref;
9405 if (DEFAULT_ABI == ABI_DARWIN)
9407 #if TARGET_MACHO
9408 if (MACHO_DYNAMIC_NO_PIC_P)
9410 /* Take care of any required data indirection. */
9411 operands[1] = rs6000_machopic_legitimize_pic_address (
9412 operands[1], mode, operands[0]);
9413 if (operands[0] != operands[1])
9414 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9415 return;
9417 #endif
9418 emit_insn (gen_macho_high (target, operands[1]));
9419 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9420 return;
9423 emit_insn (gen_elf_high (target, operands[1]));
9424 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9425 return;
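/* Sketch of the code generated here (illustrative): gen_elf_high and
   gen_elf_low build the usual absolute-address pair,

       lis  rT,sym@ha
       la   rD,sym@l(rT)

   which is only legal without PIC and with TARGET_NO_TOC, exactly the
   guards checked above.  */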
9428 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9429 and we have put it in the TOC, we just need to make a TOC-relative
9430 reference to it. */
9431 if (TARGET_TOC
9432 && GET_CODE (operands[1]) == SYMBOL_REF
9433 && use_toc_relative_ref (operands[1], mode))
9434 operands[1] = create_TOC_reference (operands[1], operands[0]);
9435 else if (mode == Pmode
9436 && CONSTANT_P (operands[1])
9437 && GET_CODE (operands[1]) != HIGH
9438 && ((GET_CODE (operands[1]) != CONST_INT
9439 && ! easy_fp_constant (operands[1], mode))
9440 || (GET_CODE (operands[1]) == CONST_INT
9441 && (num_insns_constant (operands[1], mode)
9442 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9443 || (GET_CODE (operands[0]) == REG
9444 && FP_REGNO_P (REGNO (operands[0]))))
9445 && !toc_relative_expr_p (operands[1], false)
9446 && (TARGET_CMODEL == CMODEL_SMALL
9447 || can_create_pseudo_p ()
9448 || (REG_P (operands[0])
9449 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9452 #if TARGET_MACHO
9453 /* Darwin uses a special PIC legitimizer. */
9454 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9456 operands[1] =
9457 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9458 operands[0]);
9459 if (operands[0] != operands[1])
9460 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9461 return;
9463 #endif
9465 /* If we are to limit the number of things we put in the TOC and
9466 this is a symbol plus a constant we can add in one insn,
9467 just put the symbol in the TOC and add the constant. Don't do
9468 this if reload is in progress. */
9469 if (GET_CODE (operands[1]) == CONST
9470 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
9471 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9472 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9473 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9474 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
9475 && ! side_effects_p (operands[0]))
9477 rtx sym =
9478 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9479 rtx other = XEXP (XEXP (operands[1], 0), 1);
9481 sym = force_reg (mode, sym);
9482 emit_insn (gen_add3_insn (operands[0], sym, other));
9483 return;
9486 operands[1] = force_const_mem (mode, operands[1]);
9488 if (TARGET_TOC
9489 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9490 && constant_pool_expr_p (XEXP (operands[1], 0))
9491 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
9492 get_pool_constant (XEXP (operands[1], 0)),
9493 get_pool_mode (XEXP (operands[1], 0))))
9495 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9496 operands[0]);
9497 operands[1] = gen_const_mem (mode, tocref);
9498 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9501 break;
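/* Illustrative example: with TARGET_TOC, a constant-pool symbol is
   reached relative to r2.  Under -mcmodel=small this is a single

       ld rD,sym@toc(r2)

   while the medium and large code models use an
   addis rT,r2,sym@toc@ha / ld rD,sym@toc@l(rT) pair; both forms come
   from the TOC-relative reference built by create_TOC_reference.  */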
9503 case TImode:
9504 if (!VECTOR_MEM_VSX_P (TImode))
9505 rs6000_eliminate_indexed_memrefs (operands);
9506 break;
9508 case PTImode:
9509 rs6000_eliminate_indexed_memrefs (operands);
9510 break;
9512 default:
9513 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9516 /* Above, we may have called force_const_mem which may have returned
9517 an invalid address. If we can, fix this up; otherwise, reload will
9518 have to deal with it. */
9519 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9520 operands[1] = validize_mem (operands[1]);
9522 emit_set:
9523 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9526 /* Return true if a structure, union or array containing FIELD should be
9527 accessed using `BLKmode'.
9529 For the SPE, SIMD types are V2SI, and GCC can be tempted to put the
9530 entire thing in a DI and use subregs to access the internals.
9531 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9532 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9533 best thing to do is set structs to BLKmode and avoid Severe Tire
9534 Damage.
9536 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9537 fit in one GPR, whereas DI still needs two. */
9539 static bool
9540 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9542 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9543 || (TARGET_E500_DOUBLE && mode == DFmode));
9546 /* Nonzero if we can use a floating-point register to pass this arg. */
9547 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9548 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9549 && (CUM)->fregno <= FP_ARG_MAX_REG \
9550 && TARGET_HARD_FLOAT && TARGET_FPRS)
9552 /* Nonzero if we can use an AltiVec register to pass this arg. */
9553 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9554 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9555 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9556 && TARGET_ALTIVEC_ABI \
9557 && (NAMED))
9559 /* Walk down the type tree of TYPE counting consecutive base elements.
9560 If *MODEP is VOIDmode, then set it to the first valid floating point
9561 or vector type. If a non-floating point or vector type is found, or
9562 if a floating point or vector type that doesn't match a non-VOIDmode
9563 *MODEP is found, then return -1, otherwise return the count in the
9564 sub-tree. */
9566 static int
9567 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9569 machine_mode mode;
9570 HOST_WIDE_INT size;
9572 switch (TREE_CODE (type))
9574 case REAL_TYPE:
9575 mode = TYPE_MODE (type);
9576 if (!SCALAR_FLOAT_MODE_P (mode))
9577 return -1;
9579 if (*modep == VOIDmode)
9580 *modep = mode;
9582 if (*modep == mode)
9583 return 1;
9585 break;
9587 case COMPLEX_TYPE:
9588 mode = TYPE_MODE (TREE_TYPE (type));
9589 if (!SCALAR_FLOAT_MODE_P (mode))
9590 return -1;
9592 if (*modep == VOIDmode)
9593 *modep = mode;
9595 if (*modep == mode)
9596 return 2;
9598 break;
9600 case VECTOR_TYPE:
9601 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9602 return -1;
9604 /* Use V4SImode as representative of all 128-bit vector types. */
9605 size = int_size_in_bytes (type);
9606 switch (size)
9608 case 16:
9609 mode = V4SImode;
9610 break;
9611 default:
9612 return -1;
9615 if (*modep == VOIDmode)
9616 *modep = mode;
9618 /* Vector modes are considered to be opaque: two vectors are
9619 equivalent for the purposes of being homogeneous aggregates
9620 if they are the same size. */
9621 if (*modep == mode)
9622 return 1;
9624 break;
9626 case ARRAY_TYPE:
9628 int count;
9629 tree index = TYPE_DOMAIN (type);
9631 /* Can't handle incomplete types nor sizes that are not
9632 fixed. */
9633 if (!COMPLETE_TYPE_P (type)
9634 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9635 return -1;
9637 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9638 if (count == -1
9639 || !index
9640 || !TYPE_MAX_VALUE (index)
9641 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9642 || !TYPE_MIN_VALUE (index)
9643 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9644 || count < 0)
9645 return -1;
9647 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9648 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9650 /* There must be no padding. */
9651 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9652 return -1;
9654 return count;
9657 case RECORD_TYPE:
9659 int count = 0;
9660 int sub_count;
9661 tree field;
9663 /* Can't handle incomplete types nor sizes that are not
9664 fixed. */
9665 if (!COMPLETE_TYPE_P (type)
9666 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9667 return -1;
9669 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9671 if (TREE_CODE (field) != FIELD_DECL)
9672 continue;
9674 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9675 if (sub_count < 0)
9676 return -1;
9677 count += sub_count;
9680 /* There must be no padding. */
9681 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9682 return -1;
9684 return count;
9687 case UNION_TYPE:
9688 case QUAL_UNION_TYPE:
9690 /* These aren't very interesting except in a degenerate case. */
9691 int count = 0;
9692 int sub_count;
9693 tree field;
9695 /* Can't handle incomplete types nor sizes that are not
9696 fixed. */
9697 if (!COMPLETE_TYPE_P (type)
9698 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9699 return -1;
9701 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9703 if (TREE_CODE (field) != FIELD_DECL)
9704 continue;
9706 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9707 if (sub_count < 0)
9708 return -1;
9709 count = count > sub_count ? count : sub_count;
9712 /* There must be no padding. */
9713 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9714 return -1;
9716 return count;
9719 default:
9720 break;
9723 return -1;
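/* Worked examples (illustrative):
     struct { double x, y; }        -> returns 2, *MODEP = DFmode;
     struct { float v[4]; }         -> returns 4, *MODEP = SFmode;
     struct { double x; float y; }  -> returns -1 (mismatched modes);
   and a struct whose TYPE_SIZE includes padding, e.g. a single float
   over-aligned to 16 bytes, also returns -1 via the padding checks.  */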
9726 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9727 float or vector aggregate that shall be passed in FP/vector registers
9728 according to the ELFv2 ABI, return the homogeneous element mode in
9729 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9731 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9733 static bool
9734 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9735 machine_mode *elt_mode,
9736 int *n_elts)
9738 /* Note that we do not accept complex types at the top level as
9739 homogeneous aggregates; these types are handled via the
9740 targetm.calls.split_complex_arg mechanism. Complex types
9741 can be elements of homogeneous aggregates, however. */
9742 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9744 machine_mode field_mode = VOIDmode;
9745 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9747 if (field_count > 0)
9749 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9750 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9752 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9753 up to AGGR_ARG_NUM_REG registers. */
9754 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9756 if (elt_mode)
9757 *elt_mode = field_mode;
9758 if (n_elts)
9759 *n_elts = field_count;
9760 return true;
9765 if (elt_mode)
9766 *elt_mode = mode;
9767 if (n_elts)
9768 *n_elts = 1;
9769 return false;
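/* Worked example (illustrative): under ELFv2, struct { double d[4]; }
   is a homogeneous aggregate with *ELT_MODE = DFmode and *N_ELTS = 4;
   each DFmode element needs one register, and 4 <= AGGR_ARG_NUM_REG,
   so the struct can travel in four consecutive FPRs.  */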
9772 /* Return a nonzero value to say to return the function value in
9773 memory, just as large structures are always returned. TYPE will be
9774 the data type of the value, and FNTYPE will be the type of the
9775 function doing the returning, or @code{NULL} for libcalls.
9777 The AIX ABI for the RS/6000 specifies that all structures are
9778 returned in memory. The Darwin ABI does the same.
9780 For the Darwin 64 Bit ABI, a function result can be returned in
9781 registers or in memory, depending on the size of the return data
9782 type. If it is returned in registers, the value occupies the same
9783 registers as it would if it were the first and only function
9784 argument. Otherwise, the function places its result in memory at
9785 the location pointed to by GPR3.
9787 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9788 but a draft put them in memory, and GCC used to implement the draft
9789 instead of the final standard. Therefore, aix_struct_return
9790 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9791 compatibility can change DRAFT_V4_STRUCT_RET to override the
9792 default, and -m switches get the final word. See
9793 rs6000_option_override_internal for more details.
9795 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9796 long double support is enabled. These values are returned in memory.
9798 int_size_in_bytes returns -1 for variable size objects, which go in
9799 memory always. The cast to unsigned makes -1 > 8. */
9801 static bool
9802 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9804 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9805 if (TARGET_MACHO
9806 && rs6000_darwin64_abi
9807 && TREE_CODE (type) == RECORD_TYPE
9808 && int_size_in_bytes (type) > 0)
9810 CUMULATIVE_ARGS valcum;
9811 rtx valret;
9813 valcum.words = 0;
9814 valcum.fregno = FP_ARG_MIN_REG;
9815 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9816 /* Do a trial code generation as if this were going to be passed
9817 as an argument; if any part goes in memory, the trial returns NULL. */
9818 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9819 if (valret)
9820 return false;
9821 /* Otherwise fall through to more conventional ABI rules. */
9824 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9825 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9826 NULL, NULL))
9827 return false;
9829 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9830 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9831 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9832 return false;
9834 if (AGGREGATE_TYPE_P (type)
9835 && (aix_struct_return
9836 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9837 return true;
9839 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9840 modes only exist for GCC vector types if -maltivec. */
9841 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9842 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9843 return false;
9845 /* Return synthetic vectors in memory. */
9846 if (TREE_CODE (type) == VECTOR_TYPE
9847 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9849 static bool warned_for_return_big_vectors = false;
9850 if (!warned_for_return_big_vectors)
9852 warning (0, "GCC vector returned by reference: "
9853 "non-standard ABI extension with no compatibility guarantee");
9854 warned_for_return_big_vectors = true;
9856 return true;
9859 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
9860 && FLOAT128_IEEE_P (TYPE_MODE (type)))
9861 return true;
9863 return false;
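/* Illustrative consequences of the rules above: under ELFv2,
   struct { long a, b; } (16 bytes) comes back in GPRs and
   struct { double x, y, z; } comes back in FPRs as a homogeneous
   aggregate, whereas with aix_struct_return both are returned in
   memory.  */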
9866 /* Specify whether values returned in registers should be at the most
9867 significant end of a register. We want aggregates returned by
9868 value to match the way aggregates are passed to functions. */
9870 static bool
9871 rs6000_return_in_msb (const_tree valtype)
9873 return (DEFAULT_ABI == ABI_ELFv2
9874 && BYTES_BIG_ENDIAN
9875 && AGGREGATE_TYPE_P (valtype)
9876 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9879 #ifdef HAVE_AS_GNU_ATTRIBUTE
9880 /* Return TRUE if a call to function FNDECL may be one that
9881 potentially affects the function calling ABI of the object file. */
9883 static bool
9884 call_ABI_of_interest (tree fndecl)
9886 if (symtab->state == EXPANSION)
9888 struct cgraph_node *c_node;
9890 /* Libcalls are always interesting. */
9891 if (fndecl == NULL_TREE)
9892 return true;
9894 /* Any call to an external function is interesting. */
9895 if (DECL_EXTERNAL (fndecl))
9896 return true;
9898 /* Interesting functions that we are emitting in this object file. */
9899 c_node = cgraph_node::get (fndecl);
9900 c_node = c_node->ultimate_alias_target ();
9901 return !c_node->only_called_directly_p ();
9903 return false;
9905 #endif
9907 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9908 for a call to a function whose data type is FNTYPE.
9909 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9911 For incoming args we set the number of arguments in the prototype large
9912 so we never return a PARALLEL. */
9914 void
9915 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9916 rtx libname ATTRIBUTE_UNUSED, int incoming,
9917 int libcall, int n_named_args,
9918 tree fndecl ATTRIBUTE_UNUSED,
9919 machine_mode return_mode ATTRIBUTE_UNUSED)
9921 static CUMULATIVE_ARGS zero_cumulative;
9923 *cum = zero_cumulative;
9924 cum->words = 0;
9925 cum->fregno = FP_ARG_MIN_REG;
9926 cum->vregno = ALTIVEC_ARG_MIN_REG;
9927 cum->prototype = (fntype && prototype_p (fntype));
9928 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9929 ? CALL_LIBCALL : CALL_NORMAL);
9930 cum->sysv_gregno = GP_ARG_MIN_REG;
9931 cum->stdarg = stdarg_p (fntype);
9932 cum->libcall = libcall;
9934 cum->nargs_prototype = 0;
9935 if (incoming || cum->prototype)
9936 cum->nargs_prototype = n_named_args;
9938 /* Check for a longcall attribute. */
9939 if ((!fntype && rs6000_default_long_calls)
9940 || (fntype
9941 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9942 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9943 cum->call_cookie |= CALL_LONG;
9945 if (TARGET_DEBUG_ARG)
9947 fprintf (stderr, "\ninit_cumulative_args:");
9948 if (fntype)
9950 tree ret_type = TREE_TYPE (fntype);
9951 fprintf (stderr, " ret code = %s,",
9952 get_tree_code_name (TREE_CODE (ret_type)));
9955 if (cum->call_cookie & CALL_LONG)
9956 fprintf (stderr, " longcall,");
9958 fprintf (stderr, " proto = %d, nargs = %d\n",
9959 cum->prototype, cum->nargs_prototype);
9962 #ifdef HAVE_AS_GNU_ATTRIBUTE
9963 if (DEFAULT_ABI == ABI_V4)
9965 cum->escapes = call_ABI_of_interest (fndecl);
9966 if (cum->escapes)
9968 tree return_type;
9970 if (fntype)
9972 return_type = TREE_TYPE (fntype);
9973 return_mode = TYPE_MODE (return_type);
9975 else
9976 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9978 if (return_type != NULL)
9980 if (TREE_CODE (return_type) == RECORD_TYPE
9981 && TYPE_TRANSPARENT_AGGR (return_type))
9983 return_type = TREE_TYPE (first_field (return_type));
9984 return_mode = TYPE_MODE (return_type);
9986 if (AGGREGATE_TYPE_P (return_type)
9987 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9988 <= 8))
9989 rs6000_returns_struct = true;
9991 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
9992 rs6000_passes_float = true;
9993 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9994 || SPE_VECTOR_MODE (return_mode))
9995 rs6000_passes_vector = true;
9998 #endif
10000 if (fntype
10001 && !TARGET_ALTIVEC
10002 && TARGET_ALTIVEC_ABI
10003 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10005 error ("cannot return value in vector register because"
10006 " altivec instructions are disabled, use -maltivec"
10007 " to enable them");
10011 /* The mode the ABI uses for a word. This is not the same as word_mode
10012 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10014 static machine_mode
10015 rs6000_abi_word_mode (void)
10017 return TARGET_32BIT ? SImode : DImode;
10020 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10021 static char *
10022 rs6000_offload_options (void)
10024 if (TARGET_64BIT)
10025 return xstrdup ("-foffload-abi=lp64");
10026 else
10027 return xstrdup ("-foffload-abi=ilp32");
10030 /* On rs6000, function arguments are promoted, as are function return
10031 values. */
10033 static machine_mode
10034 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10035 machine_mode mode,
10036 int *punsignedp ATTRIBUTE_UNUSED,
10037 const_tree, int)
10039 PROMOTE_MODE (mode, *punsignedp, type);
10041 return mode;
10044 /* Return true if TYPE must be passed on the stack and not in registers. */
10046 static bool
10047 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10049 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10050 return must_pass_in_stack_var_size (mode, type);
10051 else
10052 return must_pass_in_stack_var_size_or_pad (mode, type);
10055 /* If defined, a C expression which determines whether, and in which
10056 direction, to pad out an argument with extra space. The value
10057 should be of type `enum direction': either `upward' to pad above
10058 the argument, `downward' to pad below, or `none' to inhibit
10059 padding.
10061 For the AIX ABI structs are always stored left shifted in their
10062 argument slot. */
10064 enum direction
10065 function_arg_padding (machine_mode mode, const_tree type)
10067 #ifndef AGGREGATE_PADDING_FIXED
10068 #define AGGREGATE_PADDING_FIXED 0
10069 #endif
10070 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10071 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10072 #endif
10074 if (!AGGREGATE_PADDING_FIXED)
10076 /* GCC used to pass structures of the same size as integer types as
10077 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
10078 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10079 passed padded downward, except that -mstrict-align further
10080 muddied the water in that multi-component structures of 2 and 4
10081 bytes in size were passed padded upward.
10083 The following arranges for best compatibility with previous
10084 versions of gcc, but removes the -mstrict-align dependency. */
10085 if (BYTES_BIG_ENDIAN)
10087 HOST_WIDE_INT size = 0;
10089 if (mode == BLKmode)
10091 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10092 size = int_size_in_bytes (type);
10094 else
10095 size = GET_MODE_SIZE (mode);
10097 if (size == 1 || size == 2 || size == 4)
10098 return downward;
10100 return upward;
10103 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10105 if (type != 0 && AGGREGATE_TYPE_P (type))
10106 return upward;
10109 /* Fall back to the default. */
10110 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
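/* Worked example (illustrative): on a big-endian 64-bit target a
   2-byte struct pads downward, i.e. it occupies the high-address
   (least-significant) end of its doubleword slot, matching how a
   short passed as an integer would land; an 8-byte struct pads
   upward.  */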
10113 /* If defined, a C expression that gives the alignment boundary, in bits,
10114 of an argument with the specified mode and type. If it is not defined,
10115 PARM_BOUNDARY is used for all arguments.
10117 V.4 wants long longs and doubles to be double word aligned. Just
10118 testing the mode size is a boneheaded way to do this as it means
10119 that other types such as complex int are also double word aligned.
10120 However, we're stuck with this because changing the ABI might break
10121 existing library interfaces.
10123 Doubleword align SPE vectors.
10124 Quadword align Altivec/VSX vectors.
10125 Quadword align large synthetic vector types. */
10127 static unsigned int
10128 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10130 machine_mode elt_mode;
10131 int n_elts;
10133 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10135 if (DEFAULT_ABI == ABI_V4
10136 && (GET_MODE_SIZE (mode) == 8
10137 || (TARGET_HARD_FLOAT
10138 && TARGET_FPRS
10139 && FLOAT128_2REG_P (mode))))
10140 return 64;
10141 else if (FLOAT128_VECTOR_P (mode))
10142 return 128;
10143 else if (SPE_VECTOR_MODE (mode)
10144 || (type && TREE_CODE (type) == VECTOR_TYPE
10145 && int_size_in_bytes (type) >= 8
10146 && int_size_in_bytes (type) < 16))
10147 return 64;
10148 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10149 || (type && TREE_CODE (type) == VECTOR_TYPE
10150 && int_size_in_bytes (type) >= 16))
10151 return 128;
10153 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10154 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10155 -mcompat-align-parm is used. */
10156 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10157 || DEFAULT_ABI == ABI_ELFv2)
10158 && type && TYPE_ALIGN (type) > 64)
10160 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10161 or homogeneous float/vector aggregates here. We already handled
10162 vector aggregates above, but still need to check for float here. */
10163 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10164 && !SCALAR_FLOAT_MODE_P (elt_mode));
10166 /* We used to check for BLKmode instead of the above aggregate type
10167 check. Warn when this results in any difference to the ABI. */
10168 if (aggregate_p != (mode == BLKmode))
10170 static bool warned;
10171 if (!warned && warn_psabi)
10173 warned = true;
10174 inform (input_location,
10175 "the ABI of passing aggregates with %d-byte alignment"
10176 " has changed in GCC 5",
10177 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10181 if (aggregate_p)
10182 return 128;
10185 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10186 implement the "aggregate type" check as a BLKmode check here; this
10187 means certain aggregate types are in fact not aligned. */
10188 if (TARGET_MACHO && rs6000_darwin64_abi
10189 && mode == BLKmode
10190 && type && TYPE_ALIGN (type) > 64)
10191 return 128;
10193 return PARM_BOUNDARY;
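/* Worked examples (illustrative): a V4SFmode argument aligns to 128
   bits; IBM extended long double (FLOAT128_2REG_P) under the V.4 ABI
   aligns to 64; and a 16-byte-aligned struct under ELFv2 aligns to
   128 through the aggregate_p path, possibly emitting the GCC 5
   -Wpsabi note above.  */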
10196 /* The offset in words to the start of the parameter save area. */
10198 static unsigned int
10199 rs6000_parm_offset (void)
10201 return (DEFAULT_ABI == ABI_V4 ? 2
10202 : DEFAULT_ABI == ABI_ELFv2 ? 4
10203 : 6);
10206 /* For a function parm of MODE and TYPE, return the starting word in
10207 the parameter area. NWORDS of the parameter area are already used. */
10209 static unsigned int
10210 rs6000_parm_start (machine_mode mode, const_tree type,
10211 unsigned int nwords)
10213 unsigned int align;
10215 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10216 return nwords + (-(rs6000_parm_offset () + nwords) & align);
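/* Worked example (illustrative): on 64-bit ELFv2 the parameter save
   area begins rs6000_parm_offset () == 4 words in.  For an argument
   with 128-bit alignment (so ALIGN == 1) and NWORDS == 3 words already
   used, the result is 3 + (-(4 + 3) & 1) == 4, so the argument starts
   on an even doubleword within the save area.  */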
10219 /* Compute the size (in words) of a function argument. */
10221 static unsigned long
10222 rs6000_arg_size (machine_mode mode, const_tree type)
10224 unsigned long size;
10226 if (mode != BLKmode)
10227 size = GET_MODE_SIZE (mode);
10228 else
10229 size = int_size_in_bytes (type);
10231 if (TARGET_32BIT)
10232 return (size + 3) >> 2;
10233 else
10234 return (size + 7) >> 3;
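/* Worked example (illustrative): a 13-byte BLKmode struct occupies
   (13 + 7) >> 3 == 2 doublewords on a 64-bit target, or
   (13 + 3) >> 2 == 4 words when TARGET_32BIT.  */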
10237 /* Use this to flush pending int fields. */
10239 static void
10240 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10241 HOST_WIDE_INT bitpos, int final)
10243 unsigned int startbit, endbit;
10244 int intregs, intoffset;
10245 machine_mode mode;
10247 /* Handle the situations where a float is taking up the first half
10248 of the GPR, and the other half is empty (typically due to
10249 alignment restrictions). We can detect this by an 8-byte-aligned
10250 int field, or by seeing that this is the final flush for this
10251 argument. Count the word and continue on. */
10252 if (cum->floats_in_gpr == 1
10253 && (cum->intoffset % 64 == 0
10254 || (cum->intoffset == -1 && final)))
10256 cum->words++;
10257 cum->floats_in_gpr = 0;
10260 if (cum->intoffset == -1)
10261 return;
10263 intoffset = cum->intoffset;
10264 cum->intoffset = -1;
10265 cum->floats_in_gpr = 0;
10267 if (intoffset % BITS_PER_WORD != 0)
10269 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10270 MODE_INT, 0);
10271 if (mode == BLKmode)
10273 /* We couldn't find an appropriate mode, which happens,
10274 e.g., in packed structs when there are 3 bytes to load.
10275 Move intoffset back to the beginning of the word in this
10276 case. */
10277 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10281 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10282 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10283 intregs = (endbit - startbit) / BITS_PER_WORD;
10284 cum->words += intregs;
10285 /* words should be unsigned. */
10286 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
10288 int pad = (endbit / BITS_PER_WORD) - cum->words;
10289 cum->words += pad;
10293 /* The darwin64 ABI calls for us to recurse down through structs,
10294 looking for elements passed in registers. Unfortunately, we have
10295 to track int register count here also because of misalignments
10296 in powerpc alignment mode. */
10298 static void
10299 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10300 const_tree type,
10301 HOST_WIDE_INT startbitpos)
10303 tree f;
10305 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10306 if (TREE_CODE (f) == FIELD_DECL)
10308 HOST_WIDE_INT bitpos = startbitpos;
10309 tree ftype = TREE_TYPE (f);
10310 machine_mode mode;
10311 if (ftype == error_mark_node)
10312 continue;
10313 mode = TYPE_MODE (ftype);
10315 if (DECL_SIZE (f) != 0
10316 && tree_fits_uhwi_p (bit_position (f)))
10317 bitpos += int_bit_position (f);
10319 /* ??? FIXME: else assume zero offset. */
10321 if (TREE_CODE (ftype) == RECORD_TYPE)
10322 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10323 else if (USE_FP_FOR_ARG_P (cum, mode))
10325 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10326 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10327 cum->fregno += n_fpregs;
10328 /* Single-precision floats present a special problem for
10329 us, because they are smaller than an 8-byte GPR, and so
10330 the structure-packing rules combined with the standard
10331 varargs behavior mean that we want to pack float/float
10332 and float/int combinations into a single register's
10333 space. This is complicated by the arg advance flushing,
10334 which works on arbitrarily large groups of int-type
10335 fields. */
10336 if (mode == SFmode)
10338 if (cum->floats_in_gpr == 1)
10340 /* Two floats in a word; count the word and reset
10341 the float count. */
10342 cum->words++;
10343 cum->floats_in_gpr = 0;
10345 else if (bitpos % 64 == 0)
10347 /* A float at the beginning of an 8-byte word;
10348 count it and put off adjusting cum->words until
10349 we see if an arg advance flush is going to do it
10350 for us. */
10351 cum->floats_in_gpr++;
10353 else
10355 /* The float is at the end of a word, preceded
10356 by integer fields, so the arg advance flush
10357 just above has already set cum->words and
10358 everything is taken care of. */
10361 else
10362 cum->words += n_fpregs;
10364 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10366 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10367 cum->vregno++;
10368 cum->words += 2;
10370 else if (cum->intoffset == -1)
10371 cum->intoffset = bitpos;
10375 /* Check for an item that needs to be considered specially under the Darwin 64-
10376 bit ABI. These are record types where the mode is BLK or the structure is
10377 8 bytes in size. */
10378 static int
10379 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10381 return rs6000_darwin64_abi
10382 && ((mode == BLKmode
10383 && TREE_CODE (type) == RECORD_TYPE
10384 && int_size_in_bytes (type) > 0)
10385 || (type && TREE_CODE (type) == RECORD_TYPE
10386 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10389 /* Update the data in CUM to advance over an argument
10390 of mode MODE and data type TYPE.
10391 (TYPE is null for libcalls where that information may not be available.)
10393 Note that for args passed by reference, function_arg will be called
10394 with MODE and TYPE set to that of the pointer to the arg, not the arg
10395 itself. */
10397 static void
10398 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10399 const_tree type, bool named, int depth)
10401 machine_mode elt_mode;
10402 int n_elts;
10404 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10406 /* Only tick off an argument if we're not recursing. */
10407 if (depth == 0)
10408 cum->nargs_prototype--;
10410 #ifdef HAVE_AS_GNU_ATTRIBUTE
10411 if (DEFAULT_ABI == ABI_V4
10412 && cum->escapes)
10414 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
10415 rs6000_passes_float = true;
10416 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10417 rs6000_passes_vector = true;
10418 else if (SPE_VECTOR_MODE (mode)
10419 && !cum->stdarg
10420 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10421 rs6000_passes_vector = true;
10423 #endif
10425 if (TARGET_ALTIVEC_ABI
10426 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10427 || (type && TREE_CODE (type) == VECTOR_TYPE
10428 && int_size_in_bytes (type) == 16)))
10430 bool stack = false;
10432 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10434 cum->vregno += n_elts;
10436 if (!TARGET_ALTIVEC)
10437 error ("cannot pass argument in vector register because"
10438 " altivec instructions are disabled, use -maltivec"
10439 " to enable them");
10441 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10442 even if it is going to be passed in a vector register.
10443 Darwin does the same for variable-argument functions. */
10444 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10445 && TARGET_64BIT)
10446 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10447 stack = true;
10449 else
10450 stack = true;
10452 if (stack)
10454 int align;
10456 /* Vector parameters must be 16-byte aligned. In 32-bit
10457 mode this means we need to take into account the offset
10458 to the parameter save area. In 64-bit mode, they just
10459 have to start on an even word, since the parameter save
10460 area is 16-byte aligned. */
10461 if (TARGET_32BIT)
10462 align = -(rs6000_parm_offset () + cum->words) & 3;
10463 else
10464 align = cum->words & 1;
10465 cum->words += align + rs6000_arg_size (mode, type);
10467 if (TARGET_DEBUG_ARG)
10469 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10470 cum->words, align);
10471 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10472 cum->nargs_prototype, cum->prototype,
10473 GET_MODE_NAME (mode));
10477 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
10478 && !cum->stdarg
10479 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10480 cum->sysv_gregno++;
10482 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10484 int size = int_size_in_bytes (type);
10485 /* Variable sized types have size == -1 and are
10486 treated as if consisting entirely of ints.
10487 Pad to 16 byte boundary if needed. */
10488 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10489 && (cum->words % 2) != 0)
10490 cum->words++;
10491 /* For varargs, we can just go up by the size of the struct. */
10492 if (!named)
10493 cum->words += (size + 7) / 8;
10494 else
10496 /* It is tempting to say int register count just goes up by
10497 sizeof(type)/8, but this is wrong in a case such as
10498 { int; double; int; } [powerpc alignment]. We have to
10499 grovel through the fields for these too. */
10500 cum->intoffset = 0;
10501 cum->floats_in_gpr = 0;
10502 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10503 rs6000_darwin64_record_arg_advance_flush (cum,
10504 size * BITS_PER_UNIT, 1);
10506 if (TARGET_DEBUG_ARG)
10508 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10509 cum->words, TYPE_ALIGN (type), size);
10510 fprintf (stderr,
10511 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10512 cum->nargs_prototype, cum->prototype,
10513 GET_MODE_NAME (mode));
10516 else if (DEFAULT_ABI == ABI_V4)
10518 if (TARGET_HARD_FLOAT && TARGET_FPRS
10519 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10520 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10521 || FLOAT128_2REG_P (mode)
10522 || DECIMAL_FLOAT_MODE_P (mode)))
10524 /* _Decimal128 must use an even/odd register pair. This assumes
10525 that the register number is odd when fregno is odd. */
10526 if (mode == TDmode && (cum->fregno % 2) == 1)
10527 cum->fregno++;
10529 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10530 <= FP_ARG_V4_MAX_REG)
10531 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10532 else
10534 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10535 if (mode == DFmode || FLOAT128_IBM_P (mode)
10536 || mode == DDmode || mode == TDmode)
10537 cum->words += cum->words & 1;
10538 cum->words += rs6000_arg_size (mode, type);
10541 else
10543 int n_words = rs6000_arg_size (mode, type);
10544 int gregno = cum->sysv_gregno;
10546 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10547 (r7,r8) or (r9,r10). As does any other 2 word item such
10548 as complex int due to a historical mistake. */
10549 if (n_words == 2)
10550 gregno += (1 - gregno) & 1;
10552 /* Multi-reg args are not split between registers and stack. */
10553 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10555 /* Long long and SPE vectors are aligned on the stack.
10556 So are other 2 word items such as complex int due to
10557 a historical mistake. */
10558 if (n_words == 2)
10559 cum->words += cum->words & 1;
10560 cum->words += n_words;
10563 /* Note: we continue to accumulate gregno even after we have started
10564 spilling to the stack; this tells expand_builtin_saveregs that
10565 spilling to the stack has begun. */
10566 cum->sysv_gregno = gregno + n_words;
10569 if (TARGET_DEBUG_ARG)
10571 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10572 cum->words, cum->fregno);
10573 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10574 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10575 fprintf (stderr, "mode = %4s, named = %d\n",
10576 GET_MODE_NAME (mode), named);
10579 else
10581 int n_words = rs6000_arg_size (mode, type);
10582 int start_words = cum->words;
10583 int align_words = rs6000_parm_start (mode, type, start_words);
10585 cum->words = align_words + n_words;
10587 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
10589 /* _Decimal128 must be passed in an even/odd float register pair.
10590 This assumes that the register number is odd when fregno is
10591 odd. */
10592 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10593 cum->fregno++;
10594 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10597 if (TARGET_DEBUG_ARG)
10599 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10600 cum->words, cum->fregno);
10601 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10602 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10603 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10604 named, align_words - start_words, depth);
10609 static void
10610 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10611 const_tree type, bool named)
10613 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10617 static rtx
10618 spe_build_register_parallel (machine_mode mode, int gregno)
10620 rtx r1, r3, r5, r7;
10622 switch (mode)
10624 case DFmode:
10625 r1 = gen_rtx_REG (DImode, gregno);
10626 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10627 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10629 case DCmode:
10630 case TFmode:
10631 r1 = gen_rtx_REG (DImode, gregno);
10632 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10633 r3 = gen_rtx_REG (DImode, gregno + 2);
10634 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10635 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10637 case TCmode:
10638 r1 = gen_rtx_REG (DImode, gregno);
10639 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10640 r3 = gen_rtx_REG (DImode, gregno + 2);
10641 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10642 r5 = gen_rtx_REG (DImode, gregno + 4);
10643 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10644 r7 = gen_rtx_REG (DImode, gregno + 6);
10645 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10646 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10648 default:
10649 gcc_unreachable ();
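/* Illustrative example: an E500 TFmode argument starting at r5 is
   returned from here as

     (parallel [(expr_list (reg:DI 5) (const_int 0))
                (expr_list (reg:DI 7) (const_int 8))])

   i.e. two 64-bit GPR chunks covering byte offsets 0 and 8 of the
   value.  */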
10653 /* Determine where to put a SIMD argument on the SPE. */
10654 static rtx
10655 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10656 const_tree type)
10658 int gregno = cum->sysv_gregno;
10660 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10661 are passed and returned in a pair of GPRs for ABI compatibility. */
10662 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10663 || mode == DCmode || mode == TCmode))
10665 int n_words = rs6000_arg_size (mode, type);
10667 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10668 if (mode == DFmode)
10669 gregno += (1 - gregno) & 1;
10671 /* Multi-reg args are not split between registers and stack. */
10672 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10673 return NULL_RTX;
10675 return spe_build_register_parallel (mode, gregno);
10677 if (cum->stdarg)
10679 int n_words = rs6000_arg_size (mode, type);
10681 /* SPE vectors are put in odd registers. */
10682 if (n_words == 2 && (gregno & 1) == 0)
10683 gregno += 1;
10685 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10687 rtx r1, r2;
10688 machine_mode m = SImode;
10690 r1 = gen_rtx_REG (m, gregno);
10691 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10692 r2 = gen_rtx_REG (m, gregno + 1);
10693 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10694 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10696 else
10697 return NULL_RTX;
10699 else
10701 if (gregno <= GP_ARG_MAX_REG)
10702 return gen_rtx_REG (mode, gregno);
10703 else
10704 return NULL_RTX;
10708 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10709 structure between cum->intoffset and bitpos to integer registers. */
10711 static void
10712 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10713 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10715 machine_mode mode;
10716 unsigned int regno;
10717 unsigned int startbit, endbit;
10718 int this_regno, intregs, intoffset;
10719 rtx reg;
10721 if (cum->intoffset == -1)
10722 return;
10724 intoffset = cum->intoffset;
10725 cum->intoffset = -1;
10727 /* If this is the trailing part of a word, try to only load that
10728 much into the register. Otherwise load the whole register. Note
10729 that in the latter case we may pick up unwanted bits. It's not a
10730 problem at the moment, but we may wish to revisit this. */
10732 if (intoffset % BITS_PER_WORD != 0)
10734 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10735 MODE_INT, 0);
10736 if (mode == BLKmode)
10738 /* We couldn't find an appropriate mode, which happens,
10739 e.g., in packed structs when there are 3 bytes to load.
10740 Move intoffset back to the beginning of the word in this
10741 case. */
10742 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10743 mode = word_mode;
10746 else
10747 mode = word_mode;
10749 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10750 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10751 intregs = (endbit - startbit) / BITS_PER_WORD;
10752 this_regno = cum->words + intoffset / BITS_PER_WORD;
10754 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10755 cum->use_stack = 1;
10757 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10758 if (intregs <= 0)
10759 return;
10761 intoffset /= BITS_PER_UNIT;
10764 regno = GP_ARG_MIN_REG + this_regno;
10765 reg = gen_rtx_REG (mode, regno);
10766 rvec[(*k)++] =
10767 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10769 this_regno += 1;
10770 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
10771 mode = word_mode;
10772 intregs -= 1;
10774 while (intregs > 0);
10777 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
10779 static void
10780 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10781 HOST_WIDE_INT startbitpos, rtx rvec[],
10782 int *k)
10784 tree f;
10786 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10787 if (TREE_CODE (f) == FIELD_DECL)
10789 HOST_WIDE_INT bitpos = startbitpos;
10790 tree ftype = TREE_TYPE (f);
10791 machine_mode mode;
10792 if (ftype == error_mark_node)
10793 continue;
10794 mode = TYPE_MODE (ftype);
10796 if (DECL_SIZE (f) != 0
10797 && tree_fits_uhwi_p (bit_position (f)))
10798 bitpos += int_bit_position (f);
10800 /* ??? FIXME: else assume zero offset. */
10802 if (TREE_CODE (ftype) == RECORD_TYPE)
10803 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10804 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10806 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10807 #if 0
10808 switch (mode)
10810 case SCmode: mode = SFmode; break;
10811 case DCmode: mode = DFmode; break;
10812 case TCmode: mode = TFmode; break;
10813 default: break;
10815 #endif
10816 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10817 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10819 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10820 && (mode == TFmode || mode == TDmode));
10821 /* Long double or _Decimal128 split over regs and memory. */
10822 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10823 cum->use_stack = 1;
10825 rvec[(*k)++]
10826 = gen_rtx_EXPR_LIST (VOIDmode,
10827 gen_rtx_REG (mode, cum->fregno++),
10828 GEN_INT (bitpos / BITS_PER_UNIT));
10829 if (FLOAT128_2REG_P (mode))
10830 cum->fregno++;
10832 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10834 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10835 rvec[(*k)++]
10836 = gen_rtx_EXPR_LIST (VOIDmode,
10837 gen_rtx_REG (mode, cum->vregno++),
10838 GEN_INT (bitpos / BITS_PER_UNIT));
10840 else if (cum->intoffset == -1)
10841 cum->intoffset = bitpos;
10845 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10846 the register(s) to be used for each field and subfield of a struct
10847 being passed by value, along with the offset of where the
10848 register's value may be found in the block. FP fields go in FP
10849 register, vector fields go in vector registers, and everything
10850 else goes in int registers, packed as in memory.
10852 This code is also used for function return values. RETVAL indicates
10853 whether this is the case.
10855 Much of this is taken from the SPARC V9 port, which has a similar
10856 calling convention. */
10858 static rtx
10859 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10860 bool named, bool retval)
10862 rtx rvec[FIRST_PSEUDO_REGISTER];
10863 int k = 1, kbase = 1;
10864 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10865 /* This is a copy; modifications are not visible to our caller. */
10866 CUMULATIVE_ARGS copy_cum = *orig_cum;
10867 CUMULATIVE_ARGS *cum = &copy_cum;
10869 /* Pad to 16 byte boundary if needed. */
10870 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10871 && (cum->words % 2) != 0)
10872 cum->words++;
10874 cum->intoffset = 0;
10875 cum->use_stack = 0;
10876 cum->named = named;
10878 /* Put entries into rvec[] for individual FP and vector fields, and
10879 for the chunks of memory that go in int regs. Note we start at
10880 element 1; 0 is reserved for an indication of using memory, and
10881 may or may not be filled in below. */
10882 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10883 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10885 /* If any part of the struct went on the stack put all of it there.
10886 This hack is because the generic code for
10887 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10888 parts of the struct are not at the beginning. */
10889 if (cum->use_stack)
10891 if (retval)
10892 return NULL_RTX; /* doesn't go in registers at all */
10893 kbase = 0;
10894 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10896 if (k > 1 || cum->use_stack)
10897 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10898 else
10899 return NULL_RTX;
10902 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10904 static rtx
10905 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10906 int align_words)
10908 int n_units;
10909 int i, k;
10910 rtx rvec[GP_ARG_NUM_REG + 1];
10912 if (align_words >= GP_ARG_NUM_REG)
10913 return NULL_RTX;
10915 n_units = rs6000_arg_size (mode, type);
10917 /* Optimize the simple case where the arg fits in one gpr, except in
10918 the case of BLKmode due to assign_parms assuming that registers are
10919 BITS_PER_WORD wide. */
10920 if (n_units == 0
10921 || (n_units == 1 && mode != BLKmode))
10922 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10924 k = 0;
10925 if (align_words + n_units > GP_ARG_NUM_REG)
10926 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10927 using a magic NULL_RTX component.
10928 This is not strictly correct. Only some of the arg belongs in
10929 memory, not all of it. However, the normal scheme using
10930 function_arg_partial_nregs can result in unusual subregs, e.g.
10931 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10932 store the whole arg to memory is often more efficient than code
10933 to store pieces, and we know that space is available in the right
10934 place for the whole arg. */
10935 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10937 i = 0;
10940 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10941 rtx off = GEN_INT (i++ * 4);
10942 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10944 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10946 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
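
/* Editor's sketch (not part of rs6000.c): the do/while loop above, run
   on hypothetical numbers -- an 8-byte argument whose first free slot
   is GPR word 7 of the 8 argument GPRs (r3..r10), so half of it spills
   to memory and the whole arg is also flagged to memory.  */
#include <stdio.h>

int
main (void)
{
  int align_words = 7, n_units = 2, i = 0;   /* assumed values */
  if (align_words + n_units > 8)
    printf ("(mem) <- whole arg also flagged to memory\n");
  do
    {
      printf ("r%d <- bytes %d..%d\n", 3 + align_words, i * 4, i * 4 + 3);
      i++;
    }
  while (++align_words < 8 && --n_units != 0);
  return 0;
}
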
10949 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10950 but must also be copied into the parameter save area starting at
10951 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10952 to the GPRs and/or memory. Return the number of elements used. */
10954 static int
10955 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10956 int align_words, rtx *rvec)
10958 int k = 0;
10960 if (align_words < GP_ARG_NUM_REG)
10962 int n_words = rs6000_arg_size (mode, type);
10964 if (align_words + n_words > GP_ARG_NUM_REG
10965 || mode == BLKmode
10966 || (TARGET_32BIT && TARGET_POWERPC64))
10968 /* If this is partially on the stack, then we only
10969 include the portion actually in registers here. */
10970 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10971 int i = 0;
10973 if (align_words + n_words > GP_ARG_NUM_REG)
10975 /* Not all of the arg fits in gprs. Say that it goes in memory
10976 too, using a magic NULL_RTX component. Also see comment in
10977 rs6000_mixed_function_arg for why the normal
10978 function_arg_partial_nregs scheme doesn't work in this case. */
10979 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10984 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10985 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10986 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10988 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10990 else
10992 /* The whole arg fits in gprs. */
10993 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10994 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10997 else
10999 /* It's entirely in memory. */
11000 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11003 return k;
11006 /* RVEC is a vector of K components of an argument of mode MODE.
11007 Construct the final function_arg return value from it. */
11009 static rtx
11010 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11012 gcc_assert (k >= 1);
11014 /* Avoid returning a PARALLEL in the trivial cases. */
11015 if (k == 1)
11017 if (XEXP (rvec[0], 0) == NULL_RTX)
11018 return NULL_RTX;
11020 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11021 return XEXP (rvec[0], 0);
11024 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11027 /* Determine where to put an argument to a function.
11028 Value is zero to push the argument on the stack,
11029 or a hard register in which to store the argument.
11031 MODE is the argument's machine mode.
11032 TYPE is the data type of the argument (as a tree).
11033 This is null for libcalls where that information may
11034 not be available.
11035 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11036 the preceding args and about the function being called. It is
11037 not modified in this routine.
11038 NAMED is nonzero if this argument is a named parameter
11039 (otherwise it is an extra parameter matching an ellipsis).
11041 On RS/6000 the first eight words of non-FP are normally in registers
11042 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11043 Under V.4, the first 8 FP args are in registers.
11045 If this is floating-point and no prototype is specified, we use
11046 both an FP and integer register (or possibly FP reg and stack). Library
11047 functions (when CALL_LIBCALL is set) always have the proper types for args,
11048 so we can pass the FP value just in one register. emit_library_function
11049 doesn't support PARALLEL anyway.
11051 Note that for args passed by reference, function_arg will be called
11052 with MODE and TYPE set to that of the pointer to the arg, not the arg
11053 itself. */
11055 static rtx
11056 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11057 const_tree type, bool named)
11059 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11060 enum rs6000_abi abi = DEFAULT_ABI;
11061 machine_mode elt_mode;
11062 int n_elts;
11064 /* Return a marker to indicate whether CR1 needs to set or clear the
11065 bit that V.4 uses to say fp args were passed in registers.
11066 Assume that we don't need the marker for software floating point,
11067 or compiler generated library calls. */
11068 if (mode == VOIDmode)
11070 if (abi == ABI_V4
11071 && (cum->call_cookie & CALL_LIBCALL) == 0
11072 && (cum->stdarg
11073 || (cum->nargs_prototype < 0
11074 && (cum->prototype || TARGET_NO_PROTOTYPE))))
11076 /* For the SPE, we need to crxor CR6 always. */
11077 if (TARGET_SPE_ABI)
11078 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
11079 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
11080 return GEN_INT (cum->call_cookie
11081 | ((cum->fregno == FP_ARG_MIN_REG)
11082 ? CALL_V4_SET_FP_ARGS
11083 : CALL_V4_CLEAR_FP_ARGS));
11086 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11089 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11091 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11093 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11094 if (rslt != NULL_RTX)
11095 return rslt;
11096 /* Else fall through to usual handling. */
11099 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11101 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11102 rtx r, off;
11103 int i, k = 0;
11105 /* Do we also need to pass this argument in the parameter save area?
11106 Library support functions for IEEE 128-bit are assumed to not need the
11107 value passed both in GPRs and in vector registers. */
11108 if (TARGET_64BIT && !cum->prototype
11109 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11111 int align_words = ROUND_UP (cum->words, 2);
11112 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11115 /* Describe where this argument goes in the vector registers. */
11116 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11118 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11119 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11120 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11123 return rs6000_finish_function_arg (mode, rvec, k);
11125 else if (TARGET_ALTIVEC_ABI
11126 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11127 || (type && TREE_CODE (type) == VECTOR_TYPE
11128 && int_size_in_bytes (type) == 16)))
11130 if (named || abi == ABI_V4)
11131 return NULL_RTX;
11132 else
11134 /* Vector parameters to varargs functions under AIX or Darwin
11135 get passed in memory and possibly also in GPRs. */
11136 int align, align_words, n_words;
11137 machine_mode part_mode;
11139 /* Vector parameters must be 16-byte aligned. In 32-bit
11140 mode this means we need to take into account the offset
11141 to the parameter save area. In 64-bit mode, they just
11142 have to start on an even word, since the parameter save
11143 area is 16-byte aligned. */
11144 if (TARGET_32BIT)
11145 align = -(rs6000_parm_offset () + cum->words) & 3;
11146 else
11147 align = cum->words & 1;
11148 align_words = cum->words + align;
11150 /* Out of registers? Memory, then. */
11151 if (align_words >= GP_ARG_NUM_REG)
11152 return NULL_RTX;
11154 if (TARGET_32BIT && TARGET_POWERPC64)
11155 return rs6000_mixed_function_arg (mode, type, align_words);
11157 /* The vector value goes in GPRs. Only the part of the
11158 value in GPRs is reported here. */
11159 part_mode = mode;
11160 n_words = rs6000_arg_size (mode, type);
11161 if (align_words + n_words > GP_ARG_NUM_REG)
11162 /* Fortunately, there are only two possibilities, the value
11163 is either wholly in GPRs or half in GPRs and half not. */
11164 part_mode = DImode;
11166 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11169 else if (TARGET_SPE_ABI && TARGET_SPE
11170 && (SPE_VECTOR_MODE (mode)
11171 || (TARGET_E500_DOUBLE && (mode == DFmode
11172 || mode == DCmode
11173 || mode == TFmode
11174 || mode == TCmode))))
11175 return rs6000_spe_function_arg (cum, mode, type);
11177 else if (abi == ABI_V4)
11179 if (TARGET_HARD_FLOAT && TARGET_FPRS
11180 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
11181 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
11182 || FLOAT128_2REG_P (mode)
11183 || DECIMAL_FLOAT_MODE_P (mode)))
11185 /* _Decimal128 must use an even/odd register pair. This assumes
11186 that the register number is odd when fregno is odd. */
11187 if (mode == TDmode && (cum->fregno % 2) == 1)
11188 cum->fregno++;
11190 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11191 <= FP_ARG_V4_MAX_REG)
11192 return gen_rtx_REG (mode, cum->fregno);
11193 else
11194 return NULL_RTX;
11196 else
11198 int n_words = rs6000_arg_size (mode, type);
11199 int gregno = cum->sysv_gregno;
11201 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11202 (r7,r8) or (r9,r10), as is any other 2-word item such
11203 as complex int, due to a historical mistake. */
11204 if (n_words == 2)
11205 gregno += (1 - gregno) & 1;
11207 /* Multi-reg args are not split between registers and stack. */
11208 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11209 return NULL_RTX;
11211 if (TARGET_32BIT && TARGET_POWERPC64)
11212 return rs6000_mixed_function_arg (mode, type,
11213 gregno - GP_ARG_MIN_REG);
11214 return gen_rtx_REG (mode, gregno);
11217 else
11219 int align_words = rs6000_parm_start (mode, type, cum->words);
11221 /* _Decimal128 must be passed in an even/odd float register pair.
11222 This assumes that the register number is odd when fregno is odd. */
11223 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11224 cum->fregno++;
11226 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11228 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11229 rtx r, off;
11230 int i, k = 0;
11231 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11232 int fpr_words;
11234 /* Do we also need to pass this argument in the parameter
11235 save area? */
11236 if (type && (cum->nargs_prototype <= 0
11237 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11238 && TARGET_XL_COMPAT
11239 && align_words >= GP_ARG_NUM_REG)))
11240 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11242 /* Describe where this argument goes in the fprs. */
11243 for (i = 0; i < n_elts
11244 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11246 /* Check if the argument is split over registers and memory.
11247 This can only ever happen for long double or _Decimal128;
11248 complex types are handled via split_complex_arg. */
11249 machine_mode fmode = elt_mode;
11250 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11252 gcc_assert (FLOAT128_2REG_P (fmode));
11253 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11256 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11257 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11258 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11261 /* If there were not enough FPRs to hold the argument, the rest
11262 usually goes into memory. However, if the current position
11263 is still within the register parameter area, a portion may
11264 actually have to go into GPRs.
11266 Note that it may happen that the portion of the argument
11267 passed in the first "half" of the first GPR was already
11268 passed in the last FPR as well.
11270 For unnamed arguments, we already set up GPRs to cover the
11271 whole argument in rs6000_psave_function_arg, so there is
11272 nothing further to do at this point. */
11273 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11274 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11275 && cum->nargs_prototype > 0)
11277 static bool warned;
11279 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11280 int n_words = rs6000_arg_size (mode, type);
11282 align_words += fpr_words;
11283 n_words -= fpr_words;
11287 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11288 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11289 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11291 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11293 if (!warned && warn_psabi)
11295 warned = true;
11296 inform (input_location,
11297 "the ABI of passing homogeneous float aggregates"
11298 " has changed in GCC 5");
11302 return rs6000_finish_function_arg (mode, rvec, k);
11304 else if (align_words < GP_ARG_NUM_REG)
11306 if (TARGET_32BIT && TARGET_POWERPC64)
11307 return rs6000_mixed_function_arg (mode, type, align_words);
11309 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11311 else
11312 return NULL_RTX;
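
/* Editor's sketch (not part of rs6000.c): roughly where the arguments
   of a hypothetical prototyped call land under the 64-bit AIX/ELFv2
   rules implemented above.  The register choices below are assumptions
   meant to illustrate the dual GPR-slot/FPR accounting, not GCC
   output.  */
#include <stdio.h>

int
main (void)
{
  /* void f (int a, double b, int c);
     Every argument consumes one 8-byte parameter word (a GPR slot),
     but a prototyped FP scalar travels in an FPR, leaving its GPR
     slot as a shadow.  */
  printf ("a -> r3   (GPR word 0)\n");
  printf ("b -> f1   (FPR; GPR word 1 is shadowed)\n");
  printf ("c -> r5   (GPR word 2)\n");
  return 0;
}
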
11316 /* For an arg passed partly in registers and partly in memory, this is
11317 the number of bytes passed in registers. For args passed entirely in
11318 registers or entirely in memory, zero. When an arg is described by a
11319 PARALLEL, perhaps using more than one register type, this function
11320 returns the number of bytes used by the first element of the PARALLEL. */
11322 static int
11323 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11324 tree type, bool named)
11326 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11327 bool passed_in_gprs = true;
11328 int ret = 0;
11329 int align_words;
11330 machine_mode elt_mode;
11331 int n_elts;
11333 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11335 if (DEFAULT_ABI == ABI_V4)
11336 return 0;
11338 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11340 /* If we are passing this arg in the fixed parameter save area (gprs or
11341 memory) as well as VRs, we do not use the partial bytes mechanism;
11342 instead, rs6000_function_arg will return a PARALLEL including a memory
11343 element as necessary. Library support functions for IEEE 128-bit are
11344 assumed to not need the value passed both in GPRs and in vector
11345 registers. */
11346 if (TARGET_64BIT && !cum->prototype
11347 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11348 return 0;
11350 /* Otherwise, we pass in VRs only. Check for partial copies. */
11351 passed_in_gprs = false;
11352 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11353 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11356 /* In this complicated case we just disable the partial_nregs code. */
11357 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11358 return 0;
11360 align_words = rs6000_parm_start (mode, type, cum->words);
11362 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11364 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11366 /* If we are passing this arg in the fixed parameter save area
11367 (gprs or memory) as well as FPRs, we do not use the partial
11368 bytes mechanism; instead, rs6000_function_arg will return a
11369 PARALLEL including a memory element as necessary. */
11370 if (type
11371 && (cum->nargs_prototype <= 0
11372 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11373 && TARGET_XL_COMPAT
11374 && align_words >= GP_ARG_NUM_REG)))
11375 return 0;
11377 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11378 passed_in_gprs = false;
11379 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11381 /* Compute number of bytes / words passed in FPRs. If there
11382 is still space available in the register parameter area
11383 *after* that amount, a part of the argument will be passed
11384 in GPRs. In that case, the total amount passed in any
11385 registers is equal to the amount that would have been passed
11386 in GPRs if everything were passed there, so we fall back to
11387 the GPR code below to compute the appropriate value. */
11388 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11389 * MIN (8, GET_MODE_SIZE (elt_mode)));
11390 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11392 if (align_words + fpr_words < GP_ARG_NUM_REG)
11393 passed_in_gprs = true;
11394 else
11395 ret = fpr;
11399 if (passed_in_gprs
11400 && align_words < GP_ARG_NUM_REG
11401 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11402 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11404 if (ret != 0 && TARGET_DEBUG_ARG)
11405 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11407 return ret;
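
/* Editor's sketch (not part of rs6000.c): the final GPR computation
   above, for a hypothetical 16-byte argument whose first free slot is
   GPR word 7 of 8 on a 64-bit target -- one word fits in r10, so 8
   bytes are reported as passed in registers.  */
#include <stdio.h>

int
main (void)
{
  int align_words = 7, arg_size_words = 2, gp_arg_num_reg = 8;
  int ret = 0;
  if (align_words < gp_arg_num_reg
      && gp_arg_num_reg < align_words + arg_size_words)
    ret = (gp_arg_num_reg - align_words) * 8;   /* bytes, 64-bit words */
  printf ("%d bytes in registers\n", ret);      /* prints 8 */
  return 0;
}
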
11410 /* A C expression that indicates when an argument must be passed by
11411 reference. If nonzero for an argument, a copy of that argument is
11412 made in memory and a pointer to the argument is passed instead of
11413 the argument itself. The pointer is passed in whatever way is
11414 appropriate for passing a pointer to that type.
11416 Under V.4, aggregates and long double are passed by reference.
11418 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11419 reference unless the AltiVec vector extension ABI is in force.
11421 As an extension to all ABIs, variable sized types are passed by
11422 reference. */
11424 static bool
11425 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11426 machine_mode mode, const_tree type,
11427 bool named ATTRIBUTE_UNUSED)
11429 if (!type)
11430 return 0;
11432 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11433 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11435 if (TARGET_DEBUG_ARG)
11436 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11437 return 1;
11440 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11442 if (TARGET_DEBUG_ARG)
11443 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11444 return 1;
11447 if (int_size_in_bytes (type) < 0)
11449 if (TARGET_DEBUG_ARG)
11450 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11451 return 1;
11454 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11455 modes only exist for GCC vector types if -maltivec. */
11456 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11458 if (TARGET_DEBUG_ARG)
11459 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11460 return 1;
11463 /* Pass synthetic vectors in memory. */
11464 if (TREE_CODE (type) == VECTOR_TYPE
11465 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11467 static bool warned_for_pass_big_vectors = false;
11468 if (TARGET_DEBUG_ARG)
11469 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11470 if (!warned_for_pass_big_vectors)
11472 warning (0, "GCC vector passed by reference: "
11473 "non-standard ABI extension with no compatibility guarantee");
11474 warned_for_pass_big_vectors = true;
11476 return 1;
11479 return 0;
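
/* Editor's sketch (not part of rs6000.c): the V.4 decision above,
   restated on plain data so the rules are easy to eyeball.  A negative
   size stands in for variably-sized types, mirroring the
   int_size_in_bytes test.  */
#include <stdbool.h>
#include <stdio.h>

static bool
v4_pass_by_ref (bool is_aggregate, long size_in_bytes)
{
  if (is_aggregate)         /* V.4 aggregates go by reference */
    return true;
  if (size_in_bytes < 0)    /* variable-sized types go by reference */
    return true;
  return false;             /* ordinary scalars go by value */
}

int
main (void)
{
  printf ("%d %d %d\n",
          v4_pass_by_ref (true, 8),    /* struct: 1 */
          v4_pass_by_ref (false, -1),  /* variably sized: 1 */
          v4_pass_by_ref (false, 4));  /* int: 0 */
  return 0;
}
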
11482 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11483 already processed. Return true if the parameter must be passed
11484 (fully or partially) on the stack. */
11486 static bool
11487 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11489 machine_mode mode;
11490 int unsignedp;
11491 rtx entry_parm;
11493 /* Catch errors. */
11494 if (type == NULL || type == error_mark_node)
11495 return true;
11497 /* Handle types with no storage requirement. */
11498 if (TYPE_MODE (type) == VOIDmode)
11499 return false;
11501 /* Handle complex types. */
11502 if (TREE_CODE (type) == COMPLEX_TYPE)
11503 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11504 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11506 /* Handle transparent aggregates. */
11507 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11508 && TYPE_TRANSPARENT_AGGR (type))
11509 type = TREE_TYPE (first_field (type));
11511 /* See if this arg was passed by invisible reference. */
11512 if (pass_by_reference (get_cumulative_args (args_so_far),
11513 TYPE_MODE (type), type, true))
11514 type = build_pointer_type (type);
11516 /* Find mode as it is passed by the ABI. */
11517 unsignedp = TYPE_UNSIGNED (type);
11518 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11520 /* If we must pass in stack, we need a stack. */
11521 if (rs6000_must_pass_in_stack (mode, type))
11522 return true;
11524 /* If there is no incoming register, we need a stack. */
11525 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11526 if (entry_parm == NULL)
11527 return true;
11529 /* Likewise if we need to pass both in registers and on the stack. */
11530 if (GET_CODE (entry_parm) == PARALLEL
11531 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11532 return true;
11534 /* Also true if we're partially in registers and partially not. */
11535 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11536 return true;
11538 /* Update info on where next arg arrives in registers. */
11539 rs6000_function_arg_advance (args_so_far, mode, type, true);
11540 return false;
11543 /* Return true if FUN has no prototype, has a variable argument
11544 list, or passes any parameter in memory. */
11546 static bool
11547 rs6000_function_parms_need_stack (tree fun, bool incoming)
11549 tree fntype, result;
11550 CUMULATIVE_ARGS args_so_far_v;
11551 cumulative_args_t args_so_far;
11553 if (!fun)
11554 /* Must be a libcall, all of which only use reg parms. */
11555 return false;
11557 fntype = fun;
11558 if (!TYPE_P (fun))
11559 fntype = TREE_TYPE (fun);
11561 /* Varargs functions need the parameter save area. */
11562 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11563 return true;
11565 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11566 args_so_far = pack_cumulative_args (&args_so_far_v);
11568 /* When incoming, we will have been passed the function decl.
11569 It is necessary to use the decl to handle K&R style functions,
11570 where TYPE_ARG_TYPES may not be available. */
11571 if (incoming)
11573 gcc_assert (DECL_P (fun));
11574 result = DECL_RESULT (fun);
11576 else
11577 result = TREE_TYPE (fntype);
11579 if (result && aggregate_value_p (result, fntype))
11581 if (!TYPE_P (result))
11582 result = TREE_TYPE (result);
11583 result = build_pointer_type (result);
11584 rs6000_parm_needs_stack (args_so_far, result);
11587 if (incoming)
11589 tree parm;
11591 for (parm = DECL_ARGUMENTS (fun);
11592 parm && parm != void_list_node;
11593 parm = TREE_CHAIN (parm))
11594 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11595 return true;
11597 else
11599 function_args_iterator args_iter;
11600 tree arg_type;
11602 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11603 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11604 return true;
11607 return false;
11610 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11611 usually a constant depending on the ABI. However, in the ELFv2 ABI
11612 the register parameter area is optional when calling a function that
11613 has a prototype in scope, has no variable argument list, and passes
11614 all parameters in registers. */
11617 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11619 int reg_parm_stack_space;
11621 switch (DEFAULT_ABI)
11623 default:
11624 reg_parm_stack_space = 0;
11625 break;
11627 case ABI_AIX:
11628 case ABI_DARWIN:
11629 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11630 break;
11632 case ABI_ELFv2:
11633 /* ??? Recomputing this every time is a bit expensive. Is there
11634 a place to cache this information? */
11635 if (rs6000_function_parms_need_stack (fun, incoming))
11636 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11637 else
11638 reg_parm_stack_space = 0;
11639 break;
11642 return reg_parm_stack_space;
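
/* Editor's note: the 64/32 constants above are just the eight argument
   GPRs times the word size.  A one-line check: */
#include <stdio.h>

int
main (void)
{
  printf ("%d %d\n", 8 * 8, 8 * 4);   /* prints "64 32" */
  return 0;
}
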
11645 static void
11646 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11648 int i;
11649 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11651 if (nregs == 0)
11652 return;
11654 for (i = 0; i < nregs; i++)
11656 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11657 if (reload_completed)
11659 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11660 tem = NULL_RTX;
11661 else
11662 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11663 i * GET_MODE_SIZE (reg_mode));
11665 else
11666 tem = replace_equiv_address (tem, XEXP (tem, 0));
11668 gcc_assert (tem);
11670 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11674 /* Perform any actions needed for a function that is receiving a
11675 variable number of arguments.
11677 CUM is as above.
11679 MODE and TYPE are the mode and type of the current parameter.
11681 PRETEND_SIZE is a variable that should be set to the amount of stack
11682 that must be pushed by the prolog to pretend that our caller pushed
11683 it.
11685 Normally, this macro will push all remaining incoming registers on the
11686 stack and set PRETEND_SIZE to the length of the registers pushed. */
11688 static void
11689 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11690 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11691 int no_rtl)
11693 CUMULATIVE_ARGS next_cum;
11694 int reg_size = TARGET_32BIT ? 4 : 8;
11695 rtx save_area = NULL_RTX, mem;
11696 int first_reg_offset;
11697 alias_set_type set;
11699 /* Skip the last named argument. */
11700 next_cum = *get_cumulative_args (cum);
11701 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11703 if (DEFAULT_ABI == ABI_V4)
11705 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11707 if (! no_rtl)
11709 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11710 HOST_WIDE_INT offset = 0;
11712 /* Try to optimize the size of the varargs save area.
11713 The ABI requires that ap.reg_save_area is doubleword
11714 aligned, but we don't need to allocate space for all
11715 the bytes, only for those to which we will actually save
11716 anything. */
11717 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11718 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11719 if (TARGET_HARD_FLOAT && TARGET_FPRS
11720 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11721 && cfun->va_list_fpr_size)
11723 if (gpr_reg_num)
11724 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11725 * UNITS_PER_FP_WORD;
11726 if (cfun->va_list_fpr_size
11727 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11728 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11729 else
11730 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11731 * UNITS_PER_FP_WORD;
11733 if (gpr_reg_num)
11735 offset = -((first_reg_offset * reg_size) & ~7);
11736 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11738 gpr_reg_num = cfun->va_list_gpr_size;
11739 if (reg_size == 4 && (first_reg_offset & 1))
11740 gpr_reg_num++;
11742 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11744 else if (fpr_size)
11745 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11746 * UNITS_PER_FP_WORD
11747 - (int) (GP_ARG_NUM_REG * reg_size);
11749 if (gpr_size + fpr_size)
11751 rtx reg_save_area
11752 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11753 gcc_assert (GET_CODE (reg_save_area) == MEM);
11754 reg_save_area = XEXP (reg_save_area, 0);
11755 if (GET_CODE (reg_save_area) == PLUS)
11757 gcc_assert (XEXP (reg_save_area, 0)
11758 == virtual_stack_vars_rtx);
11759 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11760 offset += INTVAL (XEXP (reg_save_area, 1));
11762 else
11763 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11766 cfun->machine->varargs_save_offset = offset;
11767 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11770 else
11772 first_reg_offset = next_cum.words;
11773 save_area = crtl->args.internal_arg_pointer;
11775 if (targetm.calls.must_pass_in_stack (mode, type))
11776 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11779 set = get_varargs_alias_set ();
11780 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11781 && cfun->va_list_gpr_size)
11783 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11785 if (va_list_gpr_counter_field)
11786 /* V4 va_list_gpr_size counts number of registers needed. */
11787 n_gpr = cfun->va_list_gpr_size;
11788 else
11789 /* char * va_list instead counts number of bytes needed. */
11790 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11792 if (nregs > n_gpr)
11793 nregs = n_gpr;
11795 mem = gen_rtx_MEM (BLKmode,
11796 plus_constant (Pmode, save_area,
11797 first_reg_offset * reg_size));
11798 MEM_NOTRAP_P (mem) = 1;
11799 set_mem_alias_set (mem, set);
11800 set_mem_align (mem, BITS_PER_WORD);
11802 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11803 nregs);
11806 /* Save FP registers if needed. */
11807 if (DEFAULT_ABI == ABI_V4
11808 && TARGET_HARD_FLOAT && TARGET_FPRS
11809 && ! no_rtl
11810 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11811 && cfun->va_list_fpr_size)
11813 int fregno = next_cum.fregno, nregs;
11814 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11815 rtx lab = gen_label_rtx ();
11816 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11817 * UNITS_PER_FP_WORD);
11819 emit_jump_insn
11820 (gen_rtx_SET (pc_rtx,
11821 gen_rtx_IF_THEN_ELSE (VOIDmode,
11822 gen_rtx_NE (VOIDmode, cr1,
11823 const0_rtx),
11824 gen_rtx_LABEL_REF (VOIDmode, lab),
11825 pc_rtx)));
11827 for (nregs = 0;
11828 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11829 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11831 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11832 ? DFmode : SFmode,
11833 plus_constant (Pmode, save_area, off));
11834 MEM_NOTRAP_P (mem) = 1;
11835 set_mem_alias_set (mem, set);
11836 set_mem_align (mem, GET_MODE_ALIGNMENT (
11837 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11838 ? DFmode : SFmode));
11839 emit_move_insn (mem, gen_rtx_REG (
11840 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11841 ? DFmode : SFmode, fregno));
11844 emit_label (lab);
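
/* Editor's sketch (not part of rs6000.c): the V.4 register save area
   the code above fills in -- eight GPR words followed by the FP
   argument registers.  Offsets computed for a 32-bit target
   (reg_size = 4, UNITS_PER_FP_WORD = 8); the f1..f8 numbering is an
   assumption standing in for FP_ARG_MIN_REG..FP_ARG_V4_MAX_REG.  */
#include <stdio.h>

int
main (void)
{
  int reg_size = 4, gp_arg_num_reg = 8;
  printf ("r3..r10 saved at offsets 0..%d\n",
          gp_arg_num_reg * reg_size - 1);
  for (int fregno = 1; fregno <= 8; fregno++)
    printf ("f%d saved at offset %d\n", fregno,
            gp_arg_num_reg * reg_size + (fregno - 1) * 8);
  return 0;
}
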
11848 /* Create the va_list data type. */
11850 static tree
11851 rs6000_build_builtin_va_list (void)
11853 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11855 /* For AIX, prefer 'char *' because that's what the system
11856 header files like. */
11857 if (DEFAULT_ABI != ABI_V4)
11858 return build_pointer_type (char_type_node);
11860 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11861 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11862 get_identifier ("__va_list_tag"), record);
11864 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11865 unsigned_char_type_node);
11866 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11867 unsigned_char_type_node);
11868 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11869 every user file. */
11870 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11871 get_identifier ("reserved"), short_unsigned_type_node);
11872 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11873 get_identifier ("overflow_arg_area"),
11874 ptr_type_node);
11875 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11876 get_identifier ("reg_save_area"),
11877 ptr_type_node);
11879 va_list_gpr_counter_field = f_gpr;
11880 va_list_fpr_counter_field = f_fpr;
11882 DECL_FIELD_CONTEXT (f_gpr) = record;
11883 DECL_FIELD_CONTEXT (f_fpr) = record;
11884 DECL_FIELD_CONTEXT (f_res) = record;
11885 DECL_FIELD_CONTEXT (f_ovf) = record;
11886 DECL_FIELD_CONTEXT (f_sav) = record;
11888 TYPE_STUB_DECL (record) = type_decl;
11889 TYPE_NAME (record) = type_decl;
11890 TYPE_FIELDS (record) = f_gpr;
11891 DECL_CHAIN (f_gpr) = f_fpr;
11892 DECL_CHAIN (f_fpr) = f_res;
11893 DECL_CHAIN (f_res) = f_ovf;
11894 DECL_CHAIN (f_ovf) = f_sav;
11896 layout_type (record);
11898 /* The correct type is an array type of one element. */
11899 return build_array_type (record, build_index_type (size_zero_node));
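
/* Editor's sketch (not part of rs6000.c): the C declaration the record
   built above amounts to.  The field order and the array-of-one trick
   match the code; the typedef name and the size printed are only what
   one would expect on a 32-bit V.4 target.  */
#include <stdio.h>

typedef struct va_sketch_tag
{
  unsigned char gpr;              /* next GPR argument register to use */
  unsigned char fpr;              /* next FPR argument register to use */
  unsigned short reserved;        /* named padding, keeps -Wpadded quiet */
  void *overflow_arg_area;        /* arguments that went to the stack */
  void *reg_save_area;            /* dumped r3..r10 / f1..f8 */
} va_sketch[1];

int
main (void)
{
  /* 12 with 32-bit pointers, 24 with 64-bit pointers.  */
  printf ("%zu\n", sizeof (va_sketch));
  return 0;
}
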
11902 /* Implement va_start. */
11904 static void
11905 rs6000_va_start (tree valist, rtx nextarg)
11907 HOST_WIDE_INT words, n_gpr, n_fpr;
11908 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11909 tree gpr, fpr, ovf, sav, t;
11911 /* Only SVR4 needs something special. */
11912 if (DEFAULT_ABI != ABI_V4)
11914 std_expand_builtin_va_start (valist, nextarg);
11915 return;
11918 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11919 f_fpr = DECL_CHAIN (f_gpr);
11920 f_res = DECL_CHAIN (f_fpr);
11921 f_ovf = DECL_CHAIN (f_res);
11922 f_sav = DECL_CHAIN (f_ovf);
11924 valist = build_simple_mem_ref (valist);
11925 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11926 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11927 f_fpr, NULL_TREE);
11928 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11929 f_ovf, NULL_TREE);
11930 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11931 f_sav, NULL_TREE);
11933 /* Count number of gp and fp argument registers used. */
11934 words = crtl->args.info.words;
11935 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11936 GP_ARG_NUM_REG);
11937 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11938 FP_ARG_NUM_REG);
11940 if (TARGET_DEBUG_ARG)
11941 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11942 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11943 words, n_gpr, n_fpr);
11945 if (cfun->va_list_gpr_size)
11947 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11948 build_int_cst (NULL_TREE, n_gpr));
11949 TREE_SIDE_EFFECTS (t) = 1;
11950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11953 if (cfun->va_list_fpr_size)
11955 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11956 build_int_cst (NULL_TREE, n_fpr));
11957 TREE_SIDE_EFFECTS (t) = 1;
11958 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11960 #ifdef HAVE_AS_GNU_ATTRIBUTE
11961 if (call_ABI_of_interest (cfun->decl))
11962 rs6000_passes_float = true;
11963 #endif
11966 /* Find the overflow area. */
11967 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11968 if (words != 0)
11969 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11970 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11971 TREE_SIDE_EFFECTS (t) = 1;
11972 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11974 /* If there were no va_arg invocations, don't set up the register
11975 save area. */
11976 if (!cfun->va_list_gpr_size
11977 && !cfun->va_list_fpr_size
11978 && n_gpr < GP_ARG_NUM_REG
11979 && n_fpr < FP_ARG_V4_MAX_REG)
11980 return;
11982 /* Find the register save area. */
11983 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11984 if (cfun->machine->varargs_save_offset)
11985 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11986 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11987 TREE_SIDE_EFFECTS (t) = 1;
11988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
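
/* Editor's sketch (not part of rs6000.c): the net effect of the V.4
   expansion above for a hypothetical  int f (int a, ...)  -- one named
   GPR argument, no named FP arguments.  The struct mirrors the record
   sketched earlier; the pointer parameters stand in for the rtl-level
   internal_arg_pointer and virtual_stack_vars_rtx values.  */
struct va_sketch
{
  unsigned char gpr, fpr;
  unsigned short reserved;
  void *overflow_arg_area;
  void *reg_save_area;
};

static void
va_start_effect (struct va_sketch *ap, void *overflow, void *save_area)
{
  ap->gpr = 1;                      /* r3 was consumed by the named 'a' */
  ap->fpr = 0;                      /* no FP argument registers used */
  ap->overflow_arg_area = overflow; /* first stacked vararg word */
  ap->reg_save_area = save_area;    /* where the prologue dumped regs */
}

int
main (void)
{
  char stack[64], save[96];
  struct va_sketch ap;
  va_start_effect (&ap, stack, save);
  return ap.gpr - 1;                /* 0: exactly one GPR consumed */
}
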
11991 /* Implement va_arg. */
11993 static tree
11994 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11995 gimple_seq *post_p)
11997 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11998 tree gpr, fpr, ovf, sav, reg, t, u;
11999 int size, rsize, n_reg, sav_ofs, sav_scale;
12000 tree lab_false, lab_over, addr;
12001 int align;
12002 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12003 int regalign = 0;
12004 gimple *stmt;
12006 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12008 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12009 return build_va_arg_indirect_ref (t);
12012 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12013 earlier version of gcc, with the property that it always applied alignment
12014 adjustments to the va-args (even for zero-sized types). The cheapest way
12015 to deal with this is to replicate the effect of the part of
12016 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12017 of relevance.
12018 We don't need to check for pass-by-reference because of the test above.
12019 We can return a simplified answer, since we know there's no offset to add. */
12021 if (((TARGET_MACHO
12022 && rs6000_darwin64_abi)
12023 || DEFAULT_ABI == ABI_ELFv2
12024 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12025 && integer_zerop (TYPE_SIZE (type)))
12027 unsigned HOST_WIDE_INT align, boundary;
12028 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12029 align = PARM_BOUNDARY / BITS_PER_UNIT;
12030 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12031 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12032 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12033 boundary /= BITS_PER_UNIT;
12034 if (boundary > align)
12036 tree t;
12037 /* This updates arg ptr by the amount that would be necessary
12038 to align the zero-sized (but not zero-alignment) item. */
12039 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12040 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12041 gimplify_and_add (t, pre_p);
12043 t = fold_convert (sizetype, valist_tmp);
12044 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12045 fold_convert (TREE_TYPE (valist),
12046 fold_build2 (BIT_AND_EXPR, sizetype, t,
12047 size_int (-boundary))));
12048 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12049 gimplify_and_add (t, pre_p);
12051 /* Since it is zero-sized there's no increment for the item itself. */
12052 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12053 return build_va_arg_indirect_ref (valist_tmp);
12056 if (DEFAULT_ABI != ABI_V4)
12058 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12060 tree elem_type = TREE_TYPE (type);
12061 machine_mode elem_mode = TYPE_MODE (elem_type);
12062 int elem_size = GET_MODE_SIZE (elem_mode);
12064 if (elem_size < UNITS_PER_WORD)
12066 tree real_part, imag_part;
12067 gimple_seq post = NULL;
12069 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12070 &post);
12071 /* Copy the value into a temporary, lest the formal temporary
12072 be reused out from under us. */
12073 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12074 gimple_seq_add_seq (pre_p, post);
12076 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12077 post_p);
12079 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12083 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12086 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12087 f_fpr = DECL_CHAIN (f_gpr);
12088 f_res = DECL_CHAIN (f_fpr);
12089 f_ovf = DECL_CHAIN (f_res);
12090 f_sav = DECL_CHAIN (f_ovf);
12092 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12093 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12094 f_fpr, NULL_TREE);
12095 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12096 f_ovf, NULL_TREE);
12097 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12098 f_sav, NULL_TREE);
12100 size = int_size_in_bytes (type);
12101 rsize = (size + 3) / 4;
12102 align = 1;
12104 if (TARGET_HARD_FLOAT && TARGET_FPRS
12105 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
12106 || (TARGET_DOUBLE_FLOAT
12107 && (TYPE_MODE (type) == DFmode
12108 || FLOAT128_2REG_P (TYPE_MODE (type))
12109 || DECIMAL_FLOAT_MODE_P (TYPE_MODE (type))))))
12111 /* FP args go in FP registers, if present. */
12112 reg = fpr;
12113 n_reg = (size + 7) / 8;
12114 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12115 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12116 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
12117 align = 8;
12119 else
12121 /* Otherwise into GP registers. */
12122 reg = gpr;
12123 n_reg = rsize;
12124 sav_ofs = 0;
12125 sav_scale = 4;
12126 if (n_reg == 2)
12127 align = 8;
12130 /* Pull the value out of the saved registers.... */
12132 lab_over = NULL;
12133 addr = create_tmp_var (ptr_type_node, "addr");
12135 /* AltiVec vectors never go in registers when -mabi=altivec. */
12136 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
12137 align = 16;
12138 else
12140 lab_false = create_artificial_label (input_location);
12141 lab_over = create_artificial_label (input_location);
12143 /* Long long and SPE vectors are aligned in the registers.
12144 As are any other 2 gpr item such as complex int due to a
12145 historical mistake. */
12146 u = reg;
12147 if (n_reg == 2 && reg == gpr)
12149 regalign = 1;
12150 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12151 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12152 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12153 unshare_expr (reg), u);
12155 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12156 reg number is 0 for f1, so we want to make it odd. */
12157 else if (reg == fpr && TYPE_MODE (type) == TDmode)
12159 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12160 build_int_cst (TREE_TYPE (reg), 1));
12161 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12164 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12165 t = build2 (GE_EXPR, boolean_type_node, u, t);
12166 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12167 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12168 gimplify_and_add (t, pre_p);
12170 t = sav;
12171 if (sav_ofs)
12172 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12174 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12175 build_int_cst (TREE_TYPE (reg), n_reg));
12176 u = fold_convert (sizetype, u);
12177 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12178 t = fold_build_pointer_plus (t, u);
12180 /* _Decimal32 varargs are located in the second word of the 64-bit
12181 FP register for 32-bit binaries. */
12182 if (TARGET_32BIT
12183 && TARGET_HARD_FLOAT && TARGET_FPRS
12184 && TYPE_MODE (type) == SDmode)
12185 t = fold_build_pointer_plus_hwi (t, size);
12187 gimplify_assign (addr, t, pre_p);
12189 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12191 stmt = gimple_build_label (lab_false);
12192 gimple_seq_add_stmt (pre_p, stmt);
12194 if ((n_reg == 2 && !regalign) || n_reg > 2)
12196 /* Ensure that we don't find any more args in regs.
12197 Alignment has been taken care of for the special cases. */
12198 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12202 /* ... otherwise out of the overflow area. */
12204 /* Care for on-stack alignment if needed. */
12205 t = ovf;
12206 if (align != 1)
12208 t = fold_build_pointer_plus_hwi (t, align - 1);
12209 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12210 build_int_cst (TREE_TYPE (t), -align));
12212 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12214 gimplify_assign (unshare_expr (addr), t, pre_p);
12216 t = fold_build_pointer_plus_hwi (t, size);
12217 gimplify_assign (unshare_expr (ovf), t, pre_p);
12219 if (lab_over)
12221 stmt = gimple_build_label (lab_over);
12222 gimple_seq_add_stmt (pre_p, stmt);
12225 if (STRICT_ALIGNMENT
12226 && (TYPE_ALIGN (type)
12227 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12229 /* The value (of type complex double, for example) may not be
12230 aligned in memory in the saved registers, so copy via a
12231 temporary. (This is the same code as used for SPARC.) */
12232 tree tmp = create_tmp_var (type, "va_arg_tmp");
12233 tree dest_addr = build_fold_addr_expr (tmp);
12235 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12236 3, dest_addr, addr, size_int (rsize * 4));
12238 gimplify_and_add (copy, pre_p);
12239 addr = dest_addr;
12242 addr = fold_convert (ptrtype, addr);
12243 return build_va_arg_indirect_ref (addr);
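
/* Editor's sketch (not part of rs6000.c): the register-vs-overflow
   choice above, written out as the plain C one could imagine being
   generated for va_arg (ap, double) on 32-bit V.4 hard-float.  Here
   sav_ofs = 8 * 4 = 32 (past the eight GPR words), sav_scale = 8, and
   the "8 - n_reg + 1" test degenerates to fpr < 8.  */
#include <stdio.h>

struct va_sketch
{
  unsigned char gpr, fpr;
  unsigned short reserved;
  void *overflow_arg_area;
  void *reg_save_area;
};

static double
fetch_double (struct va_sketch *ap)
{
  if (ap->fpr < 8)                  /* an FPR was saved: f1..f8 */
    return *(double *) ((char *) ap->reg_save_area + 32 + 8 * ap->fpr++);
  /* Otherwise read from the overflow area, 8-byte aligned (align = 8).  */
  char *p = (char *) (((unsigned long) ap->overflow_arg_area + 7) & ~7UL);
  ap->overflow_arg_area = p + 8;
  return *(double *) p;
}

int
main (void)
{
  double save[16] = { 0 };
  save[4] = 3.5;                    /* f1's slot: 32 bytes into the area */
  struct va_sketch ap = { 0, 0, 0, 0, save };
  printf ("%g\n", fetch_double (&ap));   /* prints 3.5 */
  return 0;
}
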
12246 /* Builtins. */
12248 static void
12249 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12251 tree t;
12252 unsigned classify = rs6000_builtin_info[(int)code].attr;
12253 const char *attr_string = "";
12255 gcc_assert (name != NULL);
12256 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
12258 if (rs6000_builtin_decls[(int)code])
12259 fatal_error (input_location,
12260 "internal error: builtin function %s already processed", name);
12262 rs6000_builtin_decls[(int)code] = t =
12263 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12265 /* Set any special attributes. */
12266 if ((classify & RS6000_BTC_CONST) != 0)
12268 /* const function, function only depends on the inputs. */
12269 TREE_READONLY (t) = 1;
12270 TREE_NOTHROW (t) = 1;
12271 attr_string = ", const";
12273 else if ((classify & RS6000_BTC_PURE) != 0)
12275 /* pure function, function can read global memory, but does not set any
12276 external state. */
12277 DECL_PURE_P (t) = 1;
12278 TREE_NOTHROW (t) = 1;
12279 attr_string = ", pure";
12281 else if ((classify & RS6000_BTC_FP) != 0)
12283 /* Function is a math function. If rounding mode is on, then treat the
12284 function as not reading global memory, but it can have arbitrary side
12285 effects. If it is off, then assume the function is a const function.
12286 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12287 builtin-attribute.def that is used for the math functions. */
12288 TREE_NOTHROW (t) = 1;
12289 if (flag_rounding_math)
12291 DECL_PURE_P (t) = 1;
12292 DECL_IS_NOVOPS (t) = 1;
12293 attr_string = ", fp, pure";
12295 else
12297 TREE_READONLY (t) = 1;
12298 attr_string = ", fp, const";
12301 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12302 gcc_unreachable ();
12304 if (TARGET_DEBUG_BUILTIN)
12305 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12306 (int)code, name, attr_string);
12309 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12311 #undef RS6000_BUILTIN_1
12312 #undef RS6000_BUILTIN_2
12313 #undef RS6000_BUILTIN_3
12314 #undef RS6000_BUILTIN_A
12315 #undef RS6000_BUILTIN_D
12316 #undef RS6000_BUILTIN_E
12317 #undef RS6000_BUILTIN_H
12318 #undef RS6000_BUILTIN_P
12319 #undef RS6000_BUILTIN_Q
12320 #undef RS6000_BUILTIN_S
12321 #undef RS6000_BUILTIN_X
12323 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12324 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12325 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12326 { MASK, ICODE, NAME, ENUM },
12328 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12329 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12330 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12331 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12332 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12333 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12334 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12335 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12337 static const struct builtin_description bdesc_3arg[] =
12339 #include "rs6000-builtin.def"
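
/* Editor's sketch (not part of rs6000.c): the #undef/#define blocks
   above and below are the classic "X macro" pattern -- one .def file
   expanded many times, each pass keeping a different subset of the
   entries.  A self-contained miniature, where the DEF_* invocations
   stand in for the lines of rs6000-builtin.def: */
#include <stdio.h>

#define DEF_UNARY(NAME)          /* ignored on this pass */
#define DEF_BINARY(NAME) NAME,   /* kept on this pass */

static const char *binary_ops[] = {
  DEF_UNARY ("neg")              /* dropped: expands to nothing */
  DEF_BINARY ("add")             /* kept: expands to "add", */
  DEF_BINARY ("sub")
};

int
main (void)
{
  printf ("%s %s\n", binary_ops[0], binary_ops[1]);   /* add sub */
  return 0;
}
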
12342 /* DST operations: void foo (void *, const int, const char). */
12344 #undef RS6000_BUILTIN_1
12345 #undef RS6000_BUILTIN_2
12346 #undef RS6000_BUILTIN_3
12347 #undef RS6000_BUILTIN_A
12348 #undef RS6000_BUILTIN_D
12349 #undef RS6000_BUILTIN_E
12350 #undef RS6000_BUILTIN_H
12351 #undef RS6000_BUILTIN_P
12352 #undef RS6000_BUILTIN_Q
12353 #undef RS6000_BUILTIN_S
12354 #undef RS6000_BUILTIN_X
12356 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12357 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12358 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12359 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12360 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12361 { MASK, ICODE, NAME, ENUM },
12363 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12364 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12365 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12366 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12367 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12368 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12370 static const struct builtin_description bdesc_dst[] =
12372 #include "rs6000-builtin.def"
12375 /* Simple binary operations: VECc = foo (VECa, VECb). */
12377 #undef RS6000_BUILTIN_1
12378 #undef RS6000_BUILTIN_2
12379 #undef RS6000_BUILTIN_3
12380 #undef RS6000_BUILTIN_A
12381 #undef RS6000_BUILTIN_D
12382 #undef RS6000_BUILTIN_E
12383 #undef RS6000_BUILTIN_H
12384 #undef RS6000_BUILTIN_P
12385 #undef RS6000_BUILTIN_Q
12386 #undef RS6000_BUILTIN_S
12387 #undef RS6000_BUILTIN_X
12389 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12390 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12391 { MASK, ICODE, NAME, ENUM },
12393 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12394 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12395 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12396 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12397 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12398 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12399 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12400 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12401 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12403 static const struct builtin_description bdesc_2arg[] =
12405 #include "rs6000-builtin.def"
12408 #undef RS6000_BUILTIN_1
12409 #undef RS6000_BUILTIN_2
12410 #undef RS6000_BUILTIN_3
12411 #undef RS6000_BUILTIN_A
12412 #undef RS6000_BUILTIN_D
12413 #undef RS6000_BUILTIN_E
12414 #undef RS6000_BUILTIN_H
12415 #undef RS6000_BUILTIN_P
12416 #undef RS6000_BUILTIN_Q
12417 #undef RS6000_BUILTIN_S
12418 #undef RS6000_BUILTIN_X
12420 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12421 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12422 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12423 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12424 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12425 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12426 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12427 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12428 { MASK, ICODE, NAME, ENUM },
12430 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12431 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12432 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12434 /* AltiVec predicates. */
12436 static const struct builtin_description bdesc_altivec_preds[] =
12438 #include "rs6000-builtin.def"
12441 /* SPE predicates. */
12442 #undef RS6000_BUILTIN_1
12443 #undef RS6000_BUILTIN_2
12444 #undef RS6000_BUILTIN_3
12445 #undef RS6000_BUILTIN_A
12446 #undef RS6000_BUILTIN_D
12447 #undef RS6000_BUILTIN_E
12448 #undef RS6000_BUILTIN_H
12449 #undef RS6000_BUILTIN_P
12450 #undef RS6000_BUILTIN_Q
12451 #undef RS6000_BUILTIN_S
12452 #undef RS6000_BUILTIN_X
12454 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12455 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12456 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12457 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12458 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12459 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12460 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12461 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12462 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12463 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
12464 { MASK, ICODE, NAME, ENUM },
12466 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12468 static const struct builtin_description bdesc_spe_predicates[] =
12470 #include "rs6000-builtin.def"
12473 /* SPE evsel predicates. */
12474 #undef RS6000_BUILTIN_1
12475 #undef RS6000_BUILTIN_2
12476 #undef RS6000_BUILTIN_3
12477 #undef RS6000_BUILTIN_A
12478 #undef RS6000_BUILTIN_D
12479 #undef RS6000_BUILTIN_E
12480 #undef RS6000_BUILTIN_H
12481 #undef RS6000_BUILTIN_P
12482 #undef RS6000_BUILTIN_Q
12483 #undef RS6000_BUILTIN_S
12484 #undef RS6000_BUILTIN_X
12486 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12487 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12488 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12489 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12490 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12491 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12492 { MASK, ICODE, NAME, ENUM },
12494 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12495 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12496 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12497 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12498 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12500 static const struct builtin_description bdesc_spe_evsel[] =
12502 #include "rs6000-builtin.def"
12505 /* PAIRED predicates. */
12506 #undef RS6000_BUILTIN_1
12507 #undef RS6000_BUILTIN_2
12508 #undef RS6000_BUILTIN_3
12509 #undef RS6000_BUILTIN_A
12510 #undef RS6000_BUILTIN_D
12511 #undef RS6000_BUILTIN_E
12512 #undef RS6000_BUILTIN_H
12513 #undef RS6000_BUILTIN_P
12514 #undef RS6000_BUILTIN_Q
12515 #undef RS6000_BUILTIN_S
12516 #undef RS6000_BUILTIN_X
12518 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12519 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12520 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12521 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12522 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12523 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12524 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12525 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12526 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
12527 { MASK, ICODE, NAME, ENUM },
12529 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12530 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12532 static const struct builtin_description bdesc_paired_preds[] =
12534 #include "rs6000-builtin.def"
12537 /* ABS* operations. */
12539 #undef RS6000_BUILTIN_1
12540 #undef RS6000_BUILTIN_2
12541 #undef RS6000_BUILTIN_3
12542 #undef RS6000_BUILTIN_A
12543 #undef RS6000_BUILTIN_D
12544 #undef RS6000_BUILTIN_E
12545 #undef RS6000_BUILTIN_H
12546 #undef RS6000_BUILTIN_P
12547 #undef RS6000_BUILTIN_Q
12548 #undef RS6000_BUILTIN_S
12549 #undef RS6000_BUILTIN_X
12551 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12552 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12553 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12554 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12555 { MASK, ICODE, NAME, ENUM },
12557 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12558 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12559 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12560 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12561 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12562 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12563 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12565 static const struct builtin_description bdesc_abs[] =
12567 #include "rs6000-builtin.def"
12570 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12571 foo (VECa). */
12573 #undef RS6000_BUILTIN_1
12574 #undef RS6000_BUILTIN_2
12575 #undef RS6000_BUILTIN_3
12576 #undef RS6000_BUILTIN_A
12577 #undef RS6000_BUILTIN_D
12578 #undef RS6000_BUILTIN_E
12579 #undef RS6000_BUILTIN_H
12580 #undef RS6000_BUILTIN_P
12581 #undef RS6000_BUILTIN_Q
12582 #undef RS6000_BUILTIN_S
12583 #undef RS6000_BUILTIN_X
12585 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12586 { MASK, ICODE, NAME, ENUM },
12588 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12589 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12590 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12591 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12592 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12593 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12594 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12595 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12596 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12597 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12599 static const struct builtin_description bdesc_1arg[] =
12601 #include "rs6000-builtin.def"
12604 /* HTM builtins. */
12605 #undef RS6000_BUILTIN_1
12606 #undef RS6000_BUILTIN_2
12607 #undef RS6000_BUILTIN_3
12608 #undef RS6000_BUILTIN_A
12609 #undef RS6000_BUILTIN_D
12610 #undef RS6000_BUILTIN_E
12611 #undef RS6000_BUILTIN_H
12612 #undef RS6000_BUILTIN_P
12613 #undef RS6000_BUILTIN_Q
12614 #undef RS6000_BUILTIN_S
12615 #undef RS6000_BUILTIN_X
12617 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12618 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12619 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12620 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12621 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12622 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12623 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12624 { MASK, ICODE, NAME, ENUM },
12626 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12627 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12628 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12629 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12631 static const struct builtin_description bdesc_htm[] =
12633 #include "rs6000-builtin.def"
12636 #undef RS6000_BUILTIN_1
12637 #undef RS6000_BUILTIN_2
12638 #undef RS6000_BUILTIN_3
12639 #undef RS6000_BUILTIN_A
12640 #undef RS6000_BUILTIN_D
12641 #undef RS6000_BUILTIN_E
12642 #undef RS6000_BUILTIN_H
12643 #undef RS6000_BUILTIN_P
12644 #undef RS6000_BUILTIN_Q
12645 #undef RS6000_BUILTIN_S
12647 /* Return true if a builtin function is overloaded. */
12648 bool
12649 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12651 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12654 /* Expand an expression EXP that calls a builtin without arguments. */
12655 static rtx
12656 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12658 rtx pat;
12659 machine_mode tmode = insn_data[icode].operand[0].mode;
12661 if (icode == CODE_FOR_nothing)
12662 /* Builtin not supported on this processor. */
12663 return 0;
12665 if (target == 0
12666 || GET_MODE (target) != tmode
12667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12668 target = gen_reg_rtx (tmode);
12670 pat = GEN_FCN (icode) (target);
12671 if (! pat)
12672 return 0;
12673 emit_insn (pat);
12675 return target;
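/* Note: the TARGET handling above is a pattern repeated by every
   expander below: if the caller-suggested TARGET is missing, has the
   wrong mode, or fails the insn's operand 0 predicate, a fresh
   pseudo is allocated instead, so that GEN_FCN always receives an
   operand the pattern can accept.  */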
12679 static rtx
12680 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12682 rtx pat;
12683 tree arg0 = CALL_EXPR_ARG (exp, 0);
12684 tree arg1 = CALL_EXPR_ARG (exp, 1);
12685 rtx op0 = expand_normal (arg0);
12686 rtx op1 = expand_normal (arg1);
12687 machine_mode mode0 = insn_data[icode].operand[0].mode;
12688 machine_mode mode1 = insn_data[icode].operand[1].mode;
12690 if (icode == CODE_FOR_nothing)
12691 /* Builtin not supported on this processor. */
12692 return 0;
12694 /* If we got invalid arguments bail out before generating bad rtl. */
12695 if (arg0 == error_mark_node || arg1 == error_mark_node)
12696 return const0_rtx;
12698 if (GET_CODE (op0) != CONST_INT
12699 || INTVAL (op0) > 255
12700 || INTVAL (op0) < 0)
12702 error ("argument 1 must be an 8-bit field value");
12703 return const0_rtx;
12706 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12707 op0 = copy_to_mode_reg (mode0, op0);
12709 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12710 op1 = copy_to_mode_reg (mode1, op1);
12712 pat = GEN_FCN (icode) (op0, op1);
12713 if (! pat)
12714 return const0_rtx;
12715 emit_insn (pat);
12717 return NULL_RTX;
12721 static rtx
12722 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12724 rtx pat;
12725 tree arg0 = CALL_EXPR_ARG (exp, 0);
12726 rtx op0 = expand_normal (arg0);
12727 machine_mode tmode = insn_data[icode].operand[0].mode;
12728 machine_mode mode0 = insn_data[icode].operand[1].mode;
12730 if (icode == CODE_FOR_nothing)
12731 /* Builtin not supported on this processor. */
12732 return 0;
12734 /* If we got invalid arguments bail out before generating bad rtl. */
12735 if (arg0 == error_mark_node)
12736 return const0_rtx;
12738 if (icode == CODE_FOR_altivec_vspltisb
12739 || icode == CODE_FOR_altivec_vspltish
12740 || icode == CODE_FOR_altivec_vspltisw
12741 || icode == CODE_FOR_spe_evsplatfi
12742 || icode == CODE_FOR_spe_evsplati)
12744 /* Only allow 5-bit *signed* literals. */
12745 if (GET_CODE (op0) != CONST_INT
12746 || INTVAL (op0) > 15
12747 || INTVAL (op0) < -16)
12749 error ("argument 1 must be a 5-bit signed literal");
12750 return const0_rtx;
12754 if (target == 0
12755 || GET_MODE (target) != tmode
12756 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12757 target = gen_reg_rtx (tmode);
12759 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12760 op0 = copy_to_mode_reg (mode0, op0);
12762 pat = GEN_FCN (icode) (target, op0);
12763 if (! pat)
12764 return 0;
12765 emit_insn (pat);
12767 return target;
12770 static rtx
12771 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12773 rtx pat, scratch1, scratch2;
12774 tree arg0 = CALL_EXPR_ARG (exp, 0);
12775 rtx op0 = expand_normal (arg0);
12776 machine_mode tmode = insn_data[icode].operand[0].mode;
12777 machine_mode mode0 = insn_data[icode].operand[1].mode;
12779 /* If we have invalid arguments, bail out before generating bad rtl. */
12780 if (arg0 == error_mark_node)
12781 return const0_rtx;
12783 if (target == 0
12784 || GET_MODE (target) != tmode
12785 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12786 target = gen_reg_rtx (tmode);
12788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12789 op0 = copy_to_mode_reg (mode0, op0);
12791 scratch1 = gen_reg_rtx (mode0);
12792 scratch2 = gen_reg_rtx (mode0);
12794 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12795 if (! pat)
12796 return 0;
12797 emit_insn (pat);
12799 return target;
12802 static rtx
12803 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12805 rtx pat;
12806 tree arg0 = CALL_EXPR_ARG (exp, 0);
12807 tree arg1 = CALL_EXPR_ARG (exp, 1);
12808 rtx op0 = expand_normal (arg0);
12809 rtx op1 = expand_normal (arg1);
12810 machine_mode tmode = insn_data[icode].operand[0].mode;
12811 machine_mode mode0 = insn_data[icode].operand[1].mode;
12812 machine_mode mode1 = insn_data[icode].operand[2].mode;
12814 if (icode == CODE_FOR_nothing)
12815 /* Builtin not supported on this processor. */
12816 return 0;
12818 /* If we got invalid arguments bail out before generating bad rtl. */
12819 if (arg0 == error_mark_node || arg1 == error_mark_node)
12820 return const0_rtx;
12822 if (icode == CODE_FOR_altivec_vcfux
12823 || icode == CODE_FOR_altivec_vcfsx
12824 || icode == CODE_FOR_altivec_vctsxs
12825 || icode == CODE_FOR_altivec_vctuxs
12826 || icode == CODE_FOR_altivec_vspltb
12827 || icode == CODE_FOR_altivec_vsplth
12828 || icode == CODE_FOR_altivec_vspltw
12829 || icode == CODE_FOR_spe_evaddiw
12830 || icode == CODE_FOR_spe_evldd
12831 || icode == CODE_FOR_spe_evldh
12832 || icode == CODE_FOR_spe_evldw
12833 || icode == CODE_FOR_spe_evlhhesplat
12834 || icode == CODE_FOR_spe_evlhhossplat
12835 || icode == CODE_FOR_spe_evlhhousplat
12836 || icode == CODE_FOR_spe_evlwhe
12837 || icode == CODE_FOR_spe_evlwhos
12838 || icode == CODE_FOR_spe_evlwhou
12839 || icode == CODE_FOR_spe_evlwhsplat
12840 || icode == CODE_FOR_spe_evlwwsplat
12841 || icode == CODE_FOR_spe_evrlwi
12842 || icode == CODE_FOR_spe_evslwi
12843 || icode == CODE_FOR_spe_evsrwis
12844 || icode == CODE_FOR_spe_evsubifw
12845 || icode == CODE_FOR_spe_evsrwiu)
12847 /* Only allow 5-bit unsigned literals. */
12848 STRIP_NOPS (arg1);
12849 if (TREE_CODE (arg1) != INTEGER_CST
12850 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12852 error ("argument 2 must be a 5-bit unsigned literal");
12853 return const0_rtx;
12857 if (target == 0
12858 || GET_MODE (target) != tmode
12859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12860 target = gen_reg_rtx (tmode);
12862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12863 op0 = copy_to_mode_reg (mode0, op0);
12864 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12865 op1 = copy_to_mode_reg (mode1, op1);
12867 pat = GEN_FCN (icode) (target, op0, op1);
12868 if (! pat)
12869 return 0;
12870 emit_insn (pat);
12872 return target;
12875 static rtx
12876 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12878 rtx pat, scratch;
12879 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12880 tree arg0 = CALL_EXPR_ARG (exp, 1);
12881 tree arg1 = CALL_EXPR_ARG (exp, 2);
12882 rtx op0 = expand_normal (arg0);
12883 rtx op1 = expand_normal (arg1);
12884 machine_mode tmode = SImode;
12885 machine_mode mode0 = insn_data[icode].operand[1].mode;
12886 machine_mode mode1 = insn_data[icode].operand[2].mode;
12887 int cr6_form_int;
12889 if (TREE_CODE (cr6_form) != INTEGER_CST)
12891 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12892 return const0_rtx;
12894 else
12895 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12897 gcc_assert (mode0 == mode1);
12899 /* If we have invalid arguments, bail out before generating bad rtl. */
12900 if (arg0 == error_mark_node || arg1 == error_mark_node)
12901 return const0_rtx;
12903 if (target == 0
12904 || GET_MODE (target) != tmode
12905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12906 target = gen_reg_rtx (tmode);
12908 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12909 op0 = copy_to_mode_reg (mode0, op0);
12910 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12911 op1 = copy_to_mode_reg (mode1, op1);
12913 scratch = gen_reg_rtx (mode0);
12915 pat = GEN_FCN (icode) (scratch, op0, op1);
12916 if (! pat)
12917 return 0;
12918 emit_insn (pat);
12920 /* The vec_any* and vec_all* predicates use the same opcodes for two
12921 different operations, but the bits in CR6 will be different
12922 depending on what information we want. So we have to play tricks
12923 with CR6 to get the right bits out.
12925 If you think this is disgusting, look at the specs for the
12926 AltiVec predicates. */
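/* Illustration (the __CR6_* names come from altivec.h and are an
   assumption of this note, not used by the code below): cr6_form 0
   corresponds to __CR6_EQ, 1 to __CR6_EQ_REV, 2 to __CR6_LT and 3 to
   __CR6_LT_REV.  For example, vec_all_eq (a, b) passes the LT form
   to test "all elements compared true", while vec_any_eq (a, b)
   passes the EQ_REV form to test "not all elements compared
   false".  */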
12928 switch (cr6_form_int)
12930 case 0:
12931 emit_insn (gen_cr6_test_for_zero (target));
12932 break;
12933 case 1:
12934 emit_insn (gen_cr6_test_for_zero_reverse (target));
12935 break;
12936 case 2:
12937 emit_insn (gen_cr6_test_for_lt (target));
12938 break;
12939 case 3:
12940 emit_insn (gen_cr6_test_for_lt_reverse (target));
12941 break;
12942 default:
12943 error ("argument 1 of __builtin_altivec_predicate is out of range");
12944 break;
12947 return target;
12950 static rtx
12951 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12953 rtx pat, addr;
12954 tree arg0 = CALL_EXPR_ARG (exp, 0);
12955 tree arg1 = CALL_EXPR_ARG (exp, 1);
12956 machine_mode tmode = insn_data[icode].operand[0].mode;
12957 machine_mode mode0 = Pmode;
12958 machine_mode mode1 = Pmode;
12959 rtx op0 = expand_normal (arg0);
12960 rtx op1 = expand_normal (arg1);
12962 if (icode == CODE_FOR_nothing)
12963 /* Builtin not supported on this processor. */
12964 return 0;
12966 /* If we got invalid arguments bail out before generating bad rtl. */
12967 if (arg0 == error_mark_node || arg1 == error_mark_node)
12968 return const0_rtx;
12970 if (target == 0
12971 || GET_MODE (target) != tmode
12972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12973 target = gen_reg_rtx (tmode);
12975 op1 = copy_to_mode_reg (mode1, op1);
12977 if (op0 == const0_rtx)
12979 addr = gen_rtx_MEM (tmode, op1);
12981 else
12983 op0 = copy_to_mode_reg (mode0, op0);
12984 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12987 pat = GEN_FCN (icode) (target, addr);
12989 if (! pat)
12990 return 0;
12991 emit_insn (pat);
12993 return target;
12996 /* Return a constant vector for use as a little-endian permute control vector
12997 to reverse the order of elements of the given vector mode. */
12998 static rtx
12999 swap_selector_for_mode (machine_mode mode)
13001 /* These are little endian vectors, so their elements are reversed
13002 from what you would normally expect for a permute control vector. */
13003 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13004 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13005 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13006 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
13007 unsigned int *swaparray, i;
13008 rtx perm[16];
13010 switch (mode)
13012 case V2DFmode:
13013 case V2DImode:
13014 swaparray = swap2;
13015 break;
13016 case V4SFmode:
13017 case V4SImode:
13018 swaparray = swap4;
13019 break;
13020 case V8HImode:
13021 swaparray = swap8;
13022 break;
13023 case V16QImode:
13024 swaparray = swap16;
13025 break;
13026 default:
13027 gcc_unreachable ();
13030 for (i = 0; i < 16; ++i)
13031 perm[i] = GEN_INT (swaparray[i]);
13033 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
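/* For example, with MODE == V4SImode the selector built above makes
   a subsequent vperm turn the little-endian vector {0, 1, 2, 3} into
   {3, 2, 1, 0}; the analogous full element reversal happens at the
   other element widths.  (Purely illustrative; the arrays above are
   the authoritative byte patterns.)  */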
13036 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
13037 with -maltivec=be specified. Issue the load followed by an element-
13038 reversing permute. */
13039 void
13040 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13042 rtx tmp = gen_reg_rtx (mode);
13043 rtx load = gen_rtx_SET (tmp, op1);
13044 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13045 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
13046 rtx sel = swap_selector_for_mode (mode);
13047 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
13049 gcc_assert (REG_P (op0));
13050 emit_insn (par);
13051 emit_insn (gen_rtx_SET (op0, vperm));
13054 /* Generate code for a "stvxl" built-in for a little endian target with
13055 -maltivec=be specified. Issue the store preceded by an element-reversing
13056 permute. */
13057 void
13058 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13060 rtx tmp = gen_reg_rtx (mode);
13061 rtx store = gen_rtx_SET (op0, tmp);
13062 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13063 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
13064 rtx sel = swap_selector_for_mode (mode);
13065 rtx vperm;
13067 gcc_assert (REG_P (op1));
13068 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13069 emit_insn (gen_rtx_SET (tmp, vperm));
13070 emit_insn (par);
13073 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
13074 specified. Issue the store preceded by an element-reversing permute. */
13075 void
13076 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13078 machine_mode inner_mode = GET_MODE_INNER (mode);
13079 rtx tmp = gen_reg_rtx (mode);
13080 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
13081 rtx sel = swap_selector_for_mode (mode);
13082 rtx vperm;
13084 gcc_assert (REG_P (op1));
13085 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13086 emit_insn (gen_rtx_SET (tmp, vperm));
13087 emit_insn (gen_rtx_SET (op0, stvx));
13090 static rtx
13091 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13093 rtx pat, addr;
13094 tree arg0 = CALL_EXPR_ARG (exp, 0);
13095 tree arg1 = CALL_EXPR_ARG (exp, 1);
13096 machine_mode tmode = insn_data[icode].operand[0].mode;
13097 machine_mode mode0 = Pmode;
13098 machine_mode mode1 = Pmode;
13099 rtx op0 = expand_normal (arg0);
13100 rtx op1 = expand_normal (arg1);
13102 if (icode == CODE_FOR_nothing)
13103 /* Builtin not supported on this processor. */
13104 return 0;
13106 /* If we got invalid arguments bail out before generating bad rtl. */
13107 if (arg0 == error_mark_node || arg1 == error_mark_node)
13108 return const0_rtx;
13110 if (target == 0
13111 || GET_MODE (target) != tmode
13112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13113 target = gen_reg_rtx (tmode);
13115 op1 = copy_to_mode_reg (mode1, op1);
13117 /* For LVX, express the RTL accurately by ANDing the address with -16.
13118 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13119 so the raw address is fine. */
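/* Illustration of the shape built below for the lvx case (the modes
   shown are one possible instance, not fixed by this code):

     (mem:V4SI (and:DI (plus:DI (reg:DI op1) (reg:DI op0))
                       (const_int -16)))

   i.e. the hardware's truncation of the address to a 16-byte
   boundary is made explicit in the RTL.  */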
13120 if (icode == CODE_FOR_altivec_lvx_v2df_2op
13121 || icode == CODE_FOR_altivec_lvx_v2di_2op
13122 || icode == CODE_FOR_altivec_lvx_v4sf_2op
13123 || icode == CODE_FOR_altivec_lvx_v4si_2op
13124 || icode == CODE_FOR_altivec_lvx_v8hi_2op
13125 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
13127 rtx rawaddr;
13128 if (op0 == const0_rtx)
13129 rawaddr = op1;
13130 else
13132 op0 = copy_to_mode_reg (mode0, op0);
13133 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13135 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13136 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13138 /* For -maltivec=be, emit the load and follow it up with a
13139 permute to swap the elements. */
13140 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13142 rtx temp = gen_reg_rtx (tmode);
13143 emit_insn (gen_rtx_SET (temp, addr));
13145 rtx sel = swap_selector_for_mode (tmode);
13146 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
13147 UNSPEC_VPERM);
13148 emit_insn (gen_rtx_SET (target, vperm));
13150 else
13151 emit_insn (gen_rtx_SET (target, addr));
13153 else
13155 if (op0 == const0_rtx)
13156 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13157 else
13159 op0 = copy_to_mode_reg (mode0, op0);
13160 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13161 gen_rtx_PLUS (Pmode, op1, op0));
13164 pat = GEN_FCN (icode) (target, addr);
13165 if (! pat)
13166 return 0;
13167 emit_insn (pat);
13170 return target;
13173 static rtx
13174 spe_expand_stv_builtin (enum insn_code icode, tree exp)
13176 tree arg0 = CALL_EXPR_ARG (exp, 0);
13177 tree arg1 = CALL_EXPR_ARG (exp, 1);
13178 tree arg2 = CALL_EXPR_ARG (exp, 2);
13179 rtx op0 = expand_normal (arg0);
13180 rtx op1 = expand_normal (arg1);
13181 rtx op2 = expand_normal (arg2);
13182 rtx pat;
13183 machine_mode mode0 = insn_data[icode].operand[0].mode;
13184 machine_mode mode1 = insn_data[icode].operand[1].mode;
13185 machine_mode mode2 = insn_data[icode].operand[2].mode;
13187 /* Invalid arguments.  Bail out before generating bad rtl.  */
13188 if (arg0 == error_mark_node
13189 || arg1 == error_mark_node
13190 || arg2 == error_mark_node)
13191 return const0_rtx;
13193 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
13194 op0 = copy_to_mode_reg (mode2, op0);
13195 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
13196 op1 = copy_to_mode_reg (mode0, op1);
13197 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13198 op2 = copy_to_mode_reg (mode1, op2);
13200 pat = GEN_FCN (icode) (op1, op2, op0);
13201 if (pat)
13202 emit_insn (pat);
13203 return NULL_RTX;
13206 static rtx
13207 paired_expand_stv_builtin (enum insn_code icode, tree exp)
13209 tree arg0 = CALL_EXPR_ARG (exp, 0);
13210 tree arg1 = CALL_EXPR_ARG (exp, 1);
13211 tree arg2 = CALL_EXPR_ARG (exp, 2);
13212 rtx op0 = expand_normal (arg0);
13213 rtx op1 = expand_normal (arg1);
13214 rtx op2 = expand_normal (arg2);
13215 rtx pat, addr;
13216 machine_mode tmode = insn_data[icode].operand[0].mode;
13217 machine_mode mode1 = Pmode;
13218 machine_mode mode2 = Pmode;
13220 /* Invalid arguments.  Bail out before generating bad rtl.  */
13221 if (arg0 == error_mark_node
13222 || arg1 == error_mark_node
13223 || arg2 == error_mark_node)
13224 return const0_rtx;
13226 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
13227 op0 = copy_to_mode_reg (tmode, op0);
13229 op2 = copy_to_mode_reg (mode2, op2);
13231 if (op1 == const0_rtx)
13233 addr = gen_rtx_MEM (tmode, op2);
13235 else
13237 op1 = copy_to_mode_reg (mode1, op1);
13238 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13241 pat = GEN_FCN (icode) (addr, op0);
13242 if (pat)
13243 emit_insn (pat);
13244 return NULL_RTX;
13247 static rtx
13248 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13250 tree arg0 = CALL_EXPR_ARG (exp, 0);
13251 tree arg1 = CALL_EXPR_ARG (exp, 1);
13252 tree arg2 = CALL_EXPR_ARG (exp, 2);
13253 rtx op0 = expand_normal (arg0);
13254 rtx op1 = expand_normal (arg1);
13255 rtx op2 = expand_normal (arg2);
13256 rtx pat, addr, rawaddr;
13257 machine_mode tmode = insn_data[icode].operand[0].mode;
13258 machine_mode smode = insn_data[icode].operand[1].mode;
13259 machine_mode mode1 = Pmode;
13260 machine_mode mode2 = Pmode;
13262 /* Invalid arguments.  Bail out before generating bad rtl.  */
13263 if (arg0 == error_mark_node
13264 || arg1 == error_mark_node
13265 || arg2 == error_mark_node)
13266 return const0_rtx;
13268 op2 = copy_to_mode_reg (mode2, op2);
13270 /* For STVX, express the RTL accurately by ANDing the address with -16.
13271 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13272 so the raw address is fine. */
13273 if (icode == CODE_FOR_altivec_stvx_v2df_2op
13274 || icode == CODE_FOR_altivec_stvx_v2di_2op
13275 || icode == CODE_FOR_altivec_stvx_v4sf_2op
13276 || icode == CODE_FOR_altivec_stvx_v4si_2op
13277 || icode == CODE_FOR_altivec_stvx_v8hi_2op
13278 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
13280 if (op1 == const0_rtx)
13281 rawaddr = op2;
13282 else
13284 op1 = copy_to_mode_reg (mode1, op1);
13285 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13288 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13289 addr = gen_rtx_MEM (tmode, addr);
13291 op0 = copy_to_mode_reg (tmode, op0);
13293 /* For -maltivec=be, emit a permute to swap the elements, followed
13294 by the store. */
13295 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13297 rtx temp = gen_reg_rtx (tmode);
13298 rtx sel = swap_selector_for_mode (tmode);
13299 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
13300 UNSPEC_VPERM);
13301 emit_insn (gen_rtx_SET (temp, vperm));
13302 emit_insn (gen_rtx_SET (addr, temp));
13304 else
13305 emit_insn (gen_rtx_SET (addr, op0));
13307 else
13309 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13310 op0 = copy_to_mode_reg (smode, op0);
13312 if (op1 == const0_rtx)
13313 addr = gen_rtx_MEM (tmode, op2);
13314 else
13316 op1 = copy_to_mode_reg (mode1, op1);
13317 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13320 pat = GEN_FCN (icode) (addr, op0);
13321 if (pat)
13322 emit_insn (pat);
13325 return NULL_RTX;
13328 /* Return the appropriate SPR number associated with the given builtin. */
13329 static inline HOST_WIDE_INT
13330 htm_spr_num (enum rs6000_builtins code)
13332 if (code == HTM_BUILTIN_GET_TFHAR
13333 || code == HTM_BUILTIN_SET_TFHAR)
13334 return TFHAR_SPR;
13335 else if (code == HTM_BUILTIN_GET_TFIAR
13336 || code == HTM_BUILTIN_SET_TFIAR)
13337 return TFIAR_SPR;
13338 else if (code == HTM_BUILTIN_GET_TEXASR
13339 || code == HTM_BUILTIN_SET_TEXASR)
13340 return TEXASR_SPR;
13341 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13342 || code == HTM_BUILTIN_SET_TEXASRU);
13343 return TEXASRU_SPR;
13346 /* Return the appropriate SPR regno associated with the given builtin. */
13347 static inline HOST_WIDE_INT
13348 htm_spr_regno (enum rs6000_builtins code)
13350 if (code == HTM_BUILTIN_GET_TFHAR
13351 || code == HTM_BUILTIN_SET_TFHAR)
13352 return TFHAR_REGNO;
13353 else if (code == HTM_BUILTIN_GET_TFIAR
13354 || code == HTM_BUILTIN_SET_TFIAR)
13355 return TFIAR_REGNO;
13356 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13357 || code == HTM_BUILTIN_SET_TEXASR
13358 || code == HTM_BUILTIN_GET_TEXASRU
13359 || code == HTM_BUILTIN_SET_TEXASRU);
13360 return TEXASR_REGNO;
13363 /* Return the correct ICODE value depending on whether we are
13364 setting or reading the HTM SPRs. */
13365 static inline enum insn_code
13366 rs6000_htm_spr_icode (bool nonvoid)
13368 if (nonvoid)
13369 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13370 else
13371 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
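/* For example, __builtin_get_texasr on a 64-bit target is routed
   through CODE_FOR_htm_mfspr_di with SPR number TEXASR_SPR, i.e. it
   becomes a single mfspr of the TEXASR register; the __builtin_set_*
   variants select the mtspr patterns instead.  */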
13374 /* Expand the HTM builtin in EXP and store the result in TARGET.
13375 Store true in *EXPANDEDP if we found a builtin to expand. */
13376 static rtx
13377 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13379 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13380 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13381 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13382 const struct builtin_description *d;
13383 size_t i;
13385 *expandedp = true;
13387 if (!TARGET_POWERPC64
13388 && (fcode == HTM_BUILTIN_TABORTDC
13389 || fcode == HTM_BUILTIN_TABORTDCI))
13391 size_t uns_fcode = (size_t)fcode;
13392 const char *name = rs6000_builtin_info[uns_fcode].name;
13393 error ("builtin %s is only valid in 64-bit mode", name);
13394 return const0_rtx;
13397 /* Expand the HTM builtins. */
13398 d = bdesc_htm;
13399 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13400 if (d->code == fcode)
13402 rtx op[MAX_HTM_OPERANDS], pat;
13403 int nopnds = 0;
13404 tree arg;
13405 call_expr_arg_iterator iter;
13406 unsigned attr = rs6000_builtin_info[fcode].attr;
13407 enum insn_code icode = d->icode;
13408 const struct insn_operand_data *insn_op;
13409 bool uses_spr = (attr & RS6000_BTC_SPR);
13410 rtx cr = NULL_RTX;
13412 if (uses_spr)
13413 icode = rs6000_htm_spr_icode (nonvoid);
13414 insn_op = &insn_data[icode].operand[0];
13416 if (nonvoid)
13418 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
13419 if (!target
13420 || GET_MODE (target) != tmode
13421 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13422 target = gen_reg_rtx (tmode);
13423 if (uses_spr)
13424 op[nopnds++] = target;
13427 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13429 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13430 return const0_rtx;
13432 insn_op = &insn_data[icode].operand[nopnds];
13434 op[nopnds] = expand_normal (arg);
13436 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13438 if (!strcmp (insn_op->constraint, "n"))
13440 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13441 if (!CONST_INT_P (op[nopnds]))
13442 error ("argument %d must be an unsigned literal", arg_num);
13443 else
13444 error ("argument %d is an unsigned literal that is "
13445 "out of range", arg_num);
13446 return const0_rtx;
13448 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13451 nopnds++;
13454 /* Handle the builtins for extended mnemonics. These accept
13455 no arguments, but map to builtins that take arguments. */
13456 switch (fcode)
13458 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13459 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13460 op[nopnds++] = GEN_INT (1);
13461 if (flag_checking)
13462 attr |= RS6000_BTC_UNARY;
13463 break;
13464 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13465 op[nopnds++] = GEN_INT (0);
13466 if (flag_checking)
13467 attr |= RS6000_BTC_UNARY;
13468 break;
13469 default:
13470 break;
13473 /* If this builtin accesses SPRs, then pass in the appropriate
13474 SPR number and SPR regno as the last two operands. */
13475 if (uses_spr)
13477 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13478 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13479 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
13481 /* If this builtin accesses a CR, then pass in a scratch
13482 CR as the last operand. */
13483 else if (attr & RS6000_BTC_CR)
13484 {
      cr = gen_reg_rtx (CCmode);
13485 op[nopnds++] = cr;
13488 if (flag_checking)
13490 int expected_nopnds = 0;
13491 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13492 expected_nopnds = 1;
13493 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13494 expected_nopnds = 2;
13495 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13496 expected_nopnds = 3;
13497 if (!(attr & RS6000_BTC_VOID))
13498 expected_nopnds += 1;
13499 if (uses_spr)
13500 expected_nopnds += 2;
13502 gcc_assert (nopnds == expected_nopnds
13503 && nopnds <= MAX_HTM_OPERANDS);
13506 switch (nopnds)
13508 case 1:
13509 pat = GEN_FCN (icode) (op[0]);
13510 break;
13511 case 2:
13512 pat = GEN_FCN (icode) (op[0], op[1]);
13513 break;
13514 case 3:
13515 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13516 break;
13517 case 4:
13518 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13519 break;
13520 default:
13521 gcc_unreachable ();
13523 if (!pat)
13524 return NULL_RTX;
13525 emit_insn (pat);
13527 if (attr & RS6000_BTC_CR)
13529 if (fcode == HTM_BUILTIN_TBEGIN)
13531 /* Emit code to set TARGET to true or false depending on
13532 whether the tbegin. instruction succeeded or failed
13533 to start a transaction. We do this by placing the 1's
13534 complement of CR's EQ bit into TARGET. */
13535 rtx scratch = gen_reg_rtx (SImode);
13536 emit_insn (gen_rtx_SET (scratch,
13537 gen_rtx_EQ (SImode, cr,
13538 const0_rtx)));
13539 emit_insn (gen_rtx_SET (target,
13540 gen_rtx_XOR (SImode, scratch,
13541 GEN_INT (1))));
13543 else
13545 /* Emit code to copy the 4-bit condition register field
13546 CR into the least significant end of register TARGET. */
13547 rtx scratch1 = gen_reg_rtx (SImode);
13548 rtx scratch2 = gen_reg_rtx (SImode);
13549 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13550 emit_insn (gen_movcc (subreg, cr));
13551 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13552 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13556 if (nonvoid)
13557 return target;
13558 return const0_rtx;
13561 *expandedp = false;
13562 return NULL_RTX;
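/* Typical user-level shape of the CR-setting case handled above
   (illustrative only):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }

   where the nonzero result of __builtin_tbegin comes from the
   complemented EQ bit extracted from the condition register after
   the tbegin. instruction.  */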
13565 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13567 static rtx
13568 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13569 rtx target)
13571 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13572 if (fcode == RS6000_BUILTIN_CPU_INIT)
13573 return const0_rtx;
13575 if (target == 0 || GET_MODE (target) != SImode)
13576 target = gen_reg_rtx (SImode);
13578 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13579 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13580 if (TREE_CODE (arg) != STRING_CST)
13582 error ("builtin %s only accepts a string argument",
13583 rs6000_builtin_info[(size_t) fcode].name);
13584 return const0_rtx;
13587 if (fcode == RS6000_BUILTIN_CPU_IS)
13589 const char *cpu = TREE_STRING_POINTER (arg);
13590 rtx cpuid = NULL_RTX;
13591 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13592 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13594 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13595 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13596 break;
13598 if (cpuid == NULL_RTX)
13600 /* Invalid CPU argument. */
13601 error ("cpu %s is an invalid argument to builtin %s",
13602 cpu, rs6000_builtin_info[(size_t) fcode].name);
13603 return const0_rtx;
13606 rtx platform = gen_reg_rtx (SImode);
13607 rtx tcbmem = gen_const_mem (SImode,
13608 gen_rtx_PLUS (Pmode,
13609 gen_rtx_REG (Pmode, TLS_REGNUM),
13610 GEN_INT (TCB_PLATFORM_OFFSET)));
13611 emit_move_insn (platform, tcbmem);
13612 emit_insn (gen_eqsi3 (target, platform, cpuid));
13614 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13616 const char *hwcap = TREE_STRING_POINTER (arg);
13617 rtx mask = NULL_RTX;
13618 int hwcap_offset;
13619 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13620 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13622 mask = GEN_INT (cpu_supports_info[i].mask);
13623 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13624 break;
13626 if (mask == NULL_RTX)
13628 /* Invalid HWCAP argument. */
13629 error ("hwcap %s is an invalid argument to builtin %s",
13630 hwcap, rs6000_builtin_info[(size_t) fcode].name);
13631 return const0_rtx;
13634 rtx tcb_hwcap = gen_reg_rtx (SImode);
13635 rtx tcbmem = gen_const_mem (SImode,
13636 gen_rtx_PLUS (Pmode,
13637 gen_rtx_REG (Pmode, TLS_REGNUM),
13638 GEN_INT (hwcap_offset)));
13639 emit_move_insn (tcb_hwcap, tcbmem);
13640 rtx scratch1 = gen_reg_rtx (SImode);
13641 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
13642 rtx scratch2 = gen_reg_rtx (SImode);
13643 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13644 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13647 /* Record that we have expanded a CPU builtin, so that we can later
13648 emit a reference to the special symbol exported by LIBC to ensure we
13649 do not link against an old LIBC that doesn't support this feature. */
13650 cpu_builtin_p = true;
13652 #else
13653 /* For old LIBCs, always return FALSE. */
13654 emit_move_insn (target, GEN_INT (0));
13655 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
13657 return target;
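/* Example usage (assumes a libc new enough to provide the TCB
   fields; the hwcap string is illustrative):

     if (__builtin_cpu_supports ("arch_2_07"))
       use_isa_2_07_code ();

   which expands to a load of the cached hwcap word from the TCB, an
   AND with the feature's mask, and a compare against zero; no call
   into libc is generated.  */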
13660 static rtx
13661 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13663 rtx pat;
13664 tree arg0 = CALL_EXPR_ARG (exp, 0);
13665 tree arg1 = CALL_EXPR_ARG (exp, 1);
13666 tree arg2 = CALL_EXPR_ARG (exp, 2);
13667 rtx op0 = expand_normal (arg0);
13668 rtx op1 = expand_normal (arg1);
13669 rtx op2 = expand_normal (arg2);
13670 machine_mode tmode = insn_data[icode].operand[0].mode;
13671 machine_mode mode0 = insn_data[icode].operand[1].mode;
13672 machine_mode mode1 = insn_data[icode].operand[2].mode;
13673 machine_mode mode2 = insn_data[icode].operand[3].mode;
13675 if (icode == CODE_FOR_nothing)
13676 /* Builtin not supported on this processor. */
13677 return 0;
13679 /* If we got invalid arguments bail out before generating bad rtl. */
13680 if (arg0 == error_mark_node
13681 || arg1 == error_mark_node
13682 || arg2 == error_mark_node)
13683 return const0_rtx;
13685 /* Check and prepare argument depending on the instruction code.
13687 Note that a switch statement instead of the sequence of tests
13688 would be incorrect as many of the CODE_FOR values could be
13689 CODE_FOR_nothing and that would yield multiple alternatives
13690 with identical values. We'd never reach here at runtime in
13691 this case. */
13692 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13693 || icode == CODE_FOR_altivec_vsldoi_v4si
13694 || icode == CODE_FOR_altivec_vsldoi_v8hi
13695 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13697 /* Only allow 4-bit unsigned literals. */
13698 STRIP_NOPS (arg2);
13699 if (TREE_CODE (arg2) != INTEGER_CST
13700 || TREE_INT_CST_LOW (arg2) & ~0xf)
13702 error ("argument 3 must be a 4-bit unsigned literal");
13703 return const0_rtx;
13706 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13707 || icode == CODE_FOR_vsx_xxpermdi_v2di
13708 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13709 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13710 || icode == CODE_FOR_vsx_xxsldwi_v4si
13711 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13712 || icode == CODE_FOR_vsx_xxsldwi_v2di
13713 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13715 /* Only allow 2-bit unsigned literals. */
13716 STRIP_NOPS (arg2);
13717 if (TREE_CODE (arg2) != INTEGER_CST
13718 || TREE_INT_CST_LOW (arg2) & ~0x3)
13720 error ("argument 3 must be a 2-bit unsigned literal");
13721 return const0_rtx;
13724 else if (icode == CODE_FOR_vsx_set_v2df
13725 || icode == CODE_FOR_vsx_set_v2di
13726 || icode == CODE_FOR_bcdadd
13727 || icode == CODE_FOR_bcdadd_lt
13728 || icode == CODE_FOR_bcdadd_eq
13729 || icode == CODE_FOR_bcdadd_gt
13730 || icode == CODE_FOR_bcdsub
13731 || icode == CODE_FOR_bcdsub_lt
13732 || icode == CODE_FOR_bcdsub_eq
13733 || icode == CODE_FOR_bcdsub_gt)
13735 /* Only allow 1-bit unsigned literals. */
13736 STRIP_NOPS (arg2);
13737 if (TREE_CODE (arg2) != INTEGER_CST
13738 || TREE_INT_CST_LOW (arg2) & ~0x1)
13740 error ("argument 3 must be a 1-bit unsigned literal");
13741 return const0_rtx;
13744 else if (icode == CODE_FOR_dfp_ddedpd_dd
13745 || icode == CODE_FOR_dfp_ddedpd_td)
13747 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
13748 STRIP_NOPS (arg0);
13749 if (TREE_CODE (arg0) != INTEGER_CST
13750 || TREE_INT_CST_LOW (arg0) & ~0x3)
13752 error ("argument 1 must be 0 or 2");
13753 return const0_rtx;
13756 else if (icode == CODE_FOR_dfp_denbcd_dd
13757 || icode == CODE_FOR_dfp_denbcd_td)
13759 /* Only allow 1-bit unsigned literals. */
13760 STRIP_NOPS (arg0);
13761 if (TREE_CODE (arg0) != INTEGER_CST
13762 || TREE_INT_CST_LOW (arg0) & ~0x1)
13764 error ("argument 1 must be a 1-bit unsigned literal");
13765 return const0_rtx;
13768 else if (icode == CODE_FOR_dfp_dscli_dd
13769 || icode == CODE_FOR_dfp_dscli_td
13770 || icode == CODE_FOR_dfp_dscri_dd
13771 || icode == CODE_FOR_dfp_dscri_td)
13773 /* Only allow 6-bit unsigned literals. */
13774 STRIP_NOPS (arg1);
13775 if (TREE_CODE (arg1) != INTEGER_CST
13776 || TREE_INT_CST_LOW (arg1) & ~0x3f)
13778 error ("argument 2 must be a 6-bit unsigned literal");
13779 return const0_rtx;
13782 else if (icode == CODE_FOR_crypto_vshasigmaw
13783 || icode == CODE_FOR_crypto_vshasigmad)
13785 /* Check whether the 2nd and 3rd arguments are integer constants and in
13786 range and prepare arguments. */
13787 STRIP_NOPS (arg1);
13788 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13790 error ("argument 2 must be 0 or 1");
13791 return const0_rtx;
13794 STRIP_NOPS (arg2);
13795 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13797 error ("argument 3 must be in the range 0..15");
13798 return const0_rtx;
13802 if (target == 0
13803 || GET_MODE (target) != tmode
13804 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13805 target = gen_reg_rtx (tmode);
13807 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13808 op0 = copy_to_mode_reg (mode0, op0);
13809 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13810 op1 = copy_to_mode_reg (mode1, op1);
13811 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13812 op2 = copy_to_mode_reg (mode2, op2);
13814 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13815 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13816 else
13817 pat = GEN_FCN (icode) (target, op0, op1, op2);
13818 if (! pat)
13819 return 0;
13820 emit_insn (pat);
13822 return target;
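/* For example, vec_sld (a, b, 5) is funneled through the vsldoi
   check above: the shift count is argument 3 and must be a 4-bit
   literal, so a non-constant or out-of-range count is diagnosed here
   instead of producing RTL the insn cannot match.  (vec_sld is the
   user-level spelling; illustrative only.)  */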
13825 /* Expand the lvx builtins. */
13826 static rtx
13827 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13829 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13830 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13831 tree arg0;
13832 machine_mode tmode, mode0;
13833 rtx pat, op0;
13834 enum insn_code icode;
13836 switch (fcode)
13838 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13839 icode = CODE_FOR_vector_altivec_load_v16qi;
13840 break;
13841 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13842 icode = CODE_FOR_vector_altivec_load_v8hi;
13843 break;
13844 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13845 icode = CODE_FOR_vector_altivec_load_v4si;
13846 break;
13847 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13848 icode = CODE_FOR_vector_altivec_load_v4sf;
13849 break;
13850 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13851 icode = CODE_FOR_vector_altivec_load_v2df;
13852 break;
13853 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13854 icode = CODE_FOR_vector_altivec_load_v2di;
      break;
13855 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13856 icode = CODE_FOR_vector_altivec_load_v1ti;
13857 break;
13858 default:
13859 *expandedp = false;
13860 return NULL_RTX;
13863 *expandedp = true;
13865 arg0 = CALL_EXPR_ARG (exp, 0);
13866 op0 = expand_normal (arg0);
13867 tmode = insn_data[icode].operand[0].mode;
13868 mode0 = insn_data[icode].operand[1].mode;
13870 if (target == 0
13871 || GET_MODE (target) != tmode
13872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13873 target = gen_reg_rtx (tmode);
13875 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13876 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13878 pat = GEN_FCN (icode) (target, op0);
13879 if (! pat)
13880 return 0;
13881 emit_insn (pat);
13882 return target;
13885 /* Expand the stvx builtins. */
13886 static rtx
13887 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13888 bool *expandedp)
13890 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13891 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13892 tree arg0, arg1;
13893 machine_mode mode0, mode1;
13894 rtx pat, op0, op1;
13895 enum insn_code icode;
13897 switch (fcode)
13899 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13900 icode = CODE_FOR_vector_altivec_store_v16qi;
13901 break;
13902 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13903 icode = CODE_FOR_vector_altivec_store_v8hi;
13904 break;
13905 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13906 icode = CODE_FOR_vector_altivec_store_v4si;
13907 break;
13908 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13909 icode = CODE_FOR_vector_altivec_store_v4sf;
13910 break;
13911 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13912 icode = CODE_FOR_vector_altivec_store_v2df;
13913 break;
13914 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13915 icode = CODE_FOR_vector_altivec_store_v2di;
      break;
13916 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13917 icode = CODE_FOR_vector_altivec_store_v1ti;
13918 break;
13919 default:
13920 *expandedp = false;
13921 return NULL_RTX;
13924 arg0 = CALL_EXPR_ARG (exp, 0);
13925 arg1 = CALL_EXPR_ARG (exp, 1);
13926 op0 = expand_normal (arg0);
13927 op1 = expand_normal (arg1);
13928 mode0 = insn_data[icode].operand[0].mode;
13929 mode1 = insn_data[icode].operand[1].mode;
13931 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13932 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13933 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13934 op1 = copy_to_mode_reg (mode1, op1);
13936 pat = GEN_FCN (icode) (op0, op1);
13937 if (pat)
13938 emit_insn (pat);
13940 *expandedp = true;
13941 return NULL_RTX;
13944 /* Expand the dst builtins. */
13945 static rtx
13946 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13947 bool *expandedp)
13949 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13950 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13951 tree arg0, arg1, arg2;
13952 machine_mode mode0, mode1;
13953 rtx pat, op0, op1, op2;
13954 const struct builtin_description *d;
13955 size_t i;
13957 *expandedp = false;
13959 /* Handle DST variants. */
13960 d = bdesc_dst;
13961 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13962 if (d->code == fcode)
13964 arg0 = CALL_EXPR_ARG (exp, 0);
13965 arg1 = CALL_EXPR_ARG (exp, 1);
13966 arg2 = CALL_EXPR_ARG (exp, 2);
13967 op0 = expand_normal (arg0);
13968 op1 = expand_normal (arg1);
13969 op2 = expand_normal (arg2);
13970 mode0 = insn_data[d->icode].operand[0].mode;
13971 mode1 = insn_data[d->icode].operand[1].mode;
13973 /* Invalid arguments, bail out before generating bad rtl. */
13974 if (arg0 == error_mark_node
13975 || arg1 == error_mark_node
13976 || arg2 == error_mark_node)
13977 return const0_rtx;
13979 *expandedp = true;
13980 STRIP_NOPS (arg2);
13981 if (TREE_CODE (arg2) != INTEGER_CST
13982 || TREE_INT_CST_LOW (arg2) & ~0x3)
13984 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13985 return const0_rtx;
13988 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (Pmode, op0);
13990 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13991 op1 = copy_to_mode_reg (mode1, op1);
13993 pat = GEN_FCN (d->icode) (op0, op1, op2);
13994 if (pat != 0)
13995 emit_insn (pat);
13997 return NULL_RTX;
14000 return NULL_RTX;
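/* A representative call (illustrative): vec_dst (p, ctl, 0) arrives
   here with the prefetch address in ARG0, the block/stride control
   word in ARG1 and the 2-bit stream tag 0 in ARG2; only the tag is
   required to be a literal, which is why it alone is range-checked
   above.  */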
14003 /* Expand vec_init builtin. */
14004 static rtx
14005 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14007 machine_mode tmode = TYPE_MODE (type);
14008 machine_mode inner_mode = GET_MODE_INNER (tmode);
14009 int i, n_elt = GET_MODE_NUNITS (tmode);
14011 gcc_assert (VECTOR_MODE_P (tmode));
14012 gcc_assert (n_elt == call_expr_nargs (exp));
14014 if (!target || !register_operand (target, tmode))
14015 target = gen_reg_rtx (tmode);
14017 /* If we have a vector consisting of a single element, such as V1TImode, do
14018 the initialization directly. */
14019 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14021 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14022 emit_move_insn (target, gen_lowpart (tmode, x));
14024 else
14026 rtvec v = rtvec_alloc (n_elt);
14028 for (i = 0; i < n_elt; ++i)
14030 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14031 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14034 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14037 return target;
14040 /* Return the integer constant in ARG. Constrain it to be in the range
14041 of the subparts of VEC_TYPE; issue an error if not. */
14043 static int
14044 get_element_number (tree vec_type, tree arg)
14046 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14048 if (!tree_fits_uhwi_p (arg)
14049 || (elt = tree_to_uhwi (arg), elt > max))
14051 error ("selector must be an integer constant in the range 0..%wi", max);
14052 return 0;
14055 return elt;
14058 /* Expand vec_set builtin. */
14059 static rtx
14060 altivec_expand_vec_set_builtin (tree exp)
14062 machine_mode tmode, mode1;
14063 tree arg0, arg1, arg2;
14064 int elt;
14065 rtx op0, op1;
14067 arg0 = CALL_EXPR_ARG (exp, 0);
14068 arg1 = CALL_EXPR_ARG (exp, 1);
14069 arg2 = CALL_EXPR_ARG (exp, 2);
14071 tmode = TYPE_MODE (TREE_TYPE (arg0));
14072 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14073 gcc_assert (VECTOR_MODE_P (tmode));
14075 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14076 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14077 elt = get_element_number (TREE_TYPE (arg0), arg2);
14079 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14080 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14082 op0 = force_reg (tmode, op0);
14083 op1 = force_reg (mode1, op1);
14085 rs6000_expand_vector_set (op0, op1, elt);
14087 return op0;
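/* For instance (illustrative), vec_insert (x, v, 2) on a V4SI vector
   reaches this point as a call with ARG0 = v, ARG1 = x and ARG2 = 2;
   the selector is validated against the vector's four subparts and
   rs6000_expand_vector_set then rewrites element 2 in place.  */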
14090 /* Expand vec_ext builtin. */
14091 static rtx
14092 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14094 machine_mode tmode, mode0;
14095 tree arg0, arg1;
14096 int elt;
14097 rtx op0;
14099 arg0 = CALL_EXPR_ARG (exp, 0);
14100 arg1 = CALL_EXPR_ARG (exp, 1);
14102 op0 = expand_normal (arg0);
14103 elt = get_element_number (TREE_TYPE (arg0), arg1);
14105 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14106 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14107 gcc_assert (VECTOR_MODE_P (mode0));
14109 op0 = force_reg (mode0, op0);
14111 if (optimize || !target || !register_operand (target, tmode))
14112 target = gen_reg_rtx (tmode);
14114 rs6000_expand_vector_extract (target, op0, elt);
14116 return target;
14119 /* Expand the builtin in EXP and store the result in TARGET. Store
14120 true in *EXPANDEDP if we found a builtin to expand. */
14121 static rtx
14122 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14124 const struct builtin_description *d;
14125 size_t i;
14126 enum insn_code icode;
14127 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14128 tree arg0;
14129 rtx op0, pat;
14130 machine_mode tmode, mode0;
14131 enum rs6000_builtins fcode
14132 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14134 if (rs6000_overloaded_builtin_p (fcode))
14136 *expandedp = true;
14137 error ("unresolved overload for AltiVec builtin %qF", fndecl);
14139 /* Given it is invalid, just generate a normal call. */
14140 return expand_call (exp, target, false);
14143 target = altivec_expand_ld_builtin (exp, target, expandedp);
14144 if (*expandedp)
14145 return target;
14147 target = altivec_expand_st_builtin (exp, target, expandedp);
14148 if (*expandedp)
14149 return target;
14151 target = altivec_expand_dst_builtin (exp, target, expandedp);
14152 if (*expandedp)
14153 return target;
14155 *expandedp = true;
14157 switch (fcode)
14159 case ALTIVEC_BUILTIN_STVX_V2DF:
14160 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
14161 case ALTIVEC_BUILTIN_STVX_V2DI:
14162 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
14163 case ALTIVEC_BUILTIN_STVX_V4SF:
14164 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
14165 case ALTIVEC_BUILTIN_STVX:
14166 case ALTIVEC_BUILTIN_STVX_V4SI:
14167 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
14168 case ALTIVEC_BUILTIN_STVX_V8HI:
14169 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
14170 case ALTIVEC_BUILTIN_STVX_V16QI:
14171 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
14172 case ALTIVEC_BUILTIN_STVEBX:
14173 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14174 case ALTIVEC_BUILTIN_STVEHX:
14175 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14176 case ALTIVEC_BUILTIN_STVEWX:
14177 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14178 case ALTIVEC_BUILTIN_STVXL_V2DF:
14179 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14180 case ALTIVEC_BUILTIN_STVXL_V2DI:
14181 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14182 case ALTIVEC_BUILTIN_STVXL_V4SF:
14183 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14184 case ALTIVEC_BUILTIN_STVXL:
14185 case ALTIVEC_BUILTIN_STVXL_V4SI:
14186 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14187 case ALTIVEC_BUILTIN_STVXL_V8HI:
14188 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14189 case ALTIVEC_BUILTIN_STVXL_V16QI:
14190 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14192 case ALTIVEC_BUILTIN_STVLX:
14193 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14194 case ALTIVEC_BUILTIN_STVLXL:
14195 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14196 case ALTIVEC_BUILTIN_STVRX:
14197 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14198 case ALTIVEC_BUILTIN_STVRXL:
14199 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14201 case VSX_BUILTIN_STXVD2X_V1TI:
14202 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14203 case VSX_BUILTIN_STXVD2X_V2DF:
14204 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14205 case VSX_BUILTIN_STXVD2X_V2DI:
14206 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14207 case VSX_BUILTIN_STXVW4X_V4SF:
14208 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14209 case VSX_BUILTIN_STXVW4X_V4SI:
14210 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14211 case VSX_BUILTIN_STXVW4X_V8HI:
14212 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14213 case VSX_BUILTIN_STXVW4X_V16QI:
14214 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14216 /* For the following on big endian, it's ok to use any appropriate
14217 unaligned-supporting store, so use a generic expander. For
14218 little-endian, the exact element-reversing instruction must
14219 be used. */
14220 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14222 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14223 : CODE_FOR_vsx_st_elemrev_v2df);
14224 return altivec_expand_stv_builtin (code, exp);
14226 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14228 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14229 : CODE_FOR_vsx_st_elemrev_v2di);
14230 return altivec_expand_stv_builtin (code, exp);
14232 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14234 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14235 : CODE_FOR_vsx_st_elemrev_v4sf);
14236 return altivec_expand_stv_builtin (code, exp);
14238 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14240 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14241 : CODE_FOR_vsx_st_elemrev_v4si);
14242 return altivec_expand_stv_builtin (code, exp);
14244 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14246 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14247 : CODE_FOR_vsx_st_elemrev_v8hi);
14248 return altivec_expand_stv_builtin (code, exp);
14250 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14252 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14253 : CODE_FOR_vsx_st_elemrev_v16qi);
14254 return altivec_expand_stv_builtin (code, exp);
14257 case ALTIVEC_BUILTIN_MFVSCR:
14258 icode = CODE_FOR_altivec_mfvscr;
14259 tmode = insn_data[icode].operand[0].mode;
14261 if (target == 0
14262 || GET_MODE (target) != tmode
14263 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14264 target = gen_reg_rtx (tmode);
14266 pat = GEN_FCN (icode) (target);
14267 if (! pat)
14268 return 0;
14269 emit_insn (pat);
14270 return target;
14272 case ALTIVEC_BUILTIN_MTVSCR:
14273 icode = CODE_FOR_altivec_mtvscr;
14274 arg0 = CALL_EXPR_ARG (exp, 0);
14275 op0 = expand_normal (arg0);
14276 mode0 = insn_data[icode].operand[0].mode;
14278 /* If we got invalid arguments bail out before generating bad rtl. */
14279 if (arg0 == error_mark_node)
14280 return const0_rtx;
14282 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14283 op0 = copy_to_mode_reg (mode0, op0);
14285 pat = GEN_FCN (icode) (op0);
14286 if (pat)
14287 emit_insn (pat);
14288 return NULL_RTX;
14290 case ALTIVEC_BUILTIN_DSSALL:
14291 emit_insn (gen_altivec_dssall ());
14292 return NULL_RTX;
14294 case ALTIVEC_BUILTIN_DSS:
14295 icode = CODE_FOR_altivec_dss;
14296 arg0 = CALL_EXPR_ARG (exp, 0);
14297 STRIP_NOPS (arg0);
14298 op0 = expand_normal (arg0);
14299 mode0 = insn_data[icode].operand[0].mode;
14301 /* If we got invalid arguments bail out before generating bad rtl. */
14302 if (arg0 == error_mark_node)
14303 return const0_rtx;
14305 if (TREE_CODE (arg0) != INTEGER_CST
14306 || TREE_INT_CST_LOW (arg0) & ~0x3)
14308 error ("argument to dss must be a 2-bit unsigned literal");
14309 return const0_rtx;
14312 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14313 op0 = copy_to_mode_reg (mode0, op0);
14315 emit_insn (gen_altivec_dss (op0));
14316 return NULL_RTX;
14318 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14319 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14320 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14321 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14322 case VSX_BUILTIN_VEC_INIT_V2DF:
14323 case VSX_BUILTIN_VEC_INIT_V2DI:
14324 case VSX_BUILTIN_VEC_INIT_V1TI:
14325 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14327 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14328 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14329 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14330 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14331 case VSX_BUILTIN_VEC_SET_V2DF:
14332 case VSX_BUILTIN_VEC_SET_V2DI:
14333 case VSX_BUILTIN_VEC_SET_V1TI:
14334 return altivec_expand_vec_set_builtin (exp);
14336 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14337 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14338 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14339 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14340 case VSX_BUILTIN_VEC_EXT_V2DF:
14341 case VSX_BUILTIN_VEC_EXT_V2DI:
14342 case VSX_BUILTIN_VEC_EXT_V1TI:
14343 return altivec_expand_vec_ext_builtin (exp, target);
14345 default:
14346 break;
14350 /* Expand abs* operations. */
14351 d = bdesc_abs;
14352 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14353 if (d->code == fcode)
14354 return altivec_expand_abs_builtin (d->icode, exp, target);
14356 /* Expand the AltiVec predicates. */
14357 d = bdesc_altivec_preds;
14358 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14359 if (d->code == fcode)
14360 return altivec_expand_predicate_builtin (d->icode, exp, target);
14362 /* LV* are funky. We initialized them differently. */
14363 switch (fcode)
14365 case ALTIVEC_BUILTIN_LVSL:
14366 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14367 exp, target, false);
14368 case ALTIVEC_BUILTIN_LVSR:
14369 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14370 exp, target, false);
14371 case ALTIVEC_BUILTIN_LVEBX:
14372 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14373 exp, target, false);
14374 case ALTIVEC_BUILTIN_LVEHX:
14375 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14376 exp, target, false);
14377 case ALTIVEC_BUILTIN_LVEWX:
14378 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14379 exp, target, false);
14380 case ALTIVEC_BUILTIN_LVXL_V2DF:
14381 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14382 exp, target, false);
14383 case ALTIVEC_BUILTIN_LVXL_V2DI:
14384 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14385 exp, target, false);
14386 case ALTIVEC_BUILTIN_LVXL_V4SF:
14387 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14388 exp, target, false);
14389 case ALTIVEC_BUILTIN_LVXL:
14390 case ALTIVEC_BUILTIN_LVXL_V4SI:
14391 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14392 exp, target, false);
14393 case ALTIVEC_BUILTIN_LVXL_V8HI:
14394 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14395 exp, target, false);
14396 case ALTIVEC_BUILTIN_LVXL_V16QI:
14397 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14398 exp, target, false);
14399 case ALTIVEC_BUILTIN_LVX_V2DF:
14400 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
14401 exp, target, false);
14402 case ALTIVEC_BUILTIN_LVX_V2DI:
14403 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
14404 exp, target, false);
14405 case ALTIVEC_BUILTIN_LVX_V4SF:
14406 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
14407 exp, target, false);
14408 case ALTIVEC_BUILTIN_LVX:
14409 case ALTIVEC_BUILTIN_LVX_V4SI:
14410 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
14411 exp, target, false);
14412 case ALTIVEC_BUILTIN_LVX_V8HI:
14413 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
14414 exp, target, false);
14415 case ALTIVEC_BUILTIN_LVX_V16QI:
14416 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
14417 exp, target, false);
14418 case ALTIVEC_BUILTIN_LVLX:
14419 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14420 exp, target, true);
14421 case ALTIVEC_BUILTIN_LVLXL:
14422 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14423 exp, target, true);
14424 case ALTIVEC_BUILTIN_LVRX:
14425 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14426 exp, target, true);
14427 case ALTIVEC_BUILTIN_LVRXL:
14428 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14429 exp, target, true);
14430 case VSX_BUILTIN_LXVD2X_V1TI:
14431 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14432 exp, target, false);
14433 case VSX_BUILTIN_LXVD2X_V2DF:
14434 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14435 exp, target, false);
14436 case VSX_BUILTIN_LXVD2X_V2DI:
14437 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14438 exp, target, false);
14439 case VSX_BUILTIN_LXVW4X_V4SF:
14440 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14441 exp, target, false);
14442 case VSX_BUILTIN_LXVW4X_V4SI:
14443 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14444 exp, target, false);
14445 case VSX_BUILTIN_LXVW4X_V8HI:
14446 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14447 exp, target, false);
14448 case VSX_BUILTIN_LXVW4X_V16QI:
14449 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14450 exp, target, false);
14451 /* For the following loads, on big-endian it's OK to use any
14452 unaligned-capable load, so use a generic expander.  On
14453 little-endian, the exact element-reversing instruction must
14454 be used. */
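/* Illustrative sketch, not part of this file: assuming the .def
   spelling __builtin_vsx_ld_elemrev_v4si and a little-endian target,

       vector int v = __builtin_vsx_ld_elemrev_v4si (0, p);

   must leave v[0] == p[0]; a non-reversing vector load would not
   preserve element order there, which is why the element-reversing
   icode is selected below when !BYTES_BIG_ENDIAN.  */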
14455 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14456 {
14457 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14458 : CODE_FOR_vsx_ld_elemrev_v2df);
14459 return altivec_expand_lv_builtin (code, exp, target, false);
14460 }
14461 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14462 {
14463 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14464 : CODE_FOR_vsx_ld_elemrev_v2di);
14465 return altivec_expand_lv_builtin (code, exp, target, false);
14466 }
14467 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14468 {
14469 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14470 : CODE_FOR_vsx_ld_elemrev_v4sf);
14471 return altivec_expand_lv_builtin (code, exp, target, false);
14472 }
14473 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14474 {
14475 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14476 : CODE_FOR_vsx_ld_elemrev_v4si);
14477 return altivec_expand_lv_builtin (code, exp, target, false);
14478 }
14479 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14480 {
14481 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14482 : CODE_FOR_vsx_ld_elemrev_v8hi);
14483 return altivec_expand_lv_builtin (code, exp, target, false);
14484 }
14485 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14486 {
14487 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14488 : CODE_FOR_vsx_ld_elemrev_v16qi);
14489 return altivec_expand_lv_builtin (code, exp, target, false);
14490 }
14491 break;
14492 default:
14493 break;
14494       /* Fall through to the fallback below.  */
14495     }
14497 *expandedp = false;
14498 return NULL_RTX;
14501 /* Expand the builtin in EXP and store the result in TARGET. Store
14502 true in *EXPANDEDP if we found a builtin to expand. */
14503 static rtx
14504 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
14506 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14507 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14508 const struct builtin_description *d;
14509 size_t i;
14511 *expandedp = true;
14513 switch (fcode)
14515 case PAIRED_BUILTIN_STX:
14516 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
14517 case PAIRED_BUILTIN_LX:
14518 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
14519 default:
14520 break;
14521       /* Fall through to the predicate expansion below.  */
14522     }
14524 /* Expand the paired predicates. */
14525 d = bdesc_paired_preds;
14526 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
14527 if (d->code == fcode)
14528 return paired_expand_predicate_builtin (d->icode, exp, target);
14530 *expandedp = false;
14531 return NULL_RTX;
14534 /* Binops that need to be initialized manually, but can be expanded
14535 automagically by rs6000_expand_binop_builtin (see the sketch below).  */
14536 static const struct builtin_description bdesc_2arg_spe[] =
14538 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
14539 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
14540 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
14541 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
14542 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
14543 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
14544 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
14545 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
14546 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
14547 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
14548 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
14549 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
14550 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
14551 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
14552 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
14553 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
14554 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
14555 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
14556 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
14557 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
14558 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
14559 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
14560 };
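/* A sketch of how this table is consumed (see spe_expand_builtin
   below): a call such as __builtin_spe_evlddx (p, off) matches the
   SPE_BUILTIN_EVLDDX entry in the scan loop and is handed to
   rs6000_expand_binop_builtin with CODE_FOR_spe_evlddx.  */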
14562 /* Expand the builtin in EXP and store the result in TARGET. Store
14563 true in *EXPANDEDP if we found a builtin to expand.
14565 This expands the SPE builtins that are not simple unary and binary
14566 operations. */
14567 static rtx
14568 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
14570 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14571 tree arg1, arg0;
14572 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14573 enum insn_code icode;
14574 machine_mode tmode, mode0;
14575 rtx pat, op0;
14576 const struct builtin_description *d;
14577 size_t i;
14579 *expandedp = true;
14581 /* Syntax check for a 5-bit unsigned immediate. */
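/* For example (assumed user code), __builtin_spe_evstdd (v, p, 31)
   passes this check, while __builtin_spe_evstdd (v, p, 32) or a
   non-constant third argument is rejected with the error below.  */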
14582 switch (fcode)
14584 case SPE_BUILTIN_EVSTDD:
14585 case SPE_BUILTIN_EVSTDH:
14586 case SPE_BUILTIN_EVSTDW:
14587 case SPE_BUILTIN_EVSTWHE:
14588 case SPE_BUILTIN_EVSTWHO:
14589 case SPE_BUILTIN_EVSTWWE:
14590 case SPE_BUILTIN_EVSTWWO:
14591 arg1 = CALL_EXPR_ARG (exp, 2);
14592 if (TREE_CODE (arg1) != INTEGER_CST
14593 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14595 error ("argument 2 must be a 5-bit unsigned literal");
14596 return const0_rtx;
14598 break;
14599 default:
14600 break;
14603 /* The evsplat*i instructions are not quite generic; they take a 5-bit signed immediate rather than a register operand.  */
14604 switch (fcode)
14606 case SPE_BUILTIN_EVSPLATFI:
14607 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
14608 exp, target);
14609 case SPE_BUILTIN_EVSPLATI:
14610 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
14611 exp, target);
14612 default:
14613 break;
14616 d = bdesc_2arg_spe;
14617 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
14618 if (d->code == fcode)
14619 return rs6000_expand_binop_builtin (d->icode, exp, target);
14621 d = bdesc_spe_predicates;
14622 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
14623 if (d->code == fcode)
14624 return spe_expand_predicate_builtin (d->icode, exp, target);
14626 d = bdesc_spe_evsel;
14627 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
14628 if (d->code == fcode)
14629 return spe_expand_evsel_builtin (d->icode, exp, target);
14631 switch (fcode)
14633 case SPE_BUILTIN_EVSTDDX:
14634 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
14635 case SPE_BUILTIN_EVSTDHX:
14636 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
14637 case SPE_BUILTIN_EVSTDWX:
14638 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
14639 case SPE_BUILTIN_EVSTWHEX:
14640 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
14641 case SPE_BUILTIN_EVSTWHOX:
14642 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
14643 case SPE_BUILTIN_EVSTWWEX:
14644 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
14645 case SPE_BUILTIN_EVSTWWOX:
14646 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
14647 case SPE_BUILTIN_EVSTDD:
14648 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
14649 case SPE_BUILTIN_EVSTDH:
14650 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
14651 case SPE_BUILTIN_EVSTDW:
14652 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
14653 case SPE_BUILTIN_EVSTWHE:
14654 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
14655 case SPE_BUILTIN_EVSTWHO:
14656 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
14657 case SPE_BUILTIN_EVSTWWE:
14658 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
14659 case SPE_BUILTIN_EVSTWWO:
14660 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
14661 case SPE_BUILTIN_MFSPEFSCR:
14662 icode = CODE_FOR_spe_mfspefscr;
14663 tmode = insn_data[icode].operand[0].mode;
14665 if (target == 0
14666 || GET_MODE (target) != tmode
14667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14668 target = gen_reg_rtx (tmode);
14670 pat = GEN_FCN (icode) (target);
14671 if (! pat)
14672 return 0;
14673 emit_insn (pat);
14674 return target;
14675 case SPE_BUILTIN_MTSPEFSCR:
14676 icode = CODE_FOR_spe_mtspefscr;
14677 arg0 = CALL_EXPR_ARG (exp, 0);
14678 op0 = expand_normal (arg0);
14679 mode0 = insn_data[icode].operand[0].mode;
14681 if (arg0 == error_mark_node)
14682 return const0_rtx;
14684 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14685 op0 = copy_to_mode_reg (mode0, op0);
14687 pat = GEN_FCN (icode) (op0);
14688 if (pat)
14689 emit_insn (pat);
14690 return NULL_RTX;
14691 default:
14692 break;
14695 *expandedp = false;
14696 return NULL_RTX;
14699 static rtx
14700 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14702 rtx pat, scratch, tmp;
14703 tree form = CALL_EXPR_ARG (exp, 0);
14704 tree arg0 = CALL_EXPR_ARG (exp, 1);
14705 tree arg1 = CALL_EXPR_ARG (exp, 2);
14706 rtx op0 = expand_normal (arg0);
14707 rtx op1 = expand_normal (arg1);
14708 machine_mode mode0 = insn_data[icode].operand[1].mode;
14709 machine_mode mode1 = insn_data[icode].operand[2].mode;
14710 int form_int;
14711 enum rtx_code code;
14713 if (TREE_CODE (form) != INTEGER_CST)
14715 error ("argument 1 of __builtin_paired_predicate must be a constant");
14716 return const0_rtx;
14718 else
14719 form_int = TREE_INT_CST_LOW (form);
14721 gcc_assert (mode0 == mode1);
14723 if (arg0 == error_mark_node || arg1 == error_mark_node)
14724 return const0_rtx;
14726 if (target == 0
14727 || GET_MODE (target) != SImode
14728 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
14729 target = gen_reg_rtx (SImode);
14730 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14731 op0 = copy_to_mode_reg (mode0, op0);
14732 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14733 op1 = copy_to_mode_reg (mode1, op1);
14735 scratch = gen_reg_rtx (CCFPmode);
14737 pat = GEN_FCN (icode) (scratch, op0, op1);
14738 if (!pat)
14739 return const0_rtx;
14741 emit_insn (pat);
14743 switch (form_int)
14745 /* LT bit. */
14746 case 0:
14747 code = LT;
14748 break;
14749 /* GT bit. */
14750 case 1:
14751 code = GT;
14752 break;
14753 /* EQ bit. */
14754 case 2:
14755 code = EQ;
14756 break;
14757 /* UN bit. */
14758 case 3:
14759 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14760 return target;
14761 default:
14762 error ("argument 1 of __builtin_paired_predicate is out of range");
14763 return const0_rtx;
14766 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14767 emit_move_insn (target, tmp);
14768 return target;
14771 static rtx
14772 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14774 rtx pat, scratch, tmp;
14775 tree form = CALL_EXPR_ARG (exp, 0);
14776 tree arg0 = CALL_EXPR_ARG (exp, 1);
14777 tree arg1 = CALL_EXPR_ARG (exp, 2);
14778 rtx op0 = expand_normal (arg0);
14779 rtx op1 = expand_normal (arg1);
14780 machine_mode mode0 = insn_data[icode].operand[1].mode;
14781 machine_mode mode1 = insn_data[icode].operand[2].mode;
14782 int form_int;
14783 enum rtx_code code;
14785 if (TREE_CODE (form) != INTEGER_CST)
14787 error ("argument 1 of __builtin_spe_predicate must be a constant");
14788 return const0_rtx;
14790 else
14791 form_int = TREE_INT_CST_LOW (form);
14793 gcc_assert (mode0 == mode1);
14795 if (arg0 == error_mark_node || arg1 == error_mark_node)
14796 return const0_rtx;
14798 if (target == 0
14799 || GET_MODE (target) != SImode
14800 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
14801 target = gen_reg_rtx (SImode);
14803 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14804 op0 = copy_to_mode_reg (mode0, op0);
14805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14806 op1 = copy_to_mode_reg (mode1, op1);
14808 scratch = gen_reg_rtx (CCmode);
14810 pat = GEN_FCN (icode) (scratch, op0, op1);
14811 if (! pat)
14812 return const0_rtx;
14813 emit_insn (pat);
14815 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
14816 _lower_.  We use one compare, but look in different bits of the
14817 CR for each variant.
14819 There are 2 elements in each SPE simd type (upper/lower).  The CR
14820 bits are set as follows:
14822 BIT 0 | BIT 1 | BIT 2   | BIT 3
14823 U     | L     | (U | L) | (U & L)
14825 So, for an "all" relationship, BIT 3 would be set.
14826 For an "any" relationship, BIT 2 would be set.  Etc.
14828 Following traditional nomenclature, these bits map to:
14830 BIT 0 | BIT 1 | BIT 2   | BIT 3
14831 LT    | GT    | EQ      | OV
14833 Later, we will generate rtl to look in the OV/EQ/LT/GT bits
14834 (for the all/any/upper/lower variants respectively).  */
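/* Worked example (hypothetical operands, assuming the usual
   spelling __builtin_spe_evcmpeq (form, a, b)): with form == 1
   ("any") we read BIT 2 == U | L, so the result is 1 iff at least
   one element pair compared equal; with form == 0 ("all") we read
   BIT 3 == U & L instead.  */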
14836 switch (form_int)
14838 /* All variant. OV bit. */
14839 case 0:
14840 /* We need to get to the OV bit, which is the ORDERED bit. We
14841 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
14842 that's ugly and will make validate_condition_mode die.
14843 So let's just use another pattern. */
14844 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14845 return target;
14846 /* Any variant. EQ bit. */
14847 case 1:
14848 code = EQ;
14849 break;
14850 /* Upper variant. LT bit. */
14851 case 2:
14852 code = LT;
14853 break;
14854 /* Lower variant. GT bit. */
14855 case 3:
14856 code = GT;
14857 break;
14858 default:
14859 error ("argument 1 of __builtin_spe_predicate is out of range");
14860 return const0_rtx;
14863 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14864 emit_move_insn (target, tmp);
14866 return target;
14869 /* The evsel builtins look like this:
14871 e = __builtin_spe_evsel_OP (a, b, c, d);
14873 and work like this:
14875 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14876 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
14877 */
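/* Schematically, the expansion below is one SPE compare into a CR
   field followed by an evsel that picks per element from that CR
   field; e.g. (assumed spelling)

       e = __builtin_spe_evsel_gts (a, b, c, d);

   compares a > b elementwise and selects the corresponding element
   of c where the compare held, of d where it did not.  */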
14879 static rtx
14880 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14882 rtx pat, scratch;
14883 tree arg0 = CALL_EXPR_ARG (exp, 0);
14884 tree arg1 = CALL_EXPR_ARG (exp, 1);
14885 tree arg2 = CALL_EXPR_ARG (exp, 2);
14886 tree arg3 = CALL_EXPR_ARG (exp, 3);
14887 rtx op0 = expand_normal (arg0);
14888 rtx op1 = expand_normal (arg1);
14889 rtx op2 = expand_normal (arg2);
14890 rtx op3 = expand_normal (arg3);
14891 machine_mode mode0 = insn_data[icode].operand[1].mode;
14892 machine_mode mode1 = insn_data[icode].operand[2].mode;
14894 gcc_assert (mode0 == mode1);
14896 if (arg0 == error_mark_node || arg1 == error_mark_node
14897 || arg2 == error_mark_node || arg3 == error_mark_node)
14898 return const0_rtx;
14900 if (target == 0
14901 || GET_MODE (target) != mode0
14902 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14903 target = gen_reg_rtx (mode0);
14905 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14906 op0 = copy_to_mode_reg (mode0, op0);
14907 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14908 op1 = copy_to_mode_reg (mode0, op1);
14909 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14910 op2 = copy_to_mode_reg (mode0, op2);
14911 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14912 op3 = copy_to_mode_reg (mode0, op3);
14914 /* Generate the compare. */
14915 scratch = gen_reg_rtx (CCmode);
14916 pat = GEN_FCN (icode) (scratch, op0, op1);
14917 if (! pat)
14918 return const0_rtx;
14919 emit_insn (pat);
14921 if (mode0 == V2SImode)
14922 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14923 else
14924 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14926 return target;
14929 /* Raise an error message for a builtin function that is called without the
14930 appropriate target options being set. */
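/* For instance (illustrative only), calling a VSX builtin such as
   __builtin_vsx_lxvd2x_v2df without -mvsx lands here and emits
   "Builtin function __builtin_vsx_lxvd2x_v2df requires the -mvsx
   option".  */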
14932 static void
14933 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14935 size_t uns_fncode = (size_t)fncode;
14936 const char *name = rs6000_builtin_info[uns_fncode].name;
14937 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14939 gcc_assert (name != NULL);
14940 if ((fnmask & RS6000_BTM_CELL) != 0)
14941 error ("Builtin function %s is only valid for the cell processor", name);
14942 else if ((fnmask & RS6000_BTM_VSX) != 0)
14943 error ("Builtin function %s requires the -mvsx option", name);
14944 else if ((fnmask & RS6000_BTM_HTM) != 0)
14945 error ("Builtin function %s requires the -mhtm option", name);
14946 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14947 error ("Builtin function %s requires the -maltivec option", name);
14948 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14949 error ("Builtin function %s requires the -mpaired option", name);
14950 else if ((fnmask & RS6000_BTM_SPE) != 0)
14951 error ("Builtin function %s requires the -mspe option", name);
14952 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14953 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14954 error ("Builtin function %s requires the -mhard-dfp and"
14955 " -mpower8-vector options", name);
14956 else if ((fnmask & RS6000_BTM_DFP) != 0)
14957 error ("Builtin function %s requires the -mhard-dfp option", name);
14958 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14959 error ("Builtin function %s requires the -mpower8-vector option", name);
14960 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14961 error ("Builtin function %s requires the -mpower9-vector option", name);
14962 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14963 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14964 error ("Builtin function %s requires the -mhard-float and"
14965 " -mlong-double-128 options", name);
14966 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14967 error ("Builtin function %s requires the -mhard-float option", name);
14968 else
14969 error ("Builtin function %s is not supported with the current options",
14970 name);
14973 /* Expand an expression EXP that calls a built-in function,
14974 with result going to TARGET if that's convenient
14975 (and in mode MODE if that's convenient).
14976 SUBTARGET may be used as the target for computing one of EXP's operands.
14977 IGNORE is nonzero if the value is to be ignored. */
14979 static rtx
14980 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14981 machine_mode mode ATTRIBUTE_UNUSED,
14982 int ignore ATTRIBUTE_UNUSED)
14984 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14985 enum rs6000_builtins fcode
14986 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14987 size_t uns_fcode = (size_t)fcode;
14988 const struct builtin_description *d;
14989 size_t i;
14990 rtx ret;
14991 bool success;
14992 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14993 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14995 if (TARGET_DEBUG_BUILTIN)
14997 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14998 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14999 const char *name2 = ((icode != CODE_FOR_nothing)
15000 ? get_insn_name ((int)icode)
15001 : "nothing");
15002 const char *name3;
15004 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15006 default: name3 = "unknown"; break;
15007 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15008 case RS6000_BTC_UNARY: name3 = "unary"; break;
15009 case RS6000_BTC_BINARY: name3 = "binary"; break;
15010 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15011 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15012 case RS6000_BTC_ABS: name3 = "abs"; break;
15013 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
15014 case RS6000_BTC_DST: name3 = "dst"; break;
15018 fprintf (stderr,
15019 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15020 (name1) ? name1 : "---", fcode,
15021 (name2) ? name2 : "---", (int)icode,
15022 name3,
15023 func_valid_p ? "" : ", not valid");
15026 if (!func_valid_p)
15028 rs6000_invalid_builtin (fcode);
15030 /* Given it is invalid, just generate a normal call. */
15031 return expand_call (exp, target, ignore);
15034 switch (fcode)
15036 case RS6000_BUILTIN_RECIP:
15037 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15039 case RS6000_BUILTIN_RECIPF:
15040 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15042 case RS6000_BUILTIN_RSQRTF:
15043 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15045 case RS6000_BUILTIN_RSQRT:
15046 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15048 case POWER7_BUILTIN_BPERMD:
15049 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15050 ? CODE_FOR_bpermd_di
15051 : CODE_FOR_bpermd_si), exp, target);
15053 case RS6000_BUILTIN_GET_TB:
15054 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15055 target);
15057 case RS6000_BUILTIN_MFTB:
15058 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15059 ? CODE_FOR_rs6000_mftb_di
15060 : CODE_FOR_rs6000_mftb_si),
15061 target);
15063 case RS6000_BUILTIN_MFFS:
15064 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15066 case RS6000_BUILTIN_MTFSF:
15067 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15069 case RS6000_BUILTIN_CPU_INIT:
15070 case RS6000_BUILTIN_CPU_IS:
15071 case RS6000_BUILTIN_CPU_SUPPORTS:
15072 return cpu_expand_builtin (fcode, exp, target);
15074 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15075 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15077 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15078 : (int) CODE_FOR_altivec_lvsl_direct);
15079 machine_mode tmode = insn_data[icode].operand[0].mode;
15080 machine_mode mode = insn_data[icode].operand[1].mode;
15081 tree arg;
15082 rtx op, addr, pat;
15084 gcc_assert (TARGET_ALTIVEC);
15086 arg = CALL_EXPR_ARG (exp, 0);
15087 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
15088 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
15089 addr = memory_address (mode, op);
15090 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
15091 op = addr;
15092 else
15094 /* For the load case we need to negate the address.  */
15095 op = gen_reg_rtx (GET_MODE (addr));
15096 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
15098 op = gen_rtx_MEM (mode, op);
15100 if (target == 0
15101 || GET_MODE (target) != tmode
15102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15103 target = gen_reg_rtx (tmode);
15105 pat = GEN_FCN (icode) (target, op);
15106 if (!pat)
15107 return 0;
15108 emit_insn (pat);
15110 return target;
15113 case ALTIVEC_BUILTIN_VCFUX:
15114 case ALTIVEC_BUILTIN_VCFSX:
15115 case ALTIVEC_BUILTIN_VCTUXS:
15116 case ALTIVEC_BUILTIN_VCTSXS:
15117 /* FIXME: There's got to be a nicer way to handle this case than
15118 constructing a new CALL_EXPR. */
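/* E.g. (assumed user code) __builtin_altivec_vcfux (v) may arrive
   here with a single argument; we canonicalize it to
   __builtin_altivec_vcfux (v, 0), i.e. a conversion with a zero
   scale factor, so the normal two-operand expander can handle it.  */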
15119 if (call_expr_nargs (exp) == 1)
15121 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
15122 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
15124 break;
15126 default:
15127 break;
15130 if (TARGET_ALTIVEC)
15132 ret = altivec_expand_builtin (exp, target, &success);
15134 if (success)
15135 return ret;
15137 if (TARGET_SPE)
15139 ret = spe_expand_builtin (exp, target, &success);
15141 if (success)
15142 return ret;
15144 if (TARGET_PAIRED_FLOAT)
15146 ret = paired_expand_builtin (exp, target, &success);
15148 if (success)
15149 return ret;
15151 if (TARGET_HTM)
15153 ret = htm_expand_builtin (exp, target, &success);
15155 if (success)
15156 return ret;
15159 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
15160 gcc_assert (attr == RS6000_BTC_UNARY
15161 || attr == RS6000_BTC_BINARY
15162 || attr == RS6000_BTC_TERNARY);
15164 /* Handle simple unary operations. */
15165 d = bdesc_1arg;
15166 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15167 if (d->code == fcode)
15168 return rs6000_expand_unop_builtin (d->icode, exp, target);
15170 /* Handle simple binary operations. */
15171 d = bdesc_2arg;
15172 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15173 if (d->code == fcode)
15174 return rs6000_expand_binop_builtin (d->icode, exp, target);
15176 /* Handle simple ternary operations. */
15177 d = bdesc_3arg;
15178 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15179 if (d->code == fcode)
15180 return rs6000_expand_ternop_builtin (d->icode, exp, target);
15182 gcc_unreachable ();
15185 static void
15186 rs6000_init_builtins (void)
15188 tree tdecl;
15189 tree ftype;
15190 machine_mode mode;
15192 if (TARGET_DEBUG_BUILTIN)
15193 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
15194 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
15195 (TARGET_SPE) ? ", spe" : "",
15196 (TARGET_ALTIVEC) ? ", altivec" : "",
15197 (TARGET_VSX) ? ", vsx" : "");
15199 V2SI_type_node = build_vector_type (intSI_type_node, 2);
15200 V2SF_type_node = build_vector_type (float_type_node, 2);
15201 V2DI_type_node = build_vector_type (intDI_type_node, 2);
15202 V2DF_type_node = build_vector_type (double_type_node, 2);
15203 V4HI_type_node = build_vector_type (intHI_type_node, 4);
15204 V4SI_type_node = build_vector_type (intSI_type_node, 4);
15205 V4SF_type_node = build_vector_type (float_type_node, 4);
15206 V8HI_type_node = build_vector_type (intHI_type_node, 8);
15207 V16QI_type_node = build_vector_type (intQI_type_node, 16);
15209 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
15210 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
15211 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
15212 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
15214 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
15215 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
15216 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
15217 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
15219 /* We use V1TI mode as a special container to hold __int128_t items that
15220 must live in VSX registers. */
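/* E.g. a "vector __int128" value (a single 128-bit element) is
   carried in V1TImode so that it stays in a VSX register rather
   than being split across general purpose registers.  */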
15221 if (intTI_type_node)
15223 V1TI_type_node = build_vector_type (intTI_type_node, 1);
15224 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
15227 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
15228 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
15229 'vector unsigned short'. */
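/* Illustrative example (the actual overload sets live in altivec.h):
   in C++,

       vector bool int m = vec_cmpeq (x, y);

   resolves cleanly only because "vector bool int" is a distinct type
   from "vector unsigned int"; folding the two together could make
   such overloads ambiguous.  */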
15231 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
15232 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15233 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
15234 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
15235 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15237 long_integer_type_internal_node = long_integer_type_node;
15238 long_unsigned_type_internal_node = long_unsigned_type_node;
15239 long_long_integer_type_internal_node = long_long_integer_type_node;
15240 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
15241 intQI_type_internal_node = intQI_type_node;
15242 uintQI_type_internal_node = unsigned_intQI_type_node;
15243 intHI_type_internal_node = intHI_type_node;
15244 uintHI_type_internal_node = unsigned_intHI_type_node;
15245 intSI_type_internal_node = intSI_type_node;
15246 uintSI_type_internal_node = unsigned_intSI_type_node;
15247 intDI_type_internal_node = intDI_type_node;
15248 uintDI_type_internal_node = unsigned_intDI_type_node;
15249 intTI_type_internal_node = intTI_type_node;
15250 uintTI_type_internal_node = unsigned_intTI_type_node;
15251 float_type_internal_node = float_type_node;
15252 double_type_internal_node = double_type_node;
15253 long_double_type_internal_node = long_double_type_node;
15254 dfloat64_type_internal_node = dfloat64_type_node;
15255 dfloat128_type_internal_node = dfloat128_type_node;
15256 void_type_internal_node = void_type_node;
15258 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
15259 IFmode is the IBM extended 128-bit format that is a pair of doubles.
15260 TFmode will be either IEEE 128-bit floating point or the IBM double-double
15261 format that uses a pair of doubles, depending on the switches and
15262 defaults. */
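/* Illustrative mapping, assuming TARGET_FLOAT128:

       __float128 x;   // KFmode, IEEE binary128
       __ibm128   y;   // IFmode, IBM double-double

   Both are 128 bits wide, but only KFmode is an IEEE format; TFmode
   aliases one of the two depending on the switches above.  */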
15263 if (TARGET_FLOAT128)
15265 ibm128_float_type_node = make_node (REAL_TYPE);
15266 TYPE_PRECISION (ibm128_float_type_node) = 128;
15267 layout_type (ibm128_float_type_node);
15268 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
15270 ieee128_float_type_node = make_node (REAL_TYPE);
15271 TYPE_PRECISION (ieee128_float_type_node) = 128;
15272 layout_type (ieee128_float_type_node);
15273 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
15275 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
15276 "__float128");
15278 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
15279 "__ibm128");
15282 /* Initialize the modes for builtin_function_type, mapping a machine mode to
15283 tree type node. */
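/* So, for example, builtin_mode_to_type[V4SImode][1] yields
   unsigned_V4SI_type_node below; index 0 selects the signed (or
   only) variant of a mode and index 1 the unsigned variant.  */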
15284 builtin_mode_to_type[QImode][0] = integer_type_node;
15285 builtin_mode_to_type[HImode][0] = integer_type_node;
15286 builtin_mode_to_type[SImode][0] = intSI_type_node;
15287 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
15288 builtin_mode_to_type[DImode][0] = intDI_type_node;
15289 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
15290 builtin_mode_to_type[TImode][0] = intTI_type_node;
15291 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
15292 builtin_mode_to_type[SFmode][0] = float_type_node;
15293 builtin_mode_to_type[DFmode][0] = double_type_node;
15294 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
15295 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
15296 builtin_mode_to_type[TFmode][0] = long_double_type_node;
15297 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
15298 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
15299 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
15300 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
15301 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
15302 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
15303 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
15304 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
15305 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
15306 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
15307 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
15308 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
15309 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
15310 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
15311 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
15312 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
15313 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
15315 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
15316 TYPE_NAME (bool_char_type_node) = tdecl;
15318 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
15319 TYPE_NAME (bool_short_type_node) = tdecl;
15321 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
15322 TYPE_NAME (bool_int_type_node) = tdecl;
15324 tdecl = add_builtin_type ("__pixel", pixel_type_node);
15325 TYPE_NAME (pixel_type_node) = tdecl;
15327 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
15328 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
15329 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
15330 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
15331 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
15333 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
15334 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
15336 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
15337 TYPE_NAME (V16QI_type_node) = tdecl;
15339 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
15340 TYPE_NAME (bool_V16QI_type_node) = tdecl;
15342 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
15343 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
15345 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
15346 TYPE_NAME (V8HI_type_node) = tdecl;
15348 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
15349 TYPE_NAME (bool_V8HI_type_node) = tdecl;
15351 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
15352 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
15354 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
15355 TYPE_NAME (V4SI_type_node) = tdecl;
15357 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
15358 TYPE_NAME (bool_V4SI_type_node) = tdecl;
15360 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
15361 TYPE_NAME (V4SF_type_node) = tdecl;
15363 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
15364 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
15366 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
15367 TYPE_NAME (V2DF_type_node) = tdecl;
15369 if (TARGET_POWERPC64)
15371 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
15372 TYPE_NAME (V2DI_type_node) = tdecl;
15374 tdecl = add_builtin_type ("__vector unsigned long",
15375 unsigned_V2DI_type_node);
15376 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15378 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
15379 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15381 else
15383 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
15384 TYPE_NAME (V2DI_type_node) = tdecl;
15386 tdecl = add_builtin_type ("__vector unsigned long long",
15387 unsigned_V2DI_type_node);
15388 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15390 tdecl = add_builtin_type ("__vector __bool long long",
15391 bool_V2DI_type_node);
15392 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15395 if (V1TI_type_node)
15397 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
15398 TYPE_NAME (V1TI_type_node) = tdecl;
15400 tdecl = add_builtin_type ("__vector unsigned __int128",
15401 unsigned_V1TI_type_node);
15402 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
15405 /* Paired and SPE builtins are only available if the compiler was
15406 configured with the corresponding options, so create those builtins
15407 only when their option is enabled.  Create AltiVec and VSX builtins on
15408 machines with at least the general purpose extensions (970 and newer)
15409 to allow the use of the target attribute. */
15410 if (TARGET_PAIRED_FLOAT)
15411 paired_init_builtins ();
15412 if (TARGET_SPE)
15413 spe_init_builtins ();
15414 if (TARGET_EXTRA_BUILTINS)
15415 altivec_init_builtins ();
15416 if (TARGET_HTM)
15417 htm_init_builtins ();
15419 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
15420 rs6000_common_init_builtins ();
15422 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
15423 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
15424 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
15426 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
15427 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
15428 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
15430 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
15431 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
15432 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
15434 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
15435 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
15436 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
15438 mode = (TARGET_64BIT) ? DImode : SImode;
15439 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
15440 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
15441 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
15443 ftype = build_function_type_list (unsigned_intDI_type_node,
15444 NULL_TREE);
15445 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
15447 if (TARGET_64BIT)
15448 ftype = build_function_type_list (unsigned_intDI_type_node,
15449 NULL_TREE);
15450 else
15451 ftype = build_function_type_list (unsigned_intSI_type_node,
15452 NULL_TREE);
15453 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
15455 ftype = build_function_type_list (double_type_node, NULL_TREE);
15456 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
15458 ftype = build_function_type_list (void_type_node,
15459 intSI_type_node, double_type_node,
15460 NULL_TREE);
15461 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
15463 ftype = build_function_type_list (void_type_node, NULL_TREE);
15464 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
15466 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
15467 NULL_TREE);
15468 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
15469 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
15471 #if TARGET_XCOFF
15472 /* AIX libm provides clog as __clog. */
15473 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
15474 set_user_assembler_name (tdecl, "__clog");
15475 #endif
15477 #ifdef SUBTARGET_INIT_BUILTINS
15478 SUBTARGET_INIT_BUILTINS;
15479 #endif
15482 /* Returns the rs6000 builtin decl for CODE. */
15484 static tree
15485 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
15487 HOST_WIDE_INT fnmask;
15489 if (code >= RS6000_BUILTIN_COUNT)
15490 return error_mark_node;
15492 fnmask = rs6000_builtin_info[code].mask;
15493 if ((fnmask & rs6000_builtin_mask) != fnmask)
15495 rs6000_invalid_builtin ((enum rs6000_builtins)code);
15496 return error_mark_node;
15499 return rs6000_builtin_decls[code];
15502 static void
15503 spe_init_builtins (void)
15505 tree puint_type_node = build_pointer_type (unsigned_type_node);
15506 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
15507 const struct builtin_description *d;
15508 size_t i;
15510 tree v2si_ftype_4_v2si
15511 = build_function_type_list (opaque_V2SI_type_node,
15512 opaque_V2SI_type_node,
15513 opaque_V2SI_type_node,
15514 opaque_V2SI_type_node,
15515 opaque_V2SI_type_node,
15516 NULL_TREE);
15518 tree v2sf_ftype_4_v2sf
15519 = build_function_type_list (opaque_V2SF_type_node,
15520 opaque_V2SF_type_node,
15521 opaque_V2SF_type_node,
15522 opaque_V2SF_type_node,
15523 opaque_V2SF_type_node,
15524 NULL_TREE);
15526 tree int_ftype_int_v2si_v2si
15527 = build_function_type_list (integer_type_node,
15528 integer_type_node,
15529 opaque_V2SI_type_node,
15530 opaque_V2SI_type_node,
15531 NULL_TREE);
15533 tree int_ftype_int_v2sf_v2sf
15534 = build_function_type_list (integer_type_node,
15535 integer_type_node,
15536 opaque_V2SF_type_node,
15537 opaque_V2SF_type_node,
15538 NULL_TREE);
15540 tree void_ftype_v2si_puint_int
15541 = build_function_type_list (void_type_node,
15542 opaque_V2SI_type_node,
15543 puint_type_node,
15544 integer_type_node,
15545 NULL_TREE);
15547 tree void_ftype_v2si_puint_char
15548 = build_function_type_list (void_type_node,
15549 opaque_V2SI_type_node,
15550 puint_type_node,
15551 char_type_node,
15552 NULL_TREE);
15554 tree void_ftype_v2si_pv2si_int
15555 = build_function_type_list (void_type_node,
15556 opaque_V2SI_type_node,
15557 opaque_p_V2SI_type_node,
15558 integer_type_node,
15559 NULL_TREE);
15561 tree void_ftype_v2si_pv2si_char
15562 = build_function_type_list (void_type_node,
15563 opaque_V2SI_type_node,
15564 opaque_p_V2SI_type_node,
15565 char_type_node,
15566 NULL_TREE);
15568 tree void_ftype_int
15569 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15571 tree int_ftype_void
15572 = build_function_type_list (integer_type_node, NULL_TREE);
15574 tree v2si_ftype_pv2si_int
15575 = build_function_type_list (opaque_V2SI_type_node,
15576 opaque_p_V2SI_type_node,
15577 integer_type_node,
15578 NULL_TREE);
15580 tree v2si_ftype_puint_int
15581 = build_function_type_list (opaque_V2SI_type_node,
15582 puint_type_node,
15583 integer_type_node,
15584 NULL_TREE);
15586 tree v2si_ftype_pushort_int
15587 = build_function_type_list (opaque_V2SI_type_node,
15588 pushort_type_node,
15589 integer_type_node,
15590 NULL_TREE);
15592 tree v2si_ftype_signed_char
15593 = build_function_type_list (opaque_V2SI_type_node,
15594 signed_char_type_node,
15595 NULL_TREE);
15597 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
15599 /* Initialize irregular SPE builtins. */
15601 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
15602 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
15603 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
15604 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
15605 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
15606 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
15607 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
15608 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
15609 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
15610 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
15611 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
15612 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
15613 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
15614 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
15615 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
15616 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
15617 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
15618 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
15620 /* Loads. */
15621 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
15622 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
15623 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
15624 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
15625 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
15626 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
15627 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
15628 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
15629 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
15630 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
15631 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
15632 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
15633 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
15634 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
15635 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
15636 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
15637 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
15638 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
15639 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
15640 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
15641 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
15642 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
15644 /* Predicates. */
15645 d = bdesc_spe_predicates;
15646 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
15648 tree type;
15650 switch (insn_data[d->icode].operand[1].mode)
15652 case V2SImode:
15653 type = int_ftype_int_v2si_v2si;
15654 break;
15655 case V2SFmode:
15656 type = int_ftype_int_v2sf_v2sf;
15657 break;
15658 default:
15659 gcc_unreachable ();
15662 def_builtin (d->name, type, d->code);
15665 /* Evsel predicates. */
15666 d = bdesc_spe_evsel;
15667 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
15669 tree type;
15671 switch (insn_data[d->icode].operand[1].mode)
15673 case V2SImode:
15674 type = v2si_ftype_4_v2si;
15675 break;
15676 case V2SFmode:
15677 type = v2sf_ftype_4_v2sf;
15678 break;
15679 default:
15680 gcc_unreachable ();
15683 def_builtin (d->name, type, d->code);
15687 static void
15688 paired_init_builtins (void)
15690 const struct builtin_description *d;
15691 size_t i;
15693 tree int_ftype_int_v2sf_v2sf
15694 = build_function_type_list (integer_type_node,
15695 integer_type_node,
15696 V2SF_type_node,
15697 V2SF_type_node,
15698 NULL_TREE);
15699 tree pcfloat_type_node =
15700 build_pointer_type (build_qualified_type
15701 (float_type_node, TYPE_QUAL_CONST));
15703 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
15704 long_integer_type_node,
15705 pcfloat_type_node,
15706 NULL_TREE);
15707 tree void_ftype_v2sf_long_pcfloat =
15708 build_function_type_list (void_type_node,
15709 V2SF_type_node,
15710 long_integer_type_node,
15711 pcfloat_type_node,
15712 NULL_TREE);
15715 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
15716 PAIRED_BUILTIN_LX);
15719 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
15720 PAIRED_BUILTIN_STX);
15722 /* Predicates. */
15723 d = bdesc_paired_preds;
15724 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
15726 tree type;
15728 if (TARGET_DEBUG_BUILTIN)
15729 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
15730 (int)i, get_insn_name (d->icode), (int)d->icode,
15731 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
15733 switch (insn_data[d->icode].operand[1].mode)
15735 case V2SFmode:
15736 type = int_ftype_int_v2sf_v2sf;
15737 break;
15738 default:
15739 gcc_unreachable ();
15742 def_builtin (d->name, type, d->code);
15746 static void
15747 altivec_init_builtins (void)
15749 const struct builtin_description *d;
15750 size_t i;
15751 tree ftype;
15752 tree decl;
15754 tree pvoid_type_node = build_pointer_type (void_type_node);
15756 tree pcvoid_type_node
15757 = build_pointer_type (build_qualified_type (void_type_node,
15758 TYPE_QUAL_CONST));
15760 tree int_ftype_opaque
15761 = build_function_type_list (integer_type_node,
15762 opaque_V4SI_type_node, NULL_TREE);
15763 tree opaque_ftype_opaque
15764 = build_function_type_list (integer_type_node, NULL_TREE);
15765 tree opaque_ftype_opaque_int
15766 = build_function_type_list (opaque_V4SI_type_node,
15767 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
15768 tree opaque_ftype_opaque_opaque_int
15769 = build_function_type_list (opaque_V4SI_type_node,
15770 opaque_V4SI_type_node, opaque_V4SI_type_node,
15771 integer_type_node, NULL_TREE);
15772 tree opaque_ftype_opaque_opaque_opaque
15773 = build_function_type_list (opaque_V4SI_type_node,
15774 opaque_V4SI_type_node, opaque_V4SI_type_node,
15775 opaque_V4SI_type_node, NULL_TREE);
15776 tree int_ftype_int_opaque_opaque
15777 = build_function_type_list (integer_type_node,
15778 integer_type_node, opaque_V4SI_type_node,
15779 opaque_V4SI_type_node, NULL_TREE);
15780 tree int_ftype_int_v4si_v4si
15781 = build_function_type_list (integer_type_node,
15782 integer_type_node, V4SI_type_node,
15783 V4SI_type_node, NULL_TREE);
15784 tree int_ftype_int_v2di_v2di
15785 = build_function_type_list (integer_type_node,
15786 integer_type_node, V2DI_type_node,
15787 V2DI_type_node, NULL_TREE);
15788 tree void_ftype_v4si
15789 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
15790 tree v8hi_ftype_void
15791 = build_function_type_list (V8HI_type_node, NULL_TREE);
15792 tree void_ftype_void
15793 = build_function_type_list (void_type_node, NULL_TREE);
15794 tree void_ftype_int
15795 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15797 tree opaque_ftype_long_pcvoid
15798 = build_function_type_list (opaque_V4SI_type_node,
15799 long_integer_type_node, pcvoid_type_node,
15800 NULL_TREE);
15801 tree v16qi_ftype_long_pcvoid
15802 = build_function_type_list (V16QI_type_node,
15803 long_integer_type_node, pcvoid_type_node,
15804 NULL_TREE);
15805 tree v8hi_ftype_long_pcvoid
15806 = build_function_type_list (V8HI_type_node,
15807 long_integer_type_node, pcvoid_type_node,
15808 NULL_TREE);
15809 tree v4si_ftype_long_pcvoid
15810 = build_function_type_list (V4SI_type_node,
15811 long_integer_type_node, pcvoid_type_node,
15812 NULL_TREE);
15813 tree v4sf_ftype_long_pcvoid
15814 = build_function_type_list (V4SF_type_node,
15815 long_integer_type_node, pcvoid_type_node,
15816 NULL_TREE);
15817 tree v2df_ftype_long_pcvoid
15818 = build_function_type_list (V2DF_type_node,
15819 long_integer_type_node, pcvoid_type_node,
15820 NULL_TREE);
15821 tree v2di_ftype_long_pcvoid
15822 = build_function_type_list (V2DI_type_node,
15823 long_integer_type_node, pcvoid_type_node,
15824 NULL_TREE);
15826 tree void_ftype_opaque_long_pvoid
15827 = build_function_type_list (void_type_node,
15828 opaque_V4SI_type_node, long_integer_type_node,
15829 pvoid_type_node, NULL_TREE);
15830 tree void_ftype_v4si_long_pvoid
15831 = build_function_type_list (void_type_node,
15832 V4SI_type_node, long_integer_type_node,
15833 pvoid_type_node, NULL_TREE);
15834 tree void_ftype_v16qi_long_pvoid
15835 = build_function_type_list (void_type_node,
15836 V16QI_type_node, long_integer_type_node,
15837 pvoid_type_node, NULL_TREE);
15838 tree void_ftype_v8hi_long_pvoid
15839 = build_function_type_list (void_type_node,
15840 V8HI_type_node, long_integer_type_node,
15841 pvoid_type_node, NULL_TREE);
15842 tree void_ftype_v4sf_long_pvoid
15843 = build_function_type_list (void_type_node,
15844 V4SF_type_node, long_integer_type_node,
15845 pvoid_type_node, NULL_TREE);
15846 tree void_ftype_v2df_long_pvoid
15847 = build_function_type_list (void_type_node,
15848 V2DF_type_node, long_integer_type_node,
15849 pvoid_type_node, NULL_TREE);
15850 tree void_ftype_v2di_long_pvoid
15851 = build_function_type_list (void_type_node,
15852 V2DI_type_node, long_integer_type_node,
15853 pvoid_type_node, NULL_TREE);
15854 tree int_ftype_int_v8hi_v8hi
15855 = build_function_type_list (integer_type_node,
15856 integer_type_node, V8HI_type_node,
15857 V8HI_type_node, NULL_TREE);
15858 tree int_ftype_int_v16qi_v16qi
15859 = build_function_type_list (integer_type_node,
15860 integer_type_node, V16QI_type_node,
15861 V16QI_type_node, NULL_TREE);
15862 tree int_ftype_int_v4sf_v4sf
15863 = build_function_type_list (integer_type_node,
15864 integer_type_node, V4SF_type_node,
15865 V4SF_type_node, NULL_TREE);
15866 tree int_ftype_int_v2df_v2df
15867 = build_function_type_list (integer_type_node,
15868 integer_type_node, V2DF_type_node,
15869 V2DF_type_node, NULL_TREE);
15870 tree v2di_ftype_v2di
15871 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
15872 tree v4si_ftype_v4si
15873 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15874 tree v8hi_ftype_v8hi
15875 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15876 tree v16qi_ftype_v16qi
15877 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15878 tree v4sf_ftype_v4sf
15879 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15880 tree v2df_ftype_v2df
15881 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15882 tree void_ftype_pcvoid_int_int
15883 = build_function_type_list (void_type_node,
15884 pcvoid_type_node, integer_type_node,
15885 integer_type_node, NULL_TREE);
15887 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
15888 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
15889 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
15890 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
15891 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
15892 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
15893 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
15894 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
15895 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
15896 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
15897 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
15898 ALTIVEC_BUILTIN_LVXL_V2DF);
15899 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
15900 ALTIVEC_BUILTIN_LVXL_V2DI);
15901 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
15902 ALTIVEC_BUILTIN_LVXL_V4SF);
15903 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
15904 ALTIVEC_BUILTIN_LVXL_V4SI);
15905 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15906 ALTIVEC_BUILTIN_LVXL_V8HI);
15907 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15908 ALTIVEC_BUILTIN_LVXL_V16QI);
15909 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15910 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15911 ALTIVEC_BUILTIN_LVX_V2DF);
15912 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15913 ALTIVEC_BUILTIN_LVX_V2DI);
15914 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15915 ALTIVEC_BUILTIN_LVX_V4SF);
15916 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15917 ALTIVEC_BUILTIN_LVX_V4SI);
15918 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15919 ALTIVEC_BUILTIN_LVX_V8HI);
15920 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15921 ALTIVEC_BUILTIN_LVX_V16QI);
15922 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15923 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15924 ALTIVEC_BUILTIN_STVX_V2DF);
15925 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15926 ALTIVEC_BUILTIN_STVX_V2DI);
15927 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15928 ALTIVEC_BUILTIN_STVX_V4SF);
15929 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15930 ALTIVEC_BUILTIN_STVX_V4SI);
15931 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15932 ALTIVEC_BUILTIN_STVX_V8HI);
15933 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15934 ALTIVEC_BUILTIN_STVX_V16QI);
15935 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15936 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15937 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15938 ALTIVEC_BUILTIN_STVXL_V2DF);
15939 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15940 ALTIVEC_BUILTIN_STVXL_V2DI);
15941 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15942 ALTIVEC_BUILTIN_STVXL_V4SF);
15943 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15944 ALTIVEC_BUILTIN_STVXL_V4SI);
15945 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15946 ALTIVEC_BUILTIN_STVXL_V8HI);
15947 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15948 ALTIVEC_BUILTIN_STVXL_V16QI);
15949 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15950 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15951 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15952 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15953 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15954 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15955 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15956 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15957 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15958 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15959 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15960 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15961 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15962 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15963 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15964 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15966 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15967 VSX_BUILTIN_LXVD2X_V2DF);
15968 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15969 VSX_BUILTIN_LXVD2X_V2DI);
15970 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15971 VSX_BUILTIN_LXVW4X_V4SF);
15972 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15973 VSX_BUILTIN_LXVW4X_V4SI);
15974 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15975 VSX_BUILTIN_LXVW4X_V8HI);
15976 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15977 VSX_BUILTIN_LXVW4X_V16QI);
15978 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15979 VSX_BUILTIN_STXVD2X_V2DF);
15980 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15981 VSX_BUILTIN_STXVD2X_V2DI);
15982 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15983 VSX_BUILTIN_STXVW4X_V4SF);
15984 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15985 VSX_BUILTIN_STXVW4X_V4SI);
15986 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15987 VSX_BUILTIN_STXVW4X_V8HI);
15988 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15989 VSX_BUILTIN_STXVW4X_V16QI);
15991 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
15992 VSX_BUILTIN_LD_ELEMREV_V2DF);
15993 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
15994 VSX_BUILTIN_LD_ELEMREV_V2DI);
15995 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
15996 VSX_BUILTIN_LD_ELEMREV_V4SF);
15997 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
15998 VSX_BUILTIN_LD_ELEMREV_V4SI);
15999 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16000 VSX_BUILTIN_ST_ELEMREV_V2DF);
16001 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16002 VSX_BUILTIN_ST_ELEMREV_V2DI);
16003 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16004 VSX_BUILTIN_ST_ELEMREV_V4SF);
16005 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16006 VSX_BUILTIN_ST_ELEMREV_V4SI);
16008 if (TARGET_P9_VECTOR)
16010 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16011 VSX_BUILTIN_LD_ELEMREV_V8HI);
16012 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16013 VSX_BUILTIN_LD_ELEMREV_V16QI);
16014 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
16015 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
16016 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
16017 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
16020 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16021 VSX_BUILTIN_VEC_LD);
16022 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16023 VSX_BUILTIN_VEC_ST);
16024 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16025 VSX_BUILTIN_VEC_XL);
16026 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16027 VSX_BUILTIN_VEC_XST);
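/* A sketch of how the overloaded entry points above are used: altivec.h
   spells these as vec_xl and vec_xst, which the front end resolves to a
   type-specific instance, e.g.

       vector double vd = vec_xl (0, (const double *) p);
       vec_xst (vd, 0, (double *) q);

   (the long offset argument comes first, matching the *_long_pcvoid
   signatures registered here).  */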
16029 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16030 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16031 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16033 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16034 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16035 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16036 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16037 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16038 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16039 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16040 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16041 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16042 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16043 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16044 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16046 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16047 ALTIVEC_BUILTIN_VEC_ADDE);
16049 /* Cell builtins. */
16050 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16051 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16052 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16053 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16055 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16056 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16057 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16058 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16060 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16061 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16062 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16063 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16065 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16066 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16067 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16068 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16070 /* Add the DST variants. */
16071 d = bdesc_dst;
16072 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16073 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16075 /* Initialize the predicates. */
16076 d = bdesc_altivec_preds;
16077 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16079 machine_mode mode1;
16080 tree type;
16082 if (rs6000_overloaded_builtin_p (d->code))
16083 mode1 = VOIDmode;
16084 else
16085 mode1 = insn_data[d->icode].operand[1].mode;
16087 switch (mode1)
16089 case VOIDmode:
16090 type = int_ftype_int_opaque_opaque;
16091 break;
16092 case V2DImode:
16093 type = int_ftype_int_v2di_v2di;
16094 break;
16095 case V4SImode:
16096 type = int_ftype_int_v4si_v4si;
16097 break;
16098 case V8HImode:
16099 type = int_ftype_int_v8hi_v8hi;
16100 break;
16101 case V16QImode:
16102 type = int_ftype_int_v16qi_v16qi;
16103 break;
16104 case V4SFmode:
16105 type = int_ftype_int_v4sf_v4sf;
16106 break;
16107 case V2DFmode:
16108 type = int_ftype_int_v2df_v2df;
16109 break;
16110 default:
16111 gcc_unreachable ();
16114 def_builtin (d->name, type, d->code);
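/* For instance, a predicate whose comparison insn takes V4SI operands is
   registered as int f (int, v4si, v4si); the leading int selects the
   CR6 test (all vs. any), and overloaded predicates fall back to the
   opaque variant above.  */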
16117 /* Initialize the abs* operators. */
16118 d = bdesc_abs;
16119 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16121 machine_mode mode0;
16122 tree type;
16124 mode0 = insn_data[d->icode].operand[0].mode;
16126 switch (mode0)
16128 case V2DImode:
16129 type = v2di_ftype_v2di;
16130 break;
16131 case V4SImode:
16132 type = v4si_ftype_v4si;
16133 break;
16134 case V8HImode:
16135 type = v8hi_ftype_v8hi;
16136 break;
16137 case V16QImode:
16138 type = v16qi_ftype_v16qi;
16139 break;
16140 case V4SFmode:
16141 type = v4sf_ftype_v4sf;
16142 break;
16143 case V2DFmode:
16144 type = v2df_ftype_v2df;
16145 break;
16146 default:
16147 gcc_unreachable ();
16150 def_builtin (d->name, type, d->code);
16153 /* Initialize the target builtin that implements
16154 targetm.vectorize.builtin_mask_for_load. */
16156 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16157 v16qi_ftype_long_pcvoid,
16158 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16159 BUILT_IN_MD, NULL, NULL_TREE);
16160 TREE_READONLY (decl) = 1;
16161 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16162 altivec_builtin_mask_for_load = decl;
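/* The autovectorizer calls this builtin through the hook above to
   compute a permute control vector for realigning a misaligned load;
   on AltiVec this amounts to an lvsl-style mask derived from the low
   bits of the address.  */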
16164 /* Access to the vec_init patterns. */
16165 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16166 integer_type_node, integer_type_node,
16167 integer_type_node, NULL_TREE);
16168 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
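/* For example, vector int v = __builtin_vec_init_v4si (1, 2, 3, 4);
   builds a V4SI from four scalars via the vec_init expander.  */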
16170 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16171 short_integer_type_node,
16172 short_integer_type_node,
16173 short_integer_type_node,
16174 short_integer_type_node,
16175 short_integer_type_node,
16176 short_integer_type_node,
16177 short_integer_type_node, NULL_TREE);
16178 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16180 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16181 char_type_node, char_type_node,
16182 char_type_node, char_type_node,
16183 char_type_node, char_type_node,
16184 char_type_node, char_type_node,
16185 char_type_node, char_type_node,
16186 char_type_node, char_type_node,
16187 char_type_node, char_type_node,
16188 char_type_node, NULL_TREE);
16189 def_builtin ("__builtin_vec_init_v16qi", ftype,
16190 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16192 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16193 float_type_node, float_type_node,
16194 float_type_node, NULL_TREE);
16195 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16197 /* VSX builtins. */
16198 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16199 double_type_node, NULL_TREE);
16200 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16202 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16203 intDI_type_node, NULL_TREE);
16204 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16206 /* Access to the vec_set patterns. */
16207 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16208 intSI_type_node,
16209 integer_type_node, NULL_TREE);
16210 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16212 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16213 intHI_type_node,
16214 integer_type_node, NULL_TREE);
16215 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16217 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16218 intQI_type_node,
16219 integer_type_node, NULL_TREE);
16220 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16222 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16223 float_type_node,
16224 integer_type_node, NULL_TREE);
16225 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16227 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16228 double_type_node,
16229 integer_type_node, NULL_TREE);
16230 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16232 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16233 intDI_type_node,
16234 integer_type_node, NULL_TREE);
16235 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
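/* For example, v = __builtin_vec_set_v4si (v, 42, 0); yields a copy of
   V with element 0 replaced by 42, using the vec_set pattern.  */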
16237 /* Access to the vec_extract patterns. */
16238 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16239 integer_type_node, NULL_TREE);
16240 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
16242 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16243 integer_type_node, NULL_TREE);
16244 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
16246 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16247 integer_type_node, NULL_TREE);
16248 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
16250 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16251 integer_type_node, NULL_TREE);
16252 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
16254 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16255 integer_type_node, NULL_TREE);
16256 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
16258 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
16259 integer_type_node, NULL_TREE);
16260 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
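/* For example, double d = __builtin_vec_ext_v2df (vd, 1); extracts
   element 1 through the vec_extract pattern.  */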
16263 if (V1TI_type_node)
16265 tree v1ti_ftype_long_pcvoid
16266 = build_function_type_list (V1TI_type_node,
16267 long_integer_type_node, pcvoid_type_node,
16268 NULL_TREE);
16269 tree void_ftype_v1ti_long_pvoid
16270 = build_function_type_list (void_type_node,
16271 V1TI_type_node, long_integer_type_node,
16272 pvoid_type_node, NULL_TREE);
16273 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
16274 VSX_BUILTIN_LXVD2X_V1TI);
16275 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
16276 VSX_BUILTIN_STXVD2X_V1TI);
16277 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
16278 NULL_TREE, NULL_TREE);
16279 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
16280 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
16281 intTI_type_node,
16282 integer_type_node, NULL_TREE);
16283 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
16284 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
16285 integer_type_node, NULL_TREE);
16286 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
16291 static void
16292 htm_init_builtins (void)
16294 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16295 const struct builtin_description *d;
16296 size_t i;
16298 d = bdesc_htm;
16299 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
16301 tree op[MAX_HTM_OPERANDS], type;
16302 HOST_WIDE_INT mask = d->mask;
16303 unsigned attr = rs6000_builtin_info[d->code].attr;
16304 bool void_func = (attr & RS6000_BTC_VOID);
16305 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
16306 int nopnds = 0;
16307 tree gpr_type_node;
16308 tree rettype;
16309 tree argtype;
16311 if (TARGET_32BIT && TARGET_POWERPC64)
16312 gpr_type_node = long_long_unsigned_type_node;
16313 else
16314 gpr_type_node = long_unsigned_type_node;
16316 if (attr & RS6000_BTC_SPR)
16318 rettype = gpr_type_node;
16319 argtype = gpr_type_node;
16321 else if (d->code == HTM_BUILTIN_TABORTDC
16322 || d->code == HTM_BUILTIN_TABORTDCI)
16324 rettype = unsigned_type_node;
16325 argtype = gpr_type_node;
16327 else
16329 rettype = unsigned_type_node;
16330 argtype = unsigned_type_node;
16333 if ((mask & builtin_mask) != mask)
16335 if (TARGET_DEBUG_BUILTIN)
16336 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
16337 continue;
16340 if (d->name == 0)
16342 if (TARGET_DEBUG_BUILTIN)
16343 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
16344 (long unsigned) i);
16345 continue;
16348 op[nopnds++] = (void_func) ? void_type_node : rettype;
16350 if (attr_args == RS6000_BTC_UNARY)
16351 op[nopnds++] = argtype;
16352 else if (attr_args == RS6000_BTC_BINARY)
16354 op[nopnds++] = argtype;
16355 op[nopnds++] = argtype;
16357 else if (attr_args == RS6000_BTC_TERNARY)
16359 op[nopnds++] = argtype;
16360 op[nopnds++] = argtype;
16361 op[nopnds++] = argtype;
16364 switch (nopnds)
16366 case 1:
16367 type = build_function_type_list (op[0], NULL_TREE);
16368 break;
16369 case 2:
16370 type = build_function_type_list (op[0], op[1], NULL_TREE);
16371 break;
16372 case 3:
16373 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
16374 break;
16375 case 4:
16376 type = build_function_type_list (op[0], op[1], op[2], op[3],
16377 NULL_TREE);
16378 break;
16379 default:
16380 gcc_unreachable ();
16383 def_builtin (d->name, type, d->code);
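/* Worked example: for a binary SPR builtin under -m32 -mpowerpc64,
   gpr_type_node is long long unsigned, so op[] collects three copies of
   that type and the nopnds == 3 case above registers the signature
   "unsigned long long f (unsigned long long, unsigned long long)".  */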
16387 /* Hash function for builtin functions with up to 3 arguments and a return
16388 type. */
16389 hashval_t
16390 builtin_hasher::hash (builtin_hash_struct *bh)
16392 unsigned ret = 0;
16393 int i;
16395 for (i = 0; i < 4; i++)
16397 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
16398 ret = (ret * 2) + bh->uns_p[i];
16401 return ret;
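/* Equivalently, with M == MAX_MACHINE_MODE the loop computes the
   mixed-radix packing
   (((mode[0] * 2 + uns_p[0]) * M + mode[1]) * 2 + uns_p[1]) * M + ...,
   so distinct (mode, signedness) signatures stay distinct until the
   value wraps.  */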
16404 /* Compare builtin hash entries H1 and H2 for equivalence. */
16405 bool
16406 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
16408 return ((p1->mode[0] == p2->mode[0])
16409 && (p1->mode[1] == p2->mode[1])
16410 && (p1->mode[2] == p2->mode[2])
16411 && (p1->mode[3] == p2->mode[3])
16412 && (p1->uns_p[0] == p2->uns_p[0])
16413 && (p1->uns_p[1] == p2->uns_p[1])
16414 && (p1->uns_p[2] == p2->uns_p[2])
16415 && (p1->uns_p[3] == p2->uns_p[3]));
16418 /* Map types for builtin functions with an explicit return type and up to 3
16419 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
16420 of the missing arguments. */
16421 static tree
16422 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
16423 machine_mode mode_arg1, machine_mode mode_arg2,
16424 enum rs6000_builtins builtin, const char *name)
16426 struct builtin_hash_struct h;
16427 struct builtin_hash_struct *h2;
16428 int num_args = 3;
16429 int i;
16430 tree ret_type = NULL_TREE;
16431 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
16433 /* Create builtin_hash_table. */
16434 if (builtin_hash_table == NULL)
16435 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
16437 h.type = NULL_TREE;
16438 h.mode[0] = mode_ret;
16439 h.mode[1] = mode_arg0;
16440 h.mode[2] = mode_arg1;
16441 h.mode[3] = mode_arg2;
16442 h.uns_p[0] = 0;
16443 h.uns_p[1] = 0;
16444 h.uns_p[2] = 0;
16445 h.uns_p[3] = 0;
16447 /* If the builtin produces unsigned results or takes unsigned
16448 arguments, and it is returned as a decl for the vectorizer (such as
16449 widening multiplies, permute), make sure the arguments and return value
16450 are typed correctly. */
16451 switch (builtin)
16453 /* unsigned 1 argument functions. */
16454 case CRYPTO_BUILTIN_VSBOX:
16455 case P8V_BUILTIN_VGBBD:
16456 case MISC_BUILTIN_CDTBCD:
16457 case MISC_BUILTIN_CBCDTD:
16458 h.uns_p[0] = 1;
16459 h.uns_p[1] = 1;
16460 break;
16462 /* unsigned 2 argument functions. */
16463 case ALTIVEC_BUILTIN_VMULEUB_UNS:
16464 case ALTIVEC_BUILTIN_VMULEUH_UNS:
16465 case ALTIVEC_BUILTIN_VMULOUB_UNS:
16466 case ALTIVEC_BUILTIN_VMULOUH_UNS:
16467 case CRYPTO_BUILTIN_VCIPHER:
16468 case CRYPTO_BUILTIN_VCIPHERLAST:
16469 case CRYPTO_BUILTIN_VNCIPHER:
16470 case CRYPTO_BUILTIN_VNCIPHERLAST:
16471 case CRYPTO_BUILTIN_VPMSUMB:
16472 case CRYPTO_BUILTIN_VPMSUMH:
16473 case CRYPTO_BUILTIN_VPMSUMW:
16474 case CRYPTO_BUILTIN_VPMSUMD:
16475 case CRYPTO_BUILTIN_VPMSUM:
16476 case MISC_BUILTIN_ADDG6S:
16477 case MISC_BUILTIN_DIVWEU:
16478 case MISC_BUILTIN_DIVWEUO:
16479 case MISC_BUILTIN_DIVDEU:
16480 case MISC_BUILTIN_DIVDEUO:
16481 h.uns_p[0] = 1;
16482 h.uns_p[1] = 1;
16483 h.uns_p[2] = 1;
16484 break;
16486 /* unsigned 3 argument functions. */
16487 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
16488 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
16489 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
16490 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
16491 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
16492 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
16493 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
16494 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
16495 case VSX_BUILTIN_VPERM_16QI_UNS:
16496 case VSX_BUILTIN_VPERM_8HI_UNS:
16497 case VSX_BUILTIN_VPERM_4SI_UNS:
16498 case VSX_BUILTIN_VPERM_2DI_UNS:
16499 case VSX_BUILTIN_XXSEL_16QI_UNS:
16500 case VSX_BUILTIN_XXSEL_8HI_UNS:
16501 case VSX_BUILTIN_XXSEL_4SI_UNS:
16502 case VSX_BUILTIN_XXSEL_2DI_UNS:
16503 case CRYPTO_BUILTIN_VPERMXOR:
16504 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
16505 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
16506 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
16507 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
16508 case CRYPTO_BUILTIN_VSHASIGMAW:
16509 case CRYPTO_BUILTIN_VSHASIGMAD:
16510 case CRYPTO_BUILTIN_VSHASIGMA:
16511 h.uns_p[0] = 1;
16512 h.uns_p[1] = 1;
16513 h.uns_p[2] = 1;
16514 h.uns_p[3] = 1;
16515 break;
16517 /* signed permute functions with unsigned char mask. */
16518 case ALTIVEC_BUILTIN_VPERM_16QI:
16519 case ALTIVEC_BUILTIN_VPERM_8HI:
16520 case ALTIVEC_BUILTIN_VPERM_4SI:
16521 case ALTIVEC_BUILTIN_VPERM_4SF:
16522 case ALTIVEC_BUILTIN_VPERM_2DI:
16523 case ALTIVEC_BUILTIN_VPERM_2DF:
16524 case VSX_BUILTIN_VPERM_16QI:
16525 case VSX_BUILTIN_VPERM_8HI:
16526 case VSX_BUILTIN_VPERM_4SI:
16527 case VSX_BUILTIN_VPERM_4SF:
16528 case VSX_BUILTIN_VPERM_2DI:
16529 case VSX_BUILTIN_VPERM_2DF:
16530 h.uns_p[3] = 1;
16531 break;
16533 /* unsigned args, signed return. */
16534 case VSX_BUILTIN_XVCVUXDDP_UNS:
16535 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
16536 h.uns_p[1] = 1;
16537 break;
16539 /* signed args, unsigned return. */
16540 case VSX_BUILTIN_XVCVDPUXDS_UNS:
16541 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
16542 case MISC_BUILTIN_UNPACK_TD:
16543 case MISC_BUILTIN_UNPACK_V1TI:
16544 h.uns_p[0] = 1;
16545 break;
16547 /* unsigned arguments for 128-bit pack instructions. */
16548 case MISC_BUILTIN_PACK_TD:
16549 case MISC_BUILTIN_PACK_V1TI:
16550 h.uns_p[1] = 1;
16551 h.uns_p[2] = 1;
16552 break;
16554 default:
16555 break;
16558 /* Figure out how many args are present. */
16559 while (num_args > 0 && h.mode[num_args] == VOIDmode)
16560 num_args--;
16562 if (num_args == 0)
16563 fatal_error (input_location,
16564 "internal error: builtin function %s had no type", name);
16566 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
16567 if (!ret_type && h.uns_p[0])
16568 ret_type = builtin_mode_to_type[h.mode[0]][0];
16570 if (!ret_type)
16571 fatal_error (input_location,
16572 "internal error: builtin function %s had an unexpected "
16573 "return type %s", name, GET_MODE_NAME (h.mode[0]));
16575 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
16576 arg_type[i] = NULL_TREE;
16578 for (i = 0; i < num_args; i++)
16580 int m = (int) h.mode[i+1];
16581 int uns_p = h.uns_p[i+1];
16583 arg_type[i] = builtin_mode_to_type[m][uns_p];
16584 if (!arg_type[i] && uns_p)
16585 arg_type[i] = builtin_mode_to_type[m][0];
16587 if (!arg_type[i])
16588 fatal_error (input_location,
16589 "internal error: builtin function %s, argument %d "
16590 "had unexpected argument type %s", name, i,
16591 GET_MODE_NAME (m));
16594 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
16595 if (*found == NULL)
16597 h2 = ggc_alloc<builtin_hash_struct> ();
16598 *h2 = h;
16599 *found = h2;
16601 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
16602 arg_type[2], NULL_TREE);
16605 return (*found)->type;
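/* Worked example: builtin_function_type (V4SImode, V4SImode, V4SImode,
   VOIDmode, ...) trims num_args from 3 to 2, maps the return and both
   arguments through builtin_mode_to_type[V4SImode][0], and caches
   "v4si f (v4si, v4si)" so later builtins of the same shape share one
   type tree.  */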
16608 static void
16609 rs6000_common_init_builtins (void)
16611 const struct builtin_description *d;
16612 size_t i;
16614 tree opaque_ftype_opaque = NULL_TREE;
16615 tree opaque_ftype_opaque_opaque = NULL_TREE;
16616 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
16617 tree v2si_ftype_qi = NULL_TREE;
16618 tree v2si_ftype_v2si_qi = NULL_TREE;
16619 tree v2si_ftype_int_qi = NULL_TREE;
16620 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16622 if (!TARGET_PAIRED_FLOAT)
16624 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
16625 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
16628 /* Paired and SPE builtins are only available if the compiler was built
16629 with the appropriate options, so create those builtins only when the
16630 corresponding option is enabled. Create Altivec and VSX builtins on
16631 machines with at least the general purpose extensions (970 and newer) to
16632 allow the use of the target attribute. */
16634 if (TARGET_EXTRA_BUILTINS)
16635 builtin_mask |= RS6000_BTM_COMMON;
16637 /* Add the ternary operators. */
16638 d = bdesc_3arg;
16639 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16641 tree type;
16642 HOST_WIDE_INT mask = d->mask;
16644 if ((mask & builtin_mask) != mask)
16646 if (TARGET_DEBUG_BUILTIN)
16647 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
16648 continue;
16651 if (rs6000_overloaded_builtin_p (d->code))
16653 if (! (type = opaque_ftype_opaque_opaque_opaque))
16654 type = opaque_ftype_opaque_opaque_opaque
16655 = build_function_type_list (opaque_V4SI_type_node,
16656 opaque_V4SI_type_node,
16657 opaque_V4SI_type_node,
16658 opaque_V4SI_type_node,
16659 NULL_TREE);
16661 else
16663 enum insn_code icode = d->icode;
16664 if (d->name == 0)
16666 if (TARGET_DEBUG_BUILTIN)
16667 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
16668 (long unsigned)i);
16670 continue;
16673 if (icode == CODE_FOR_nothing)
16675 if (TARGET_DEBUG_BUILTIN)
16676 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
16677 d->name);
16679 continue;
16682 type = builtin_function_type (insn_data[icode].operand[0].mode,
16683 insn_data[icode].operand[1].mode,
16684 insn_data[icode].operand[2].mode,
16685 insn_data[icode].operand[3].mode,
16686 d->code, d->name);
16689 def_builtin (d->name, type, d->code);
16692 /* Add the binary operators. */
16693 d = bdesc_2arg;
16694 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16696 machine_mode mode0, mode1, mode2;
16697 tree type;
16698 HOST_WIDE_INT mask = d->mask;
16700 if ((mask & builtin_mask) != mask)
16702 if (TARGET_DEBUG_BUILTIN)
16703 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
16704 continue;
16707 if (rs6000_overloaded_builtin_p (d->code))
16709 if (! (type = opaque_ftype_opaque_opaque))
16710 type = opaque_ftype_opaque_opaque
16711 = build_function_type_list (opaque_V4SI_type_node,
16712 opaque_V4SI_type_node,
16713 opaque_V4SI_type_node,
16714 NULL_TREE);
16716 else
16718 enum insn_code icode = d->icode;
16719 if (d->name == 0)
16721 if (TARGET_DEBUG_BUILTIN)
16722 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
16723 (long unsigned)i);
16725 continue;
16728 if (icode == CODE_FOR_nothing)
16730 if (TARGET_DEBUG_BUILTIN)
16731 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
16732 d->name);
16734 continue;
16737 mode0 = insn_data[icode].operand[0].mode;
16738 mode1 = insn_data[icode].operand[1].mode;
16739 mode2 = insn_data[icode].operand[2].mode;
16741 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
16743 if (! (type = v2si_ftype_v2si_qi))
16744 type = v2si_ftype_v2si_qi
16745 = build_function_type_list (opaque_V2SI_type_node,
16746 opaque_V2SI_type_node,
16747 char_type_node,
16748 NULL_TREE);
16751 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
16752 && mode2 == QImode)
16754 if (! (type = v2si_ftype_int_qi))
16755 type = v2si_ftype_int_qi
16756 = build_function_type_list (opaque_V2SI_type_node,
16757 integer_type_node,
16758 char_type_node,
16759 NULL_TREE);
16762 else
16763 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
16764 d->code, d->name);
16767 def_builtin (d->name, type, d->code);
16770 /* Add the simple unary operators. */
16771 d = bdesc_1arg;
16772 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16774 machine_mode mode0, mode1;
16775 tree type;
16776 HOST_WIDE_INT mask = d->mask;
16778 if ((mask & builtin_mask) != mask)
16780 if (TARGET_DEBUG_BUILTIN)
16781 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
16782 continue;
16785 if (rs6000_overloaded_builtin_p (d->code))
16787 if (! (type = opaque_ftype_opaque))
16788 type = opaque_ftype_opaque
16789 = build_function_type_list (opaque_V4SI_type_node,
16790 opaque_V4SI_type_node,
16791 NULL_TREE);
16793 else
16795 enum insn_code icode = d->icode;
16796 if (d->name == 0)
16798 if (TARGET_DEBUG_BUILTIN)
16799 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
16800 (long unsigned)i);
16802 continue;
16805 if (icode == CODE_FOR_nothing)
16807 if (TARGET_DEBUG_BUILTIN)
16808 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
16809 d->name);
16811 continue;
16814 mode0 = insn_data[icode].operand[0].mode;
16815 mode1 = insn_data[icode].operand[1].mode;
16817 if (mode0 == V2SImode && mode1 == QImode)
16819 if (! (type = v2si_ftype_qi))
16820 type = v2si_ftype_qi
16821 = build_function_type_list (opaque_V2SI_type_node,
16822 char_type_node,
16823 NULL_TREE);
16826 else
16827 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
16828 d->code, d->name);
16831 def_builtin (d->name, type, d->code);
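/* In all three loops above, overloaded builtins deliberately receive
   these opaque V4SI signatures; the front end later replaces each
   overloaded call with the specific instance matching the user's
   argument types.  */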
16835 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
16836 static void
16837 init_float128_ibm (machine_mode mode)
16839 if (!TARGET_XL_COMPAT)
16841 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
16842 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
16843 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
16844 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
16846 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
16848 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
16849 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
16850 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
16851 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
16852 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
16853 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
16854 set_optab_libfunc (le_optab, mode, "__gcc_qle");
16856 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
16857 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
16858 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
16859 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
16860 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
16861 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
16862 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
16863 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
16866 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
16867 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
16869 else
16871 set_optab_libfunc (add_optab, mode, "_xlqadd");
16872 set_optab_libfunc (sub_optab, mode, "_xlqsub");
16873 set_optab_libfunc (smul_optab, mode, "_xlqmul");
16874 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
16877 /* Add various conversions for IFmode to use the traditional TFmode
16878 names. */
16879 if (mode == IFmode)
16881 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
16882 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
16883 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
16884 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
16885 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
16886 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
16888 if (TARGET_POWERPC64)
16890 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
16891 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
16892 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
16893 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
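/* Net effect in the default (non-XL-compat) case: IBM extended-double
   arithmetic such as "a + b" on long double always becomes a __gcc_qadd
   libcall, while the comparison and conversion entries are registered
   only for soft-float configurations that cannot open-code them.  */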
16898 /* Set up IEEE 128-bit floating point routines. Use different names if the
16899 arguments can be passed in a vector register. The historical PowerPC
16900 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
16901 continue to use that if we aren't using vector registers to pass IEEE
16902 128-bit floating point. */
16904 static void
16905 init_float128_ieee (machine_mode mode)
16907 if (FLOAT128_VECTOR_P (mode))
16909 set_optab_libfunc (add_optab, mode, "__addkf3");
16910 set_optab_libfunc (sub_optab, mode, "__subkf3");
16911 set_optab_libfunc (neg_optab, mode, "__negkf2");
16912 set_optab_libfunc (smul_optab, mode, "__mulkf3");
16913 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
16914 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
16915 set_optab_libfunc (abs_optab, mode, "__abskf2");
16917 set_optab_libfunc (eq_optab, mode, "__eqkf2");
16918 set_optab_libfunc (ne_optab, mode, "__nekf2");
16919 set_optab_libfunc (gt_optab, mode, "__gtkf2");
16920 set_optab_libfunc (ge_optab, mode, "__gekf2");
16921 set_optab_libfunc (lt_optab, mode, "__ltkf2");
16922 set_optab_libfunc (le_optab, mode, "__lekf2");
16923 set_optab_libfunc (unord_optab, mode, "__unordkf2");
16925 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
16926 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
16927 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
16928 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
16930 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
16931 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16932 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
16934 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
16935 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16936 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
16938 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
16939 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
16940 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
16941 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
16942 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
16943 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
16945 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
16946 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
16947 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
16948 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
16950 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
16951 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
16952 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
16953 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
16955 if (TARGET_POWERPC64)
16957 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
16958 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
16959 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
16960 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
16964 else
16966 set_optab_libfunc (add_optab, mode, "_q_add");
16967 set_optab_libfunc (sub_optab, mode, "_q_sub");
16968 set_optab_libfunc (neg_optab, mode, "_q_neg");
16969 set_optab_libfunc (smul_optab, mode, "_q_mul");
16970 set_optab_libfunc (sdiv_optab, mode, "_q_div");
16971 if (TARGET_PPC_GPOPT)
16972 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
16974 set_optab_libfunc (eq_optab, mode, "_q_feq");
16975 set_optab_libfunc (ne_optab, mode, "_q_fne");
16976 set_optab_libfunc (gt_optab, mode, "_q_fgt");
16977 set_optab_libfunc (ge_optab, mode, "_q_fge");
16978 set_optab_libfunc (lt_optab, mode, "_q_flt");
16979 set_optab_libfunc (le_optab, mode, "_q_fle");
16981 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
16982 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
16983 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
16984 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
16985 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
16986 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
16987 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
16988 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
16992 static void
16993 rs6000_init_libfuncs (void)
16995 /* __float128 support. */
16996 if (TARGET_FLOAT128)
16998 init_float128_ibm (IFmode);
16999 init_float128_ieee (KFmode);
17002 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17003 if (TARGET_LONG_DOUBLE_128)
17005 if (!TARGET_IEEEQUAD)
17006 init_float128_ibm (TFmode);
17008 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17009 else
17010 init_float128_ieee (TFmode);
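/* Resulting mapping with -mfloat128 on a VSX target: "__float128 a * b"
   calls __mulkf3 via KFmode, while 128-bit long double stays IBM format
   and calls __gcc_qmul, unless TARGET_IEEEQUAD routes TFmode through
   the IEEE names instead.  */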
17015 /* Expand a block clear operation, and return 1 if successful. Return 0
17016 if we should let the compiler generate normal code.
17018 operands[0] is the destination
17019 operands[1] is the length
17020 operands[3] is the alignment */
17022 int
17023 expand_block_clear (rtx operands[])
17025 rtx orig_dest = operands[0];
17026 rtx bytes_rtx = operands[1];
17027 rtx align_rtx = operands[3];
17028 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
17029 HOST_WIDE_INT align;
17030 HOST_WIDE_INT bytes;
17031 int offset;
17032 int clear_bytes;
17033 int clear_step;
17035 /* If this is not a fixed size clear, just call memset. */
17036 if (! constp)
17037 return 0;
17039 /* This must be a fixed size alignment */
17040 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17041 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17043 /* Anything to clear? */
17044 bytes = INTVAL (bytes_rtx);
17045 if (bytes <= 0)
17046 return 1;
17048 /* Use the builtin memset after a point, to avoid huge code bloat.
17049 When optimize_size, avoid any significant code bloat; calling
17050 memset is about 4 instructions, so allow for one instruction to
17051 load zero and three to do clearing. */
17052 if (TARGET_ALTIVEC && align >= 128)
17053 clear_step = 16;
17054 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
17055 clear_step = 8;
17056 else if (TARGET_SPE && align >= 64)
17057 clear_step = 8;
17058 else
17059 clear_step = 4;
17061 if (optimize_size && bytes > 3 * clear_step)
17062 return 0;
17063 if (! optimize_size && bytes > 8 * clear_step)
17064 return 0;
17066 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
17068 machine_mode mode = BLKmode;
17069 rtx dest;
17071 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
17073 clear_bytes = 16;
17074 mode = V4SImode;
17076 else if (bytes >= 8 && TARGET_SPE && align >= 64)
17078 clear_bytes = 8;
17079 mode = V2SImode;
17081 else if (bytes >= 8 && TARGET_POWERPC64
17082 && (align >= 64 || !STRICT_ALIGNMENT))
17084 clear_bytes = 8;
17085 mode = DImode;
17086 if (offset == 0 && align < 64)
17088 rtx addr;
17090 /* If the address form is reg+offset with offset not a
17091 multiple of four, reload into reg indirect form here
17092 rather than waiting for reload. This way we get one
17093 reload, not one per store. */
17094 addr = XEXP (orig_dest, 0);
17095 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17096 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17097 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17099 addr = copy_addr_to_reg (addr);
17100 orig_dest = replace_equiv_address (orig_dest, addr);
17104 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17105 { /* clear 4 bytes */
17106 clear_bytes = 4;
17107 mode = SImode;
17109 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17110 { /* clear 2 bytes */
17111 clear_bytes = 2;
17112 mode = HImode;
17114 else /* clear 1 byte at a time */
17116 clear_bytes = 1;
17117 mode = QImode;
17120 dest = adjust_address (orig_dest, mode, offset);
17122 emit_move_insn (dest, CONST0_RTX (mode));
17125 return 1;
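/* Worked example: clearing 64 bytes with TARGET_ALTIVEC and 128-bit
   alignment picks clear_step = 16 and emits four V4SImode stores of
   zero; at -Os the same request fails the bytes > 3 * clear_step test
   (64 > 48) and returns 0 so the compiler emits a memset call instead.  */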
17129 /* Expand a block move operation, and return 1 if successful. Return 0
17130 if we should let the compiler generate normal code.
17132 operands[0] is the destination
17133 operands[1] is the source
17134 operands[2] is the length
17135 operands[3] is the alignment */
17137 #define MAX_MOVE_REG 4
17139 int
17140 expand_block_move (rtx operands[])
17142 rtx orig_dest = operands[0];
17143 rtx orig_src = operands[1];
17144 rtx bytes_rtx = operands[2];
17145 rtx align_rtx = operands[3];
17146 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
17147 int align;
17148 int bytes;
17149 int offset;
17150 int move_bytes;
17151 rtx stores[MAX_MOVE_REG];
17152 int num_reg = 0;
17154 /* If this is not a fixed size move, just call memcpy */
17155 if (! constp)
17156 return 0;
17158 /* This must be a fixed size alignment */
17159 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17160 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17162 /* Anything to move? */
17163 bytes = INTVAL (bytes_rtx);
17164 if (bytes <= 0)
17165 return 1;
17167 if (bytes > rs6000_block_move_inline_limit)
17168 return 0;
17170 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
17172 union {
17173 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
17174 rtx (*mov) (rtx, rtx);
17175 } gen_func;
17176 machine_mode mode = BLKmode;
17177 rtx src, dest;
17179 /* Altivec first, since it will be faster than a string move
17180 when it applies, and usually not significantly larger. */
17181 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
17183 move_bytes = 16;
17184 mode = V4SImode;
17185 gen_func.mov = gen_movv4si;
17187 else if (TARGET_SPE && bytes >= 8 && align >= 64)
17189 move_bytes = 8;
17190 mode = V2SImode;
17191 gen_func.mov = gen_movv2si;
17193 else if (TARGET_STRING
17194 && bytes > 24 /* move up to 32 bytes at a time */
17195 && ! fixed_regs[5]
17196 && ! fixed_regs[6]
17197 && ! fixed_regs[7]
17198 && ! fixed_regs[8]
17199 && ! fixed_regs[9]
17200 && ! fixed_regs[10]
17201 && ! fixed_regs[11]
17202 && ! fixed_regs[12])
17204 move_bytes = (bytes > 32) ? 32 : bytes;
17205 gen_func.movmemsi = gen_movmemsi_8reg;
17207 else if (TARGET_STRING
17208 && bytes > 16 /* move up to 24 bytes at a time */
17209 && ! fixed_regs[5]
17210 && ! fixed_regs[6]
17211 && ! fixed_regs[7]
17212 && ! fixed_regs[8]
17213 && ! fixed_regs[9]
17214 && ! fixed_regs[10])
17216 move_bytes = (bytes > 24) ? 24 : bytes;
17217 gen_func.movmemsi = gen_movmemsi_6reg;
17219 else if (TARGET_STRING
17220 && bytes > 8 /* move up to 16 bytes at a time */
17221 && ! fixed_regs[5]
17222 && ! fixed_regs[6]
17223 && ! fixed_regs[7]
17224 && ! fixed_regs[8])
17226 move_bytes = (bytes > 16) ? 16 : bytes;
17227 gen_func.movmemsi = gen_movmemsi_4reg;
17229 else if (bytes >= 8 && TARGET_POWERPC64
17230 && (align >= 64 || !STRICT_ALIGNMENT))
17232 move_bytes = 8;
17233 mode = DImode;
17234 gen_func.mov = gen_movdi;
17235 if (offset == 0 && align < 64)
17237 rtx addr;
17239 /* If the address form is reg+offset with offset not a
17240 multiple of four, reload into reg indirect form here
17241 rather than waiting for reload. This way we get one
17242 reload, not one per load and/or store. */
17243 addr = XEXP (orig_dest, 0);
17244 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17245 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17246 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17248 addr = copy_addr_to_reg (addr);
17249 orig_dest = replace_equiv_address (orig_dest, addr);
17251 addr = XEXP (orig_src, 0);
17252 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17253 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17254 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17256 addr = copy_addr_to_reg (addr);
17257 orig_src = replace_equiv_address (orig_src, addr);
17261 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
17262 { /* move up to 8 bytes at a time */
17263 move_bytes = (bytes > 8) ? 8 : bytes;
17264 gen_func.movmemsi = gen_movmemsi_2reg;
17266 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17267 { /* move 4 bytes */
17268 move_bytes = 4;
17269 mode = SImode;
17270 gen_func.mov = gen_movsi;
17272 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17273 { /* move 2 bytes */
17274 move_bytes = 2;
17275 mode = HImode;
17276 gen_func.mov = gen_movhi;
17278 else if (TARGET_STRING && bytes > 1)
17279 { /* move up to 4 bytes at a time */
17280 move_bytes = (bytes > 4) ? 4 : bytes;
17281 gen_func.movmemsi = gen_movmemsi_1reg;
17283 else /* move 1 byte at a time */
17285 move_bytes = 1;
17286 mode = QImode;
17287 gen_func.mov = gen_movqi;
17290 src = adjust_address (orig_src, mode, offset);
17291 dest = adjust_address (orig_dest, mode, offset);
17293 if (mode != BLKmode)
17295 rtx tmp_reg = gen_reg_rtx (mode);
17297 emit_insn ((*gen_func.mov) (tmp_reg, src));
17298 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
17301 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
17303 int i;
17304 for (i = 0; i < num_reg; i++)
17305 emit_insn (stores[i]);
17306 num_reg = 0;
17309 if (mode == BLKmode)
17311 /* Move the address into scratch registers. The movmemsi
17312 patterns require zero offset. */
17313 if (!REG_P (XEXP (src, 0)))
17315 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
17316 src = replace_equiv_address (src, src_reg);
17318 set_mem_size (src, move_bytes);
17320 if (!REG_P (XEXP (dest, 0)))
17322 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
17323 dest = replace_equiv_address (dest, dest_reg);
17325 set_mem_size (dest, move_bytes);
17327 emit_insn ((*gen_func.movmemsi) (dest, src,
17328 GEN_INT (move_bytes & 31),
17329 align_rtx));
17333 return 1;
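/* Worked example: a 32-byte copy with TARGET_STRING and r5..r12 free is
   a single movmemsi_8reg (one lswi/stswi pair); without string insns
   but with TARGET_POWERPC64 and 64-bit alignment it becomes four DImode
   load/store pairs, batched through stores[] so the loads all issue
   before the stores.  */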
17337 /* Return a string to perform a load_multiple operation.
17338 operands[0] is the vector.
17339 operands[1] is the source address.
17340 operands[2] is the first destination register. */
17342 const char *
17343 rs6000_output_load_multiple (rtx operands[3])
17345 /* We have to handle the case where the pseudo used to contain the address
17346 is assigned to one of the output registers. */
17347 int i, j;
17348 int words = XVECLEN (operands[0], 0);
17349 rtx xop[10];
17351 if (XVECLEN (operands[0], 0) == 1)
17352 return "lwz %2,0(%1)";
17354 for (i = 0; i < words; i++)
17355 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
17357 if (i == words-1)
17359 xop[0] = GEN_INT (4 * (words-1));
17360 xop[1] = operands[1];
17361 xop[2] = operands[2];
17362 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
17363 return "";
17365 else if (i == 0)
17367 xop[0] = GEN_INT (4 * (words-1));
17368 xop[1] = operands[1];
17369 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
17370 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
17371 return "";
17373 else
17375 for (j = 0; j < words; j++)
17376 if (j != i)
17378 xop[0] = GEN_INT (j * 4);
17379 xop[1] = operands[1];
17380 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
17381 output_asm_insn ("lwz %2,%0(%1)", xop);
17383 xop[0] = GEN_INT (i * 4);
17384 xop[1] = operands[1];
17385 output_asm_insn ("lwz %1,%0(%1)", xop);
17386 return "";
17390 return "lswi %2,%1,%N0";
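/* Example outputs: when the address register is not among the
   destinations, this returns "lswi %2,%1,%N0"; when it is the last
   destination (i == words-1), the lswi is cut one word short and the
   final word is loaded into the address register with a separate lwz,
   as emitted above.  */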
17394 /* A validation routine: say whether CODE, a condition code, and MODE
17395 match. The other alternatives either don't make sense or should
17396 never be generated. */
17398 void
17399 validate_condition_mode (enum rtx_code code, machine_mode mode)
17401 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17402 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17403 && GET_MODE_CLASS (mode) == MODE_CC);
17405 /* These don't make sense. */
17406 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17407 || mode != CCUNSmode);
17409 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17410 || mode == CCUNSmode);
17412 gcc_assert (mode == CCFPmode
17413 || (code != ORDERED && code != UNORDERED
17414 && code != UNEQ && code != LTGT
17415 && code != UNGT && code != UNLT
17416 && code != UNGE && code != UNLE));
17418 /* These should never be generated except for
17419 flag_finite_math_only. */
17420 gcc_assert (mode != CCFPmode
17421 || flag_finite_math_only
17422 || (code != LE && code != GE
17423 && code != UNEQ && code != LTGT
17424 && code != UNGT && code != UNLT));
17426 /* These are invalid; the information is not there. */
17427 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17431 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17432 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
17433 non-null, store there the bit offset (counted from the right) where
17434 the single stretch of 1 bits begins; and similarly for B, the bit
17435 offset where it ends. */
17437 bool
17438 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17440 unsigned HOST_WIDE_INT val = INTVAL (mask);
17441 unsigned HOST_WIDE_INT bit;
17442 int nb, ne;
17443 int n = GET_MODE_PRECISION (mode);
17445 if (mode != DImode && mode != SImode)
17446 return false;
17448 if (INTVAL (mask) >= 0)
17450 bit = val & -val;
17451 ne = exact_log2 (bit);
17452 nb = exact_log2 (val + bit);
17454 else if (val + 1 == 0)
17456 nb = n;
17457 ne = 0;
17459 else if (val & 1)
17461 val = ~val;
17462 bit = val & -val;
17463 nb = exact_log2 (bit);
17464 ne = exact_log2 (val + bit);
17466 else
17468 bit = val & -val;
17469 ne = exact_log2 (bit);
17470 if (val + bit == 0)
17471 nb = n;
17472 else
17473 nb = 0;
17476 nb--;
17478 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17479 return false;
17481 if (b)
17482 *b = nb;
17483 if (e)
17484 *e = ne;
17486 return true;
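/* Worked examples: for MASK = 0x0ff0 in SImode the ones run from bit 4
   to bit 11, so *e = 4 and *b = 11.  The wrap-around mask 0xff0000ff is
   also valid (rlwinm can express it): the stretch begins at bit 24,
   wraps past bit 31 to bit 0, and ends at bit 7, giving *e = 24 and
   *b = 7.  */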
17489 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17490 or rldicr instruction, to implement an AND with it in mode MODE. */
17492 bool
17493 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17495 int nb, ne;
17497 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17498 return false;
17500 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
17501 does not wrap. */
17502 if (mode == DImode)
17503 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
17505 /* For SImode, rlwinm can do everything. */
17506 if (mode == SImode)
17507 return (nb < 32 && ne < 32);
17509 return false;
17512 /* Return the instruction template for an AND with mask in mode MODE, with
17513 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17515 const char *
17516 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
17518 int nb, ne;
17520 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
17521 gcc_unreachable ();
17523 if (mode == DImode && ne == 0)
17525 operands[3] = GEN_INT (63 - nb);
17526 if (dot)
17527 return "rldicl. %0,%1,0,%3";
17528 return "rldicl %0,%1,0,%3";
17531 if (mode == DImode && nb == 63)
17533 operands[3] = GEN_INT (63 - ne);
17534 if (dot)
17535 return "rldicr. %0,%1,0,%3";
17536 return "rldicr %0,%1,0,%3";
17539 if (nb < 32 && ne < 32)
17541 operands[3] = GEN_INT (31 - nb);
17542 operands[4] = GEN_INT (31 - ne);
17543 if (dot)
17544 return "rlwinm. %0,%1,0,%3,%4";
17545 return "rlwinm %0,%1,0,%3,%4";
17548 gcc_unreachable ();
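/* Worked example: an AND with 0xffffffff00000000 in DImode gives
   nb = 63, ne = 32, so the rldicr arm fires with 63 - ne = 31 and the
   template is "rldicr %0,%1,0,31", keeping the high 32 bits (IBM bit
   numbering counts from the most significant bit).  */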
17551 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
17552 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
17553 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
17555 bool
17556 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
17558 int nb, ne;
17560 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17561 return false;
17563 int n = GET_MODE_PRECISION (mode);
17564 int sh = -1;
17566 if (CONST_INT_P (XEXP (shift, 1)))
17568 sh = INTVAL (XEXP (shift, 1));
17569 if (sh < 0 || sh >= n)
17570 return false;
17573 rtx_code code = GET_CODE (shift);
17575 /* Convert any shift by 0 to a rotate, to simplify below code. */
17576 if (sh == 0)
17577 code = ROTATE;
17579 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17580 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17581 code = ASHIFT;
17582 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17584 code = LSHIFTRT;
17585 sh = n - sh;
17588 /* DImode rotates need rld*. */
17589 if (mode == DImode && code == ROTATE)
17590 return (nb == 63 || ne == 0 || ne == sh);
17592 /* SImode rotates need rlw*. */
17593 if (mode == SImode && code == ROTATE)
17594 return (nb < 32 && ne < 32 && sh < 32);
17596 /* Wrap-around masks are only okay for rotates. */
17597 if (ne > nb)
17598 return false;
17600 /* Variable shifts are only okay for rotates. */
17601 if (sh < 0)
17602 return false;
17604 /* Don't allow ASHIFT if the mask is wrong for that. */
17605 if (code == ASHIFT && ne < sh)
17606 return false;
17608 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
17609 if the mask is wrong for that. */
17610 if (nb < 32 && ne < 32 && sh < 32
17611 && !(code == LSHIFTRT && nb >= 32 - sh))
17612 return true;
17614 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
17615 if the mask is wrong for that. */
17616 if (code == LSHIFTRT)
17617 sh = 64 - sh;
17618 if (nb == 63 || ne == 0 || ne == sh)
17619 return !(code == LSHIFTRT && nb >= sh);
17621 return false;
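/* Worked example: (x << 16) & 0xffff0000 in SImode has nb = 31, ne = 16,
   code ASHIFT and sh = 16; nb, ne and sh are all below 32, so it is
   accepted, and rs6000_insn_for_shift_mask will emit
   "rlwinm %0,%1,16,0,15" for it.  */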
17624 /* Return the instruction template for a shift with mask in mode MODE, with
17625 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17627 const char *
17628 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
17630 int nb, ne;
17632 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17633 gcc_unreachable ();
17635 if (mode == DImode && ne == 0)
17637 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17638 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
17639 operands[3] = GEN_INT (63 - nb);
17640 if (dot)
17641 return "rld%I2cl. %0,%1,%2,%3";
17642 return "rld%I2cl %0,%1,%2,%3";
17645 if (mode == DImode && nb == 63)
17647 operands[3] = GEN_INT (63 - ne);
17648 if (dot)
17649 return "rld%I2cr. %0,%1,%2,%3";
17650 return "rld%I2cr %0,%1,%2,%3";
17653 if (mode == DImode
17654 && GET_CODE (operands[4]) != LSHIFTRT
17655 && CONST_INT_P (operands[2])
17656 && ne == INTVAL (operands[2]))
17658 operands[3] = GEN_INT (63 - nb);
17659 if (dot)
17660 return "rld%I2c. %0,%1,%2,%3";
17661 return "rld%I2c %0,%1,%2,%3";
17664 if (nb < 32 && ne < 32)
17666 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17667 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17668 operands[3] = GEN_INT (31 - nb);
17669 operands[4] = GEN_INT (31 - ne);
17670 /* This insn can also be a 64-bit rotate with mask that is really just
17671 a shift right (with mask); the %h below adjusts for that situation
17672 (the shift count is >= 32 in that case). */
17673 if (dot)
17674 return "rlw%I2nm. %0,%1,%h2,%3,%4";
17675 return "rlw%I2nm %0,%1,%h2,%3,%4";
17678 gcc_unreachable ();
17681 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
17682 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
17683 ASHIFT, or LSHIFTRT) in mode MODE. */
17685 bool
17686 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
17688 int nb, ne;
17690 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17691 return false;
17693 int n = GET_MODE_PRECISION (mode);
17695 int sh = INTVAL (XEXP (shift, 1));
17696 if (sh < 0 || sh >= n)
17697 return false;
17699 rtx_code code = GET_CODE (shift);
17701 /* Convert any shift by 0 to a rotate, to simplify below code. */
17702 if (sh == 0)
17703 code = ROTATE;
17705 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17706 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17707 code = ASHIFT;
17708 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17709 {
17710 code = LSHIFTRT;
17711 sh = n - sh;
17712 }
17714 /* DImode rotates need rldimi. */
17715 if (mode == DImode && code == ROTATE)
17716 return (ne == sh);
17718 /* SImode rotates need rlwimi. */
17719 if (mode == SImode && code == ROTATE)
17720 return (nb < 32 && ne < 32 && sh < 32);
17722 /* Wrap-around masks are only okay for rotates. */
17723 if (ne > nb)
17724 return false;
17726 /* Don't allow ASHIFT if the mask is wrong for that. */
17727 if (code == ASHIFT && ne < sh)
17728 return false;
17730 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
17731 if the mask is wrong for that. */
17732 if (nb < 32 && ne < 32 && sh < 32
17733 && !(code == LSHIFTRT && nb >= 32 - sh))
17734 return true;
17736 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
17737 if the mask is wrong for that. */
17738 if (code == LSHIFTRT)
17739 sh = 64 - sh;
17740 if (ne == sh)
17741 return !(code == LSHIFTRT && nb >= sh);
17743 return false;
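/* For illustration: an (ashift:DI x 32) insert under the mask
   0xffffffff00000000 has nb = 63, ne = 32, so ne == sh holds and rldimi
   can place the shifted value into the top half.  Likewise
   (ashift:DI x 8) under 0x0000ff00 (nb = 15, ne = 8) passes the rlwimi
   test, since nb, ne, and sh are all below 32.  */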
17746 /* Return the instruction template for an insert with mask in mode MODE, with
17747 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17749 const char *
17750 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
17752 int nb, ne;
17754 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17755 gcc_unreachable ();
17757 /* Prefer rldimi because rlwimi is cracked. */
17758 if (TARGET_POWERPC64
17759 && (!dot || mode == DImode)
17760 && GET_CODE (operands[4]) != LSHIFTRT
17761 && ne == INTVAL (operands[2]))
17763 operands[3] = GEN_INT (63 - nb);
17764 if (dot)
17765 return "rldimi. %0,%1,%2,%3";
17766 return "rldimi %0,%1,%2,%3";
17769 if (nb < 32 && ne < 32)
17771 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17772 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17773 operands[3] = GEN_INT (31 - nb);
17774 operands[4] = GEN_INT (31 - ne);
17775 if (dot)
17776 return "rlwimi. %0,%1,%2,%3,%4";
17777 return "rlwimi %0,%1,%2,%3,%4";
17780 gcc_unreachable ();
17783 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
17784 using two machine instructions. */
17786 bool
17787 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
17789 /* There are two kinds of AND we can handle with two insns:
17790 1) those we can do with two rl* insns;
17791 2) ori[s];xori[s].
17793 We do not handle that last case yet. */
17795 /* If there is just one stretch of ones, we can do it. */
17796 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
17797 return true;
17799 /* Otherwise, fill in the lowest "hole"; if we can do the result with
17800 one insn, we can do the whole thing with two. */
17801 unsigned HOST_WIDE_INT val = INTVAL (c);
17802 unsigned HOST_WIDE_INT bit1 = val & -val;
17803 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17804 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17805 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17806 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
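/* A worked example of the arithmetic above: for val = 0x00ff00ff,
   bit1 = 0x00000001 (lowest set bit), bit2 = 0x00000100 (lowest bit of
   the lowest hole), val1 = 0x00ff0000, bit3 = 0x00010000 (lowest bit
   above that hole), so val + bit3 - bit2 = 0x00ffffff, which fills the
   hole in bits 8-15.  That is one stretch of ones, so an AND with
   0x00ff00ff can be done as two rl* insns.  */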
17809 /* Emit a potentially record-form instruction, setting DST from SRC.
17810 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17811 signed comparison of DST with zero. If DOT is 1, the generated RTL
17812 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17813 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17814 a separate COMPARE. */
17816 static void
17817 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17819 if (dot == 0)
17821 emit_move_insn (dst, src);
17822 return;
17825 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17827 emit_move_insn (dst, src);
17828 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17829 return;
17832 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17833 if (dot == 1)
17835 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17836 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17838 else
17840 rtx set = gen_rtx_SET (dst, src);
17841 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
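/* For example, with dot == 1 the RTL emitted above has the form
   (parallel [(set ccreg (compare:CC src (const_int 0)))
   (clobber dst)])
   while dot == 2 replaces the clobber with (set dst src).  */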
17845 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
17846 If EXPAND is true, split rotate-and-mask instructions we generate to
17847 their constituent parts as well (this is used during expand); if DOT
17848 is 1, make the last insn a record-form instruction clobbering the
17849 destination GPR and setting the CC reg (from operands[3]); if 2, set
17850 that GPR as well as the CC reg. */
17852 void
17853 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
17855 gcc_assert (!(expand && dot));
17857 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
17859 /* If it is one stretch of ones, it is DImode (a one-stretch SImode AND
17860 is already a single rlwinm); shift left, mask, then shift right. This
17861 generates better code than doing the masks without shifts, or shifting first right and then left. */
17862 int nb, ne;
17863 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
17865 gcc_assert (mode == DImode);
17867 int shift = 63 - nb;
17868 if (expand)
17870 rtx tmp1 = gen_reg_rtx (DImode);
17871 rtx tmp2 = gen_reg_rtx (DImode);
17872 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
17873 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
17874 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
17876 else
17878 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
17879 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
17880 emit_move_insn (operands[0], tmp);
17881 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
17882 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17884 return;
17887 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
17888 that does the rest. */
17889 unsigned HOST_WIDE_INT bit1 = val & -val;
17890 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17891 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17892 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17894 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
17895 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
17897 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
17899 /* Two "no-rotate"-and-mask instructions, for SImode. */
17900 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
17902 gcc_assert (mode == SImode);
17904 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17905 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
17906 emit_move_insn (reg, tmp);
17907 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17908 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17909 return;
17912 gcc_assert (mode == DImode);
17914 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
17915 insns; we have to do the first in SImode, because it wraps. */
17916 if (mask2 <= 0xffffffff
17917 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
17919 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17920 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
17921 GEN_INT (mask1));
17922 rtx reg_low = gen_lowpart (SImode, reg);
17923 emit_move_insn (reg_low, tmp);
17924 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17925 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17926 return;
17929 /* Two rld* insns: rotate, clear the hole in the middle (which now is
17930 at the top end), rotate back and clear the other hole. */
17931 int right = exact_log2 (bit3);
17932 int left = 64 - right;
17934 /* Rotate the mask too. */
17935 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
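/* That is mask1 rotated right by "right", to line up with the rotated
   value: the low "right" bits of mask1 (the ones below bit2) wrap to
   the top, so the hole now sits at the high end where a single rld*
   mask can clear it.  */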
17937 if (expand)
17939 rtx tmp1 = gen_reg_rtx (DImode);
17940 rtx tmp2 = gen_reg_rtx (DImode);
17941 rtx tmp3 = gen_reg_rtx (DImode);
17942 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
17943 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
17944 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
17945 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
17947 else
17949 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
17950 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
17951 emit_move_insn (operands[0], tmp);
17952 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
17953 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
17954 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17958 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
17959 for lfq and stfq insns iff the registers are hard registers. */
17961 int
17962 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
17964 /* We might have been passed a SUBREG. */
17965 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
17966 return 0;
17968 /* We might have been passed non-floating-point registers. */
17969 if (!FP_REGNO_P (REGNO (reg1))
17970 || !FP_REGNO_P (REGNO (reg2)))
17971 return 0;
17973 return (REGNO (reg1) == REGNO (reg2) - 1);
17976 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
17977 addr1 and addr2 must be in consecutive memory locations
17978 (addr2 == addr1 + 8). */
17980 int
17981 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
17983 rtx addr1, addr2;
17984 unsigned int reg1, reg2;
17985 int offset1, offset2;
17987 /* The mems cannot be volatile. */
17988 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
17989 return 0;
17991 addr1 = XEXP (mem1, 0);
17992 addr2 = XEXP (mem2, 0);
17994 /* Extract an offset (if used) from the first addr. */
17995 if (GET_CODE (addr1) == PLUS)
17997 /* If not a REG, return zero. */
17998 if (GET_CODE (XEXP (addr1, 0)) != REG)
17999 return 0;
18000 else
18002 reg1 = REGNO (XEXP (addr1, 0));
18003 /* The offset must be constant! */
18004 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18005 return 0;
18006 offset1 = INTVAL (XEXP (addr1, 1));
18009 else if (GET_CODE (addr1) != REG)
18010 return 0;
18011 else
18013 reg1 = REGNO (addr1);
18014 /* This was a simple (mem (reg)) expression. Offset is 0. */
18015 offset1 = 0;
18018 /* And now for the second addr. */
18019 if (GET_CODE (addr2) == PLUS)
18021 /* If not a REG, return zero. */
18022 if (GET_CODE (XEXP (addr2, 0)) != REG)
18023 return 0;
18024 else
18026 reg2 = REGNO (XEXP (addr2, 0));
18027 /* The offset must be constant. */
18028 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18029 return 0;
18030 offset2 = INTVAL (XEXP (addr2, 1));
18033 else if (GET_CODE (addr2) != REG)
18034 return 0;
18035 else
18037 reg2 = REGNO (addr2);
18038 /* This was a simple (mem (reg)) expression. Offset is 0. */
18039 offset2 = 0;
18042 /* Both of these must have the same base register. */
18043 if (reg1 != reg2)
18044 return 0;
18046 /* The offset for the second addr must be 8 more than the first addr. */
18047 if (offset2 != offset1 + 8)
18048 return 0;
18050 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18051 instructions. */
18052 return 1;
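/* For example, (mem (plus (reg 3) (const_int 8))) followed by
   (mem (plus (reg 3) (const_int 16))) passes all the tests above, while
   a pair with different base registers, or whose offsets differ by
   anything other than 8, does not.  */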
18056 rtx
18057 rs6000_secondary_memory_needed_rtx (machine_mode mode)
18059 static bool eliminated = false;
18060 rtx ret;
18062 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
18063 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18064 else
18066 rtx mem = cfun->machine->sdmode_stack_slot;
18067 gcc_assert (mem != NULL_RTX);
18069 if (!eliminated)
18071 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
18072 cfun->machine->sdmode_stack_slot = mem;
18073 eliminated = true;
18075 ret = mem;
18078 if (TARGET_DEBUG_ADDR)
18080 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
18081 GET_MODE_NAME (mode));
18082 if (!ret)
18083 fprintf (stderr, "\tNULL_RTX\n");
18084 else
18085 debug_rtx (ret);
18088 return ret;
18091 /* Return the mode to be used for memory when a secondary memory
18092 location is needed. For SDmode values we need to use DDmode, in
18093 all other cases we can use the same mode. */
18094 machine_mode
18095 rs6000_secondary_memory_needed_mode (machine_mode mode)
18097 if (lra_in_progress && mode == SDmode)
18098 return DDmode;
18099 return mode;
18102 static tree
18103 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
18105 /* Don't walk into types. */
18106 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
18108 *walk_subtrees = 0;
18109 return NULL_TREE;
18112 switch (TREE_CODE (*tp))
18114 case VAR_DECL:
18115 case PARM_DECL:
18116 case FIELD_DECL:
18117 case RESULT_DECL:
18118 case SSA_NAME:
18119 case REAL_CST:
18120 case MEM_REF:
18121 case VIEW_CONVERT_EXPR:
18122 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
18123 return *tp;
18124 break;
18125 default:
18126 break;
18129 return NULL_TREE;
18132 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18133 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18134 only work on the traditional altivec registers, note if an altivec register
18135 was chosen. */
18137 static enum rs6000_reg_type
18138 register_to_reg_type (rtx reg, bool *is_altivec)
18140 HOST_WIDE_INT regno;
18141 enum reg_class rclass;
18143 if (GET_CODE (reg) == SUBREG)
18144 reg = SUBREG_REG (reg);
18146 if (!REG_P (reg))
18147 return NO_REG_TYPE;
18149 regno = REGNO (reg);
18150 if (regno >= FIRST_PSEUDO_REGISTER)
18152 if (!lra_in_progress && !reload_in_progress && !reload_completed)
18153 return PSEUDO_REG_TYPE;
18155 regno = true_regnum (reg);
18156 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18157 return PSEUDO_REG_TYPE;
18160 gcc_assert (regno >= 0);
18162 if (is_altivec && ALTIVEC_REGNO_P (regno))
18163 *is_altivec = true;
18165 rclass = rs6000_regno_regclass[regno];
18166 return reg_class_to_reg_type[(int)rclass];
18169 /* Helper function to return the cost of adding a TOC entry address. */
18171 static inline int
18172 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18174 int ret;
18176 if (TARGET_CMODEL != CMODEL_SMALL)
18177 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18179 else
18180 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18182 return ret;
18185 /* Helper function for rs6000_secondary_reload to determine whether the memory
18186 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18187 needs reloading. Return negative if the memory is not handled by the memory
18188 helper functions (so a different reload method should be tried), 0 if no
18189 additional instructions are needed, and positive to give the extra cost for the
18190 memory. */
18192 static int
18193 rs6000_secondary_reload_memory (rtx addr,
18194 enum reg_class rclass,
18195 machine_mode mode)
18197 int extra_cost = 0;
18198 rtx reg, and_arg, plus_arg0, plus_arg1;
18199 addr_mask_type addr_mask;
18200 const char *type = NULL;
18201 const char *fail_msg = NULL;
18203 if (GPR_REG_CLASS_P (rclass))
18204 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18206 else if (rclass == FLOAT_REGS)
18207 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18209 else if (rclass == ALTIVEC_REGS)
18210 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18212 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18213 else if (rclass == VSX_REGS)
18214 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18215 & ~RELOAD_REG_AND_M16);
18217 /* If the register allocator hasn't made up its mind yet on the register
18218 class to use, settle on defaults to use. */
18219 else if (rclass == NO_REGS)
18221 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18222 & ~RELOAD_REG_AND_M16);
18224 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18225 addr_mask &= ~(RELOAD_REG_INDEXED
18226 | RELOAD_REG_PRE_INCDEC
18227 | RELOAD_REG_PRE_MODIFY);
18230 else
18231 addr_mask = 0;
18233 /* If the register isn't valid in this register class, just return now. */
18234 if ((addr_mask & RELOAD_REG_VALID) == 0)
18236 if (TARGET_DEBUG_ADDR)
18238 fprintf (stderr,
18239 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18240 "not valid in class\n",
18241 GET_MODE_NAME (mode), reg_class_names[rclass]);
18242 debug_rtx (addr);
18245 return -1;
18248 switch (GET_CODE (addr))
18250 /* Does the register class support auto update forms for this mode? We
18251 don't need a scratch register, since the powerpc only supports
18252 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18253 case PRE_INC:
18254 case PRE_DEC:
18255 reg = XEXP (addr, 0);
18256 if (!base_reg_operand (reg, GET_MODE (reg)))
18258 fail_msg = "no base register #1";
18259 extra_cost = -1;
18262 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18264 extra_cost = 1;
18265 type = "update";
18267 break;
18269 case PRE_MODIFY:
18270 reg = XEXP (addr, 0);
18271 plus_arg1 = XEXP (addr, 1);
18272 if (!base_reg_operand (reg, GET_MODE (reg))
18273 || GET_CODE (plus_arg1) != PLUS
18274 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18276 fail_msg = "bad PRE_MODIFY";
18277 extra_cost = -1;
18280 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18282 extra_cost = 1;
18283 type = "update";
18285 break;
18287 /* Do we need to simulate AND -16 to clear the bottom address bits used
18288 in VMX load/stores? Only allow the AND for vector sizes. */
18289 case AND:
18290 and_arg = XEXP (addr, 0);
18291 if (GET_MODE_SIZE (mode) != 16
18292 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18293 || INTVAL (XEXP (addr, 1)) != -16)
18295 fail_msg = "bad Altivec AND #1";
18296 extra_cost = -1;
18299 if (rclass != ALTIVEC_REGS)
18301 if (legitimate_indirect_address_p (and_arg, false))
18302 extra_cost = 1;
18304 else if (legitimate_indexed_address_p (and_arg, false))
18305 extra_cost = 2;
18307 else
18309 fail_msg = "bad Altivec AND #2";
18310 extra_cost = -1;
18313 type = "and";
18315 break;
18317 /* If this is an indirect address, make sure it is a base register. */
18318 case REG:
18319 case SUBREG:
18320 if (!legitimate_indirect_address_p (addr, false))
18322 extra_cost = 1;
18323 type = "move";
18325 break;
18327 /* If this is an indexed address, make sure the register class can handle
18328 indexed addresses for this mode. */
18329 case PLUS:
18330 plus_arg0 = XEXP (addr, 0);
18331 plus_arg1 = XEXP (addr, 1);
18333 /* (plus (plus (reg) (constant)) (constant)) is generated during
18334 push_reload processing, so handle it now. */
18335 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18337 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18339 extra_cost = 1;
18340 type = "offset";
18344 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18345 push_reload processing, so handle it now. */
18346 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18348 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18350 extra_cost = 1;
18351 type = "indexed #2";
18355 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18357 fail_msg = "no base register #2";
18358 extra_cost = -1;
18361 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18363 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18364 || !legitimate_indexed_address_p (addr, false))
18366 extra_cost = 1;
18367 type = "indexed";
18371 /* Make sure the register class can handle offset addresses. */
18372 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18374 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18376 extra_cost = 1;
18377 type = "offset";
18381 else
18383 fail_msg = "bad PLUS";
18384 extra_cost = -1;
18387 break;
18389 case LO_SUM:
18390 if (!legitimate_lo_sum_address_p (mode, addr, false))
18392 fail_msg = "bad LO_SUM";
18393 extra_cost = -1;
18396 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18398 extra_cost = 1;
18399 type = "lo_sum";
18401 break;
18403 /* Static addresses need to create a TOC entry. */
18404 case CONST:
18405 case SYMBOL_REF:
18406 case LABEL_REF:
18407 type = "address";
18408 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18409 break;
18411 /* TOC references look like offsetable memory. */
18412 case UNSPEC:
18413 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18415 fail_msg = "bad UNSPEC";
18416 extra_cost = -1;
18419 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18421 extra_cost = 1;
18422 type = "toc reference";
18424 break;
18426 default:
18428 fail_msg = "bad address";
18429 extra_cost = -1;
18433 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18435 if (extra_cost < 0)
18436 fprintf (stderr,
18437 "rs6000_secondary_reload_memory error: mode = %s, "
18438 "class = %s, addr_mask = '%s', %s\n",
18439 GET_MODE_NAME (mode),
18440 reg_class_names[rclass],
18441 rs6000_debug_addr_mask (addr_mask, false),
18442 (fail_msg != NULL) ? fail_msg : "<bad address>");
18444 else
18445 fprintf (stderr,
18446 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18447 "addr_mask = '%s', extra cost = %d, %s\n",
18448 GET_MODE_NAME (mode),
18449 reg_class_names[rclass],
18450 rs6000_debug_addr_mask (addr_mask, false),
18451 extra_cost,
18452 (type) ? type : "<none>");
18454 debug_rtx (addr);
18457 return extra_cost;
18460 /* Helper function for rs6000_secondary_reload to return true if a move to a
18461 different register class is really a simple move. */
18463 static bool
18464 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18465 enum rs6000_reg_type from_type,
18466 machine_mode mode)
18468 int size;
18470 /* Add support for various direct moves available. In this function, we only
18471 look at cases where we don't need any extra registers, and one or more
18472 simple move insns are issued. At present, 32-bit integers are not allowed
18473 in FPR/VSX registers. Single precision binary floating point is not a
18474 simple move because we need to convert to the single precision memory layout.
18475 The 4-byte SDmode can be moved. */
18476 size = GET_MODE_SIZE (mode);
18477 if (TARGET_DIRECT_MOVE
18478 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
18479 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18480 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18481 return true;
18483 else if (TARGET_DIRECT_MOVE_128 && size == 16
18484 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18485 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
18486 return true;
18488 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
18489 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
18490 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18491 return true;
18493 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18494 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18495 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18496 return true;
18498 return false;
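/* E.g. a DImode move between a GPR and a VSX register on a 64-bit
   target with direct moves enabled matches the first test above and is
   a single mtvsrd/mfvsrd, so no secondary reload is needed for it.  */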
18501 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
18502 special direct moves that involve allocating an extra register; return true
18503 if there is a helper insn for the move (recording its insn code and cost
18504 in SRI), false if not. */
18506 static bool
18507 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18508 enum rs6000_reg_type from_type,
18509 machine_mode mode,
18510 secondary_reload_info *sri,
18511 bool altivec_p)
18513 bool ret = false;
18514 enum insn_code icode = CODE_FOR_nothing;
18515 int cost = 0;
18516 int size = GET_MODE_SIZE (mode);
18518 if (TARGET_POWERPC64)
18520 if (size == 16)
18522 /* Handle moving 128-bit values from GPRs to VSX registers on
18523 ISA 2.07 (power8, power9) when running in 64-bit mode using
18524 XXPERMDI to glue the two 64-bit values back together. */
18525 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18527 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18528 icode = reg_addr[mode].reload_vsx_gpr;
18531 /* Handle moving 128-bit values from VSX registers to GPRs on
18532 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18533 bottom 64-bit value. */
18534 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18536 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18537 icode = reg_addr[mode].reload_gpr_vsx;
18541 else if (mode == SFmode)
18543 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18545 cost = 3; /* xscvdpspn, mfvsrd, and. */
18546 icode = reg_addr[mode].reload_gpr_vsx;
18549 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18551 cost = 2; /* mtvsrz, xscvspdpn. */
18552 icode = reg_addr[mode].reload_vsx_gpr;
18557 if (TARGET_POWERPC64 && size == 16)
18559 /* Handle moving 128-bit values from GPRs to VSX registers on
18560 ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two
18561 64-bit values back together. */
18562 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18564 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18565 icode = reg_addr[mode].reload_vsx_gpr;
18568 /* Handle moving 128-bit values from VSX registers to GPRs on
18569 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18570 bottom 64-bit value. */
18571 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18573 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18574 icode = reg_addr[mode].reload_gpr_vsx;
18578 else if (!TARGET_POWERPC64 && size == 8)
18580 /* Handle moving 64-bit values from GPRs to floating point registers on
18581 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
18582 32-bit values back together. Altivec register classes must be handled
18583 specially since a different instruction is used, and the secondary
18584 reload support requires a single register class in the scratch
18585 register constraint. However, right now TFmode is not allowed in
18586 Altivec registers, so the pattern will never match. */
18587 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
18589 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
18590 icode = reg_addr[mode].reload_fpr_gpr;
18594 if (icode != CODE_FOR_nothing)
18596 ret = true;
18597 if (sri)
18599 sri->icode = icode;
18600 sri->extra_cost = cost;
18604 return ret;
18607 /* Return whether a move between two register classes can be done either
18608 directly (simple move) or via a pattern that uses a single extra temporary
18609 (using ISA 2.07's direct move in this case). */
18611 static bool
18612 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
18613 enum rs6000_reg_type from_type,
18614 machine_mode mode,
18615 secondary_reload_info *sri,
18616 bool altivec_p)
18618 /* Fall back to load/store reloads if either type is not a register. */
18619 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
18620 return false;
18622 /* If we haven't allocated registers yet, assume the move can be done for the
18623 standard register types. */
18624 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
18625 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
18626 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
18627 return true;
18629 /* A move within the same set of registers is a simple move for
18630 non-specialized registers. */
18631 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
18632 return true;
18634 /* Check whether a simple move can be done directly. */
18635 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
18637 if (sri)
18639 sri->icode = CODE_FOR_nothing;
18640 sri->extra_cost = 0;
18642 return true;
18645 /* Now check if we can do it in a few steps. */
18646 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
18647 altivec_p);
18650 /* Inform reload about cases where moving X with a mode MODE to a register in
18651 RCLASS requires an extra scratch or immediate register. Return the class
18652 needed for the immediate register.
18654 For VSX and Altivec, we may need a register to convert sp+offset into
18655 reg+reg addressing.
18657 For misaligned 64-bit gpr loads and stores we need a register to
18658 convert an offset address to indirect. */
18660 static reg_class_t
18661 rs6000_secondary_reload (bool in_p,
18662 rtx x,
18663 reg_class_t rclass_i,
18664 machine_mode mode,
18665 secondary_reload_info *sri)
18667 enum reg_class rclass = (enum reg_class) rclass_i;
18668 reg_class_t ret = ALL_REGS;
18669 enum insn_code icode;
18670 bool default_p = false;
18671 bool done_p = false;
18673 /* Allow subreg of memory before/during reload. */
18674 bool memory_p = (MEM_P (x)
18675 || (!reload_completed && GET_CODE (x) == SUBREG
18676 && MEM_P (SUBREG_REG (x))));
18678 sri->icode = CODE_FOR_nothing;
18679 sri->extra_cost = 0;
18680 icode = ((in_p)
18681 ? reg_addr[mode].reload_load
18682 : reg_addr[mode].reload_store);
18684 if (REG_P (x) || register_operand (x, mode))
18686 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
18687 bool altivec_p = (rclass == ALTIVEC_REGS);
18688 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
18690 if (!in_p)
18692 enum rs6000_reg_type exchange = to_type;
18693 to_type = from_type;
18694 from_type = exchange;
18697 /* Can we do a direct move of some sort? */
18698 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
18699 altivec_p))
18701 icode = (enum insn_code)sri->icode;
18702 default_p = false;
18703 done_p = true;
18704 ret = NO_REGS;
18708 /* Make sure 0.0 is not reloaded or forced into memory. */
18709 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
18711 ret = NO_REGS;
18712 default_p = false;
18713 done_p = true;
18716 /* If this is a scalar floating point value and we want to load it into the
18717 traditional Altivec registers, do it via a move via a traditional floating
18718 point register, unless we have D-form addressing. Also make sure that
18719 non-zero constants use an FPR. */
18720 if (!done_p && reg_addr[mode].scalar_in_vmx_p
18721 && !mode_supports_vmx_dform (mode)
18722 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18723 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
18725 ret = FLOAT_REGS;
18726 default_p = false;
18727 done_p = true;
18730 /* Handle reload of load/stores if we have reload helper functions. */
18731 if (!done_p && icode != CODE_FOR_nothing && memory_p)
18733 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
18734 mode);
18736 if (extra_cost >= 0)
18738 done_p = true;
18739 ret = NO_REGS;
18740 if (extra_cost > 0)
18742 sri->extra_cost = extra_cost;
18743 sri->icode = icode;
18748 /* Handle unaligned loads and stores of integer registers. */
18749 if (!done_p && TARGET_POWERPC64
18750 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18751 && memory_p
18752 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
18754 rtx addr = XEXP (x, 0);
18755 rtx off = address_offset (addr);
18757 if (off != NULL_RTX)
18759 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18760 unsigned HOST_WIDE_INT offset = INTVAL (off);
18762 /* We need a secondary reload when our legitimate_address_p
18763 says the address is good (as otherwise the entire address
18764 will be reloaded), and the offset is not a multiple of
18765 four or we have an address wrap. Address wrap will only
18766 occur for LO_SUMs since legitimate_offset_address_p
18767 rejects addresses for 16-byte mems that will wrap. */
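/* For instance, in the LO_SUM test below, a 16-byte access (extra = 8)
   at offset 0x7ffc gives ((0x7ffc & 0xffff) ^ 0x8000) = 0xfffc, which
   is >= 0x10000 - 8 = 0xfff8, so the tail of the access would wrap and
   a secondary reload is needed.  */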
18768 if (GET_CODE (addr) == LO_SUM
18769 ? (1 /* legitimate_address_p allows any offset for lo_sum */
18770 && ((offset & 3) != 0
18771 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
18772 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
18773 && (offset & 3) != 0))
18775 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
18776 if (in_p)
18777 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
18778 : CODE_FOR_reload_di_load);
18779 else
18780 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
18781 : CODE_FOR_reload_di_store);
18782 sri->extra_cost = 2;
18783 ret = NO_REGS;
18784 done_p = true;
18786 else
18787 default_p = true;
18789 else
18790 default_p = true;
18793 if (!done_p && !TARGET_POWERPC64
18794 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18795 && memory_p
18796 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
18798 rtx addr = XEXP (x, 0);
18799 rtx off = address_offset (addr);
18801 if (off != NULL_RTX)
18803 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18804 unsigned HOST_WIDE_INT offset = INTVAL (off);
18806 /* We need a secondary reload when our legitimate_address_p
18807 says the address is good (as otherwise the entire address
18808 will be reloaded), and we have a wrap.
18810 legitimate_lo_sum_address_p allows LO_SUM addresses to
18811 have any offset so test for wrap in the low 16 bits.
18813 legitimate_offset_address_p checks for the range
18814 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
18815 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
18816 [0x7ff4,0x7fff] respectively, so test for the
18817 intersection of these ranges, [0x7ffc,0x7fff] and
18818 [0x7ff4,0x7ff7] respectively.
18820 Note that the address we see here may have been
18821 manipulated by legitimize_reload_address. */
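/* For instance, with a 16-byte access (extra = 12 on a 32-bit target)
   at offset 0x7ff4, the test below computes 0x7ff4 - (0x8000 - 12) = 0,
   which is < UNITS_PER_WORD, so this offset is in the wrap range
   [0x7ff4,0x7ff7] and gets the secondary reload.  */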
18822 if (GET_CODE (addr) == LO_SUM
18823 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
18824 : offset - (0x8000 - extra) < UNITS_PER_WORD)
18826 if (in_p)
18827 sri->icode = CODE_FOR_reload_si_load;
18828 else
18829 sri->icode = CODE_FOR_reload_si_store;
18830 sri->extra_cost = 2;
18831 ret = NO_REGS;
18832 done_p = true;
18834 else
18835 default_p = true;
18837 else
18838 default_p = true;
18841 if (!done_p)
18842 default_p = true;
18844 if (default_p)
18845 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
18847 gcc_assert (ret != ALL_REGS);
18849 if (TARGET_DEBUG_ADDR)
18851 fprintf (stderr,
18852 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
18853 "mode = %s",
18854 reg_class_names[ret],
18855 in_p ? "true" : "false",
18856 reg_class_names[rclass],
18857 GET_MODE_NAME (mode));
18859 if (reload_completed)
18860 fputs (", after reload", stderr);
18862 if (!done_p)
18863 fputs (", done_p not set", stderr);
18865 if (default_p)
18866 fputs (", default secondary reload", stderr);
18868 if (sri->icode != CODE_FOR_nothing)
18869 fprintf (stderr, ", reload func = %s, extra cost = %d",
18870 insn_data[sri->icode].name, sri->extra_cost);
18872 else if (sri->extra_cost > 0)
18873 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
18875 fputs ("\n", stderr);
18876 debug_rtx (x);
18879 return ret;
18882 /* Better tracing for rs6000_secondary_reload_inner. */
18884 static void
18885 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
18886 bool store_p)
18888 rtx set, clobber;
18890 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
18892 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
18893 store_p ? "store" : "load");
18895 if (store_p)
18896 set = gen_rtx_SET (mem, reg);
18897 else
18898 set = gen_rtx_SET (reg, mem);
18900 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
18901 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
18904 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
18905 ATTRIBUTE_NORETURN;
18907 static void
18908 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
18909 bool store_p)
18911 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
18912 gcc_unreachable ();
18915 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
18916 reload helper functions. These were identified in
18917 rs6000_secondary_reload_memory, and if reload decided to use the secondary
18918 reload, it calls the insns:
18919 reload_<RELOAD:mode>_<P:mptrsize>_store
18920 reload_<RELOAD:mode>_<P:mptrsize>_load
18922 which in turn calls this function, to do whatever is necessary to create
18923 valid addresses. */
18925 void
18926 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
18928 int regno = true_regnum (reg);
18929 machine_mode mode = GET_MODE (reg);
18930 addr_mask_type addr_mask;
18931 rtx addr;
18932 rtx new_addr;
18933 rtx op_reg, op0, op1;
18934 rtx and_op;
18935 rtx cc_clobber;
18936 rtvec rv;
18938 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
18939 || !base_reg_operand (scratch, GET_MODE (scratch)))
18940 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18942 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
18943 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18945 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
18946 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18948 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
18949 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18951 else
18952 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18954 /* Make sure the mode is valid in this register class. */
18955 if ((addr_mask & RELOAD_REG_VALID) == 0)
18956 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18958 if (TARGET_DEBUG_ADDR)
18959 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
18961 new_addr = addr = XEXP (mem, 0);
18962 switch (GET_CODE (addr))
18964 /* Does the register class support auto update forms for this mode? If
18965 not, do the update now. We don't need a scratch register, since the
18966 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
18967 case PRE_INC:
18968 case PRE_DEC:
18969 op_reg = XEXP (addr, 0);
18970 if (!base_reg_operand (op_reg, Pmode))
18971 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18973 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18975 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
18976 new_addr = op_reg;
18978 break;
18980 case PRE_MODIFY:
18981 op0 = XEXP (addr, 0);
18982 op1 = XEXP (addr, 1);
18983 if (!base_reg_operand (op0, Pmode)
18984 || GET_CODE (op1) != PLUS
18985 || !rtx_equal_p (op0, XEXP (op1, 0)))
18986 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18988 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18990 emit_insn (gen_rtx_SET (op0, op1));
18991 new_addr = op0;
18993 break;
18995 /* Do we need to simulate AND -16 to clear the bottom address bits used
18996 in VMX load/stores? */
18997 case AND:
18998 op0 = XEXP (addr, 0);
18999 op1 = XEXP (addr, 1);
19000 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19002 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19003 op_reg = op0;
19005 else if (GET_CODE (op1) == PLUS)
19007 emit_insn (gen_rtx_SET (scratch, op1));
19008 op_reg = scratch;
19011 else
19012 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19014 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19015 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19016 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19017 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19018 new_addr = scratch;
19020 break;
19022 /* If this is an indirect address, make sure it is a base register. */
19023 case REG:
19024 case SUBREG:
19025 if (!base_reg_operand (addr, GET_MODE (addr)))
19027 emit_insn (gen_rtx_SET (scratch, addr));
19028 new_addr = scratch;
19030 break;
19032 /* If this is an indexed address, make sure the register class can handle
19033 indexed addresses for this mode. */
19034 case PLUS:
19035 op0 = XEXP (addr, 0);
19036 op1 = XEXP (addr, 1);
19037 if (!base_reg_operand (op0, Pmode))
19038 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19040 else if (int_reg_operand (op1, Pmode))
19042 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19044 emit_insn (gen_rtx_SET (scratch, addr));
19045 new_addr = scratch;
19049 /* Make sure the register class can handle offset addresses. */
19050 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19052 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19054 emit_insn (gen_rtx_SET (scratch, addr));
19055 new_addr = scratch;
19059 else
19060 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19062 break;
19064 case LO_SUM:
19065 op0 = XEXP (addr, 0);
19066 op1 = XEXP (addr, 1);
19067 if (!base_reg_operand (op0, Pmode))
19068 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19070 else if (int_reg_operand (op1, Pmode))
19072 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19074 emit_insn (gen_rtx_SET (scratch, addr));
19075 new_addr = scratch;
19079 /* Make sure the register class can handle offset addresses. */
19080 else if (legitimate_lo_sum_address_p (mode, addr, false))
19082 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19084 emit_insn (gen_rtx_SET (scratch, addr));
19085 new_addr = scratch;
19089 else
19090 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19092 break;
19094 case SYMBOL_REF:
19095 case CONST:
19096 case LABEL_REF:
19097 rs6000_emit_move (scratch, addr, Pmode);
19098 new_addr = scratch;
19099 break;
19101 default:
19102 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19105 /* Adjust the address if it changed. */
19106 if (addr != new_addr)
19108 mem = replace_equiv_address_nv (mem, new_addr);
19109 if (TARGET_DEBUG_ADDR)
19110 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19113 /* Now create the move. */
19114 if (store_p)
19115 emit_insn (gen_rtx_SET (mem, reg));
19116 else
19117 emit_insn (gen_rtx_SET (reg, mem));
19119 return;
19122 /* Convert reloads involving 64-bit gprs and misaligned offset
19123 addressing, or multiple 32-bit gprs and offsets that are too large,
19124 to use indirect addressing. */
19126 void
19127 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19129 int regno = true_regnum (reg);
19130 enum reg_class rclass;
19131 rtx addr;
19132 rtx scratch_or_premodify = scratch;
19134 if (TARGET_DEBUG_ADDR)
19136 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19137 store_p ? "store" : "load");
19138 fprintf (stderr, "reg:\n");
19139 debug_rtx (reg);
19140 fprintf (stderr, "mem:\n");
19141 debug_rtx (mem);
19142 fprintf (stderr, "scratch:\n");
19143 debug_rtx (scratch);
19146 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19147 gcc_assert (GET_CODE (mem) == MEM);
19148 rclass = REGNO_REG_CLASS (regno);
19149 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19150 addr = XEXP (mem, 0);
19152 if (GET_CODE (addr) == PRE_MODIFY)
19154 gcc_assert (REG_P (XEXP (addr, 0))
19155 && GET_CODE (XEXP (addr, 1)) == PLUS
19156 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19157 scratch_or_premodify = XEXP (addr, 0);
19158 if (!HARD_REGISTER_P (scratch_or_premodify))
19159 /* If we have a pseudo here then reload will have arranged
19160 to have it replaced, but only in the original insn.
19161 Use the replacement here too. */
19162 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19164 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19165 expressions from the original insn, without unsharing them.
19166 Any RTL that points into the original insn will of course
19167 have register replacements applied. That is why we don't
19168 need to look for replacements under the PLUS. */
19169 addr = XEXP (addr, 1);
19171 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19173 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19175 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19177 /* Now create the move. */
19178 if (store_p)
19179 emit_insn (gen_rtx_SET (mem, reg));
19180 else
19181 emit_insn (gen_rtx_SET (reg, mem));
19183 return;
19186 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
19187 this function has any SDmode references. If we are on a power7 or later, we
19188 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
19189 can load/store the value. */
19191 static void
19192 rs6000_alloc_sdmode_stack_slot (void)
19194 tree t;
19195 basic_block bb;
19196 gimple_stmt_iterator gsi;
19198 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
19199 /* We use a different approach for dealing with the secondary
19200 memory in LRA. */
19201 if (ira_use_lra_p)
19202 return;
19204 if (TARGET_NO_SDMODE_STACK)
19205 return;
19207 FOR_EACH_BB_FN (bb, cfun)
19208 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
19210 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
19211 if (ret)
19213 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19214 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19215 SDmode, 0);
19216 return;
19220 /* Check for any SDmode parameters of the function. */
19221 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
19223 if (TREE_TYPE (t) == error_mark_node)
19224 continue;
19226 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
19227 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
19229 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19230 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19231 SDmode, 0);
19232 return;
19237 static void
19238 rs6000_instantiate_decls (void)
19240 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
19241 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
19244 /* Given an rtx X being reloaded into a reg required to be
19245 in class CLASS, return the class of reg to actually use.
19246 In general this is just CLASS; but on some machines
19247 in some cases it is preferable to use a more restrictive class.
19249 On the RS/6000, we have to return NO_REGS when we want to reload a
19250 floating-point CONST_DOUBLE to force it to be copied to memory.
19252 We also don't want to reload integer values into floating-point
19253 registers if we can at all help it. In fact, this can
19254 cause reload to die, if it tries to generate a reload of CTR
19255 into a FP register and discovers it doesn't have the memory location
19256 required.
19258 ??? Would it be a good idea to have reload do the converse, that is
19259 try to reload floating modes into FP registers if possible? */
19262 static enum reg_class
19263 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19265 machine_mode mode = GET_MODE (x);
19266 bool is_constant = CONSTANT_P (x);
19268 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19269 reload class for it. */
19270 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19271 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19272 return NO_REGS;
19274 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19275 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19276 return NO_REGS;
19278 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19279 the reloading of address expressions using PLUS into floating point
19280 registers. */
19281 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19283 if (is_constant)
19285 /* Zero is always allowed in all VSX registers. */
19286 if (x == CONST0_RTX (mode))
19287 return rclass;
19289 /* If this is a vector constant that can be formed with a few Altivec
19290 instructions, we want altivec registers. */
19291 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19292 return ALTIVEC_REGS;
19294 /* Force constant to memory. */
19295 return NO_REGS;
19298 /* D-form addressing can easily reload the value. */
19299 if (mode_supports_vmx_dform (mode))
19300 return rclass;
19302 /* If this is a scalar floating point value and we don't have D-form
19303 addressing, prefer the traditional floating point registers so that we
19304 can use D-form (register+offset) addressing. */
19305 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
19306 return FLOAT_REGS;
19308 /* Prefer the Altivec registers if Altivec is handling the vector
19309 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19310 loads. */
19311 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19312 || mode == V1TImode)
19313 return ALTIVEC_REGS;
19315 return rclass;
19318 if (is_constant || GET_CODE (x) == PLUS)
19320 if (reg_class_subset_p (GENERAL_REGS, rclass))
19321 return GENERAL_REGS;
19322 if (reg_class_subset_p (BASE_REGS, rclass))
19323 return BASE_REGS;
19324 return NO_REGS;
19327 /* If we haven't picked a register class, and the type is a vector or
19328 floating point type, prefer to use the VSX, FPR, or Altivec register
19329 classes. */
19330 if (rclass == NO_REGS)
19332 if (TARGET_VSX && VECTOR_MEM_VSX_OR_P8_VECTOR_P (mode))
19333 return VSX_REGS;
19335 if (TARGET_ALTIVEC && VECTOR_MEM_ALTIVEC_P (mode))
19336 return ALTIVEC_REGS;
19338 if (DECIMAL_FLOAT_MODE_P (mode))
19339 return TARGET_DFP ? FLOAT_REGS : NO_REGS;
19341 if (TARGET_FPRS && TARGET_HARD_FLOAT && FLOAT_MODE_P (mode)
19342 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) != 0)
19343 return FLOAT_REGS;
19346 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
19347 return GENERAL_REGS;
19349 return rclass;
19352 /* Debug version of rs6000_preferred_reload_class. */
19353 static enum reg_class
19354 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19356 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19358 fprintf (stderr,
19359 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19360 "mode = %s, x:\n",
19361 reg_class_names[ret], reg_class_names[rclass],
19362 GET_MODE_NAME (GET_MODE (x)));
19363 debug_rtx (x);
19365 return ret;
19368 /* If we are copying between FP or AltiVec registers and anything else, we need
19369 a memory location. The exception is when we are targeting ppc64 and the
19370 fpr/gpr move instructions are available. Also, under VSX, you
19371 can copy vector registers from the FP register set to the Altivec register
19372 set and vice versa. */
19374 static bool
19375 rs6000_secondary_memory_needed (enum reg_class from_class,
19376 enum reg_class to_class,
19377 machine_mode mode)
19379 enum rs6000_reg_type from_type, to_type;
19380 bool altivec_p = ((from_class == ALTIVEC_REGS)
19381 || (to_class == ALTIVEC_REGS));
19383 /* If a simple/direct move is available, we don't need secondary memory */
19384 from_type = reg_class_to_reg_type[(int)from_class];
19385 to_type = reg_class_to_reg_type[(int)to_class];
19387 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19388 (secondary_reload_info *)0, altivec_p))
19389 return false;
19391 /* If we have a floating point or vector register class, we need to use
19392 memory to transfer the data. */
19393 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19394 return true;
19396 return false;
19399 /* Debug version of rs6000_secondary_memory_needed. */
19400 static bool
19401 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
19402 enum reg_class to_class,
19403 machine_mode mode)
19405 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
19407 fprintf (stderr,
19408 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19409 "to_class = %s, mode = %s\n",
19410 ret ? "true" : "false",
19411 reg_class_names[from_class],
19412 reg_class_names[to_class],
19413 GET_MODE_NAME (mode));
19415 return ret;
19418 /* Return the register class of a scratch register needed to copy IN into
19419 or out of a register in RCLASS in MODE. If it can be done directly,
19420 NO_REGS is returned. */
19422 static enum reg_class
19423 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19424 rtx in)
19426 int regno;
19428 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19429 #if TARGET_MACHO
19430 && MACHOPIC_INDIRECT
19431 #endif
19434 /* We cannot copy a symbolic operand directly into anything
19435 other than BASE_REGS for TARGET_ELF. So indicate that a
19436 register from BASE_REGS is needed as an intermediate
19437 register.
19439 On Darwin, pic addresses require a load from memory, which
19440 needs a base register. */
19441 if (rclass != BASE_REGS
19442 && (GET_CODE (in) == SYMBOL_REF
19443 || GET_CODE (in) == HIGH
19444 || GET_CODE (in) == LABEL_REF
19445 || GET_CODE (in) == CONST))
19446 return BASE_REGS;
19449 if (GET_CODE (in) == REG)
19451 regno = REGNO (in);
19452 if (regno >= FIRST_PSEUDO_REGISTER)
19454 regno = true_regnum (in);
19455 if (regno >= FIRST_PSEUDO_REGISTER)
19456 regno = -1;
19459 else if (GET_CODE (in) == SUBREG)
19461 regno = true_regnum (in);
19462 if (regno >= FIRST_PSEUDO_REGISTER)
19463 regno = -1;
19465 else
19466 regno = -1;
19468 /* If we have VSX register moves, prefer moving scalar values between
19469 Altivec registers and GPRs by going via an FPR (and then via memory)
19470 instead of reloading the secondary memory address for Altivec moves. */
19471 if (TARGET_VSX
19472 && GET_MODE_SIZE (mode) < 16
19473 && !mode_supports_vmx_dform (mode)
19474 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19475 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19476 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19477 && (regno >= 0 && INT_REGNO_P (regno)))))
19478 return FLOAT_REGS;
19480 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19481 into anything. */
19482 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19483 || (regno >= 0 && INT_REGNO_P (regno)))
19484 return NO_REGS;
19486 /* Constants, memory, and VSX registers can go into VSX registers (both the
19487 traditional floating point and the altivec registers). */
19488 if (rclass == VSX_REGS
19489 && (regno == -1 || VSX_REGNO_P (regno)))
19490 return NO_REGS;
19492 /* Constants, memory, and FP registers can go into FP registers. */
19493 if ((regno == -1 || FP_REGNO_P (regno))
19494 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
19495 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19497 /* Memory and AltiVec registers can go into AltiVec registers. */
19498 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19499 && rclass == ALTIVEC_REGS)
19500 return NO_REGS;
19502 /* We can copy among the CR registers. */
19503 if ((rclass == CR_REGS || rclass == CR0_REGS)
19504 && regno >= 0 && CR_REGNO_P (regno))
19505 return NO_REGS;
19507 /* Otherwise, we need GENERAL_REGS. */
19508 return GENERAL_REGS;
19511 /* Debug version of rs6000_secondary_reload_class. */
19512 static enum reg_class
19513 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19514 machine_mode mode, rtx in)
19516 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19517 fprintf (stderr,
19518 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19519 "mode = %s, input rtx:\n",
19520 reg_class_names[ret], reg_class_names[rclass],
19521 GET_MODE_NAME (mode));
19522 debug_rtx (in);
19524 return ret;
19527 /* Return true if a mode change from FROM to TO is invalid for RCLASS. */
19529 static bool
19530 rs6000_cannot_change_mode_class (machine_mode from,
19531 machine_mode to,
19532 enum reg_class rclass)
19534 unsigned from_size = GET_MODE_SIZE (from);
19535 unsigned to_size = GET_MODE_SIZE (to);
19537 if (from_size != to_size)
19539 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19541 if (reg_classes_intersect_p (xclass, rclass))
19543 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
19544 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
19545 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19546 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19548 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19549 single register under VSX because the scalar part of the register
19550 is in the upper 64-bits, and not the lower 64-bits. Types like
19551 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19552 IEEE floating point can't overlap, and neither can small
19553 values. */
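/* For instance, under VSX a DImode subreg of a V2DImode value is
   rejected by the size-8 checks below: V2DImode takes a single 128-bit
   register, and the 64-bit scalar lives in the upper half of that
   register, not where the subreg would look.  */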
19555 if (to_float128_vector_p && from_float128_vector_p)
19556 return false;
19558 else if (to_float128_vector_p || from_float128_vector_p)
19559 return true;
19561 /* TDmode in floating-mode registers must always go into a register
19562 pair with the most significant word in the even-numbered register
19563 to match ISA requirements. In little-endian mode, this does not
19564 match subreg numbering, so we cannot allow subregs. */
19565 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19566 return true;
19568 if (from_size < 8 || to_size < 8)
19569 return true;
19571 if (from_size == 8 && (8 * to_nregs) != to_size)
19572 return true;
19574 if (to_size == 8 && (8 * from_nregs) != from_size)
19575 return true;
19577 return false;
19579 else
19580 return false;
19583 if (TARGET_E500_DOUBLE
19584 && ((((to) == DFmode) + ((from) == DFmode)) == 1
19585 || (((to) == TFmode) + ((from) == TFmode)) == 1
19586 || (((to) == IFmode) + ((from) == IFmode)) == 1
19587 || (((to) == KFmode) + ((from) == KFmode)) == 1
19588 || (((to) == DDmode) + ((from) == DDmode)) == 1
19589 || (((to) == TDmode) + ((from) == TDmode)) == 1
19590 || (((to) == DImode) + ((from) == DImode)) == 1))
19591 return true;
19593 /* Since the VSX register set includes traditional floating point registers
19594 and altivec registers, just check for the size being different instead of
19595 trying to check whether the modes are vector modes. Otherwise it won't
19596 allow, say, DF and DI to change classes. For types like TFmode and TDmode
19597 that take 2 64-bit registers, rather than a single 128-bit register, don't
19598 allow subregs of those types to other 128-bit types. */
19599 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19601 unsigned num_regs = (from_size + 15) / 16;
19602 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
19603 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
19604 return true;
19606 return (from_size != 8 && from_size != 16);
19609 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19610 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19611 return true;
19613 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
19614 && reg_classes_intersect_p (GENERAL_REGS, rclass))
19615 return true;
19617 return false;
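/* Illustration of the size checks above: with -mvsx, (subreg:V2DF
   (reg:DF)) is rejected because the 8-byte scalar lives in the upper half
   of the 128-bit register, while a DImode <-> DFmode subreg (both 8
   bytes) remains legal. */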
19620 /* Debug version of rs6000_cannot_change_mode_class. */
19621 static bool
19622 rs6000_debug_cannot_change_mode_class (machine_mode from,
19623 machine_mode to,
19624 enum reg_class rclass)
19626 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
19628 fprintf (stderr,
19629 "rs6000_cannot_change_mode_class, return %s, from = %s, "
19630 "to = %s, rclass = %s\n",
19631 ret ? "true" : "false",
19632 GET_MODE_NAME (from), GET_MODE_NAME (to),
19633 reg_class_names[rclass]);
19635 return ret;
19638 /* Return a string to do a move operation of 128 bits of data. */
19640 const char *
19641 rs6000_output_move_128bit (rtx operands[])
19643 rtx dest = operands[0];
19644 rtx src = operands[1];
19645 machine_mode mode = GET_MODE (dest);
19646 int dest_regno;
19647 int src_regno;
19648 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
19649 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
19651 if (REG_P (dest))
19653 dest_regno = REGNO (dest);
19654 dest_gpr_p = INT_REGNO_P (dest_regno);
19655 dest_fp_p = FP_REGNO_P (dest_regno);
19656 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
19657 dest_vsx_p = dest_fp_p | dest_vmx_p;
19659 else
19661 dest_regno = -1;
19662 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
19665 if (REG_P (src))
19667 src_regno = REGNO (src);
19668 src_gpr_p = INT_REGNO_P (src_regno);
19669 src_fp_p = FP_REGNO_P (src_regno);
19670 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
19671 src_vsx_p = src_fp_p | src_vmx_p;
19673 else
19675 src_regno = -1;
19676 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
19679 /* Register moves. */
19680 if (dest_regno >= 0 && src_regno >= 0)
19682 if (dest_gpr_p)
19684 if (src_gpr_p)
19685 return "#";
19687 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
19688 return (WORDS_BIG_ENDIAN
19689 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
19690 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
19692 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
19693 return "#";
19696 else if (TARGET_VSX && dest_vsx_p)
19698 if (src_vsx_p)
19699 return "xxlor %x0,%x1,%x1";
19701 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
19702 return (WORDS_BIG_ENDIAN
19703 ? "mtvsrdd %x0,%1,%L1"
19704 : "mtvsrdd %x0,%L1,%1");
19706 else if (TARGET_DIRECT_MOVE && src_gpr_p)
19707 return "#";
19710 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
19711 return "vor %0,%1,%1";
19713 else if (dest_fp_p && src_fp_p)
19714 return "#";
19717 /* Loads. */
19718 else if (dest_regno >= 0 && MEM_P (src))
19720 if (dest_gpr_p)
19722 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19723 return "lq %0,%1";
19724 else
19725 return "#";
19728 else if (TARGET_ALTIVEC && dest_vmx_p
19729 && altivec_indexed_or_indirect_operand (src, mode))
19730 return "lvx %0,%y1";
19732 else if (TARGET_VSX && dest_vsx_p)
19734 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19735 return "lxvw4x %x0,%y1";
19736 else
19737 return "lxvd2x %x0,%y1";
19740 else if (TARGET_ALTIVEC && dest_vmx_p)
19741 return "lvx %0,%y1";
19743 else if (dest_fp_p)
19744 return "#";
19747 /* Stores. */
19748 else if (src_regno >= 0 && MEM_P (dest))
19750 if (src_gpr_p)
19752 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19753 return "stq %1,%0";
19754 else
19755 return "#";
19758 else if (TARGET_ALTIVEC && src_vmx_p
19759 && altivec_indexed_or_indirect_operand (src, mode))
19760 return "stvx %1,%y0";
19762 else if (TARGET_VSX && src_vsx_p)
19764 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19765 return "stxvw4x %x1,%y0";
19766 else
19767 return "stxvd2x %x1,%y0";
19770 else if (TARGET_ALTIVEC && src_vmx_p)
19771 return "stvx %1,%y0";
19773 else if (src_fp_p)
19774 return "#";
19777 /* Constants. */
19778 else if (dest_regno >= 0
19779 && (GET_CODE (src) == CONST_INT
19780 || GET_CODE (src) == CONST_WIDE_INT
19781 || GET_CODE (src) == CONST_DOUBLE
19782 || GET_CODE (src) == CONST_VECTOR))
19784 if (dest_gpr_p)
19785 return "#";
19787 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
19788 return "xxlxor %x0,%x0,%x0";
19790 else if (TARGET_ALTIVEC && dest_vmx_p)
19791 return output_vec_const_move (operands);
19794 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
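/* For instance, a V2DImode copy between two VSX registers returns
   "xxlor %x0,%x1,%x1" above, while a TImode GPR-to-GPR copy returns "#"
   and is left for the splitter to break into individual doubleword
   moves. */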
19797 /* Validate a 128-bit move. */
19798 bool
19799 rs6000_move_128bit_ok_p (rtx operands[])
19801 machine_mode mode = GET_MODE (operands[0]);
19802 return (gpc_reg_operand (operands[0], mode)
19803 || gpc_reg_operand (operands[1], mode));
19806 /* Return true if a 128-bit move needs to be split. */
19807 bool
19808 rs6000_split_128bit_ok_p (rtx operands[])
19810 if (!reload_completed)
19811 return false;
19813 if (!gpr_or_gpr_p (operands[0], operands[1]))
19814 return false;
19816 if (quad_load_store_p (operands[0], operands[1]))
19817 return false;
19819 return true;
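/* e.g. after reload, a TImode move between two GPR pairs that does not
   qualify as a quad load/store answers true here and is split into
   doubleword moves. */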
19823 /* Given a comparison operation, return the bit number in CCR to test. We
19824 know this is a valid comparison.
19826 SCC_P is 1 if this is for an scc. That means that %D will have been
19827 used instead of %C, so the bits will be in different places.
19829 Return -1 if OP isn't a valid comparison for some reason. */
19831 int
19832 ccr_bit (rtx op, int scc_p)
19834 enum rtx_code code = GET_CODE (op);
19835 machine_mode cc_mode;
19836 int cc_regnum;
19837 int base_bit;
19838 rtx reg;
19840 if (!COMPARISON_P (op))
19841 return -1;
19843 reg = XEXP (op, 0);
19845 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
19847 cc_mode = GET_MODE (reg);
19848 cc_regnum = REGNO (reg);
19849 base_bit = 4 * (cc_regnum - CR0_REGNO);
19851 validate_condition_mode (code, cc_mode);
19853 /* When generating a sCOND operation, only positive conditions are
19854 allowed. */
19855 gcc_assert (!scc_p
19856 || code == EQ || code == GT || code == LT || code == UNORDERED
19857 || code == GTU || code == LTU);
19859 switch (code)
19861 case NE:
19862 return scc_p ? base_bit + 3 : base_bit + 2;
19863 case EQ:
19864 return base_bit + 2;
19865 case GT: case GTU: case UNLE:
19866 return base_bit + 1;
19867 case LT: case LTU: case UNGE:
19868 return base_bit;
19869 case ORDERED: case UNORDERED:
19870 return base_bit + 3;
19872 case GE: case GEU:
19873 /* If scc, we will have done a cror to put the bit in the
19874 unordered position. So test that bit. For integer, this is ! LT
19875 unless this is an scc insn. */
19876 return scc_p ? base_bit + 3 : base_bit;
19878 case LE: case LEU:
19879 return scc_p ? base_bit + 3 : base_bit + 1;
19881 default:
19882 gcc_unreachable ();
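/* Worked example: for (gt (reg:CC cr1) (const_int 0)) with SCC_P of 0,
   base_bit is 4 * 1 = 4 and GT maps to CR bit 4 + 1 = 5, cr1's GT
   bit. */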
19886 /* Return the GOT register. */
19888 rtx
19889 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
19891 /* The second flow pass currently (June 1999) can't update
19892 regs_ever_live without disturbing other parts of the compiler, so
19893 update it here to make the prolog/epilogue code happy. */
19894 if (!can_create_pseudo_p ()
19895 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
19896 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
19898 crtl->uses_pic_offset_table = 1;
19900 return pic_offset_table_rtx;
19903 static rs6000_stack_t stack_info;
19905 /* Function to init struct machine_function.
19906 This will be called, via a pointer variable,
19907 from push_function_context. */
19909 static struct machine_function *
19910 rs6000_init_machine_status (void)
19912 stack_info.reload_completed = 0;
19913 return ggc_cleared_alloc<machine_function> ();
19916 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
19918 /* Write out a function code label. */
19920 void
19921 rs6000_output_function_entry (FILE *file, const char *fname)
19923 if (fname[0] != '.')
19925 switch (DEFAULT_ABI)
19927 default:
19928 gcc_unreachable ();
19930 case ABI_AIX:
19931 if (DOT_SYMBOLS)
19932 putc ('.', file);
19933 else
19934 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
19935 break;
19937 case ABI_ELFv2:
19938 case ABI_V4:
19939 case ABI_DARWIN:
19940 break;
19944 RS6000_OUTPUT_BASENAME (file, fname);
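/* e.g. under ABI_AIX with dot-symbols the entry label for "foo" is
   emitted as ".foo"; without them the internal-label form "L.foo" is
   used instead. */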
19947 /* Print an operand. Recognize special options, documented below. */
19949 #if TARGET_ELF
19950 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
19951 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
19952 #else
19953 #define SMALL_DATA_RELOC "sda21"
19954 #define SMALL_DATA_REG 0
19955 #endif
19957 void
19958 print_operand (FILE *file, rtx x, int code)
19960 int i;
19961 unsigned HOST_WIDE_INT uval;
19963 switch (code)
19965 /* %a is output_address. */
19967 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
19968 output_operand. */
19970 case 'D':
19971 /* Like 'J' but get to the GT bit only. */
19972 gcc_assert (REG_P (x));
19974 /* Bit 1 is GT bit. */
19975 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
19977 /* Add one for shift count in rlinm for scc. */
19978 fprintf (file, "%d", i + 1);
19979 return;
19981 case 'e':
19982 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
19983 if (! INT_P (x))
19985 output_operand_lossage ("invalid %%e value");
19986 return;
19989 uval = INTVAL (x);
19990 if ((uval & 0xffff) == 0 && uval != 0)
19991 putc ('s', file);
19992 return;
19994 case 'E':
19995 /* X is a CR register. Print the number of the EQ bit of the CR. */
19996 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19997 output_operand_lossage ("invalid %%E value");
19998 else
19999 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20000 return;
20002 case 'f':
20003 /* X is a CR register. Print the shift count needed to move it
20004 to the high-order four bits. */
20005 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20006 output_operand_lossage ("invalid %%f value");
20007 else
20008 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20009 return;
20011 case 'F':
20012 /* Similar, but print the count for the rotate in the opposite
20013 direction. */
20014 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20015 output_operand_lossage ("invalid %%F value");
20016 else
20017 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20018 return;
20020 case 'G':
20021 /* X is a constant integer. If it is negative, print "m",
20022 otherwise print "z". This is to make an aze or ame insn. */
20023 if (GET_CODE (x) != CONST_INT)
20024 output_operand_lossage ("invalid %%G value");
20025 else if (INTVAL (x) >= 0)
20026 putc ('z', file);
20027 else
20028 putc ('m', file);
20029 return;
20031 case 'h':
20032 /* If constant, output low-order five bits. Otherwise, write
20033 normally. */
20034 if (INT_P (x))
20035 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20036 else
20037 print_operand (file, x, 0);
20038 return;
20040 case 'H':
20041 /* If constant, output low-order six bits. Otherwise, write
20042 normally. */
20043 if (INT_P (x))
20044 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20045 else
20046 print_operand (file, x, 0);
20047 return;
20049 case 'I':
20050 /* Print `i' if this is a constant, else nothing. */
20051 if (INT_P (x))
20052 putc ('i', file);
20053 return;
20055 case 'j':
20056 /* Write the bit number in CCR for jump. */
20057 i = ccr_bit (x, 0);
20058 if (i == -1)
20059 output_operand_lossage ("invalid %%j code");
20060 else
20061 fprintf (file, "%d", i);
20062 return;
20064 case 'J':
20065 /* Similar, but add one for shift count in rlinm for scc and pass
20066 scc flag to `ccr_bit'. */
20067 i = ccr_bit (x, 1);
20068 if (i == -1)
20069 output_operand_lossage ("invalid %%J code");
20070 else
20071 /* If we want bit 31, write a shift count of zero, not 32. */
20072 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20073 return;
20075 case 'k':
20076 /* X must be a constant. Write the 1's complement of the
20077 constant. */
20078 if (! INT_P (x))
20079 output_operand_lossage ("invalid %%k value");
20080 else
20081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20082 return;
20084 case 'K':
20085 /* X must be a symbolic constant on ELF. Write an
20086 expression suitable for an 'addi' that adds in the low 16
20087 bits of the MEM. */
20088 if (GET_CODE (x) == CONST)
20090 if (GET_CODE (XEXP (x, 0)) != PLUS
20091 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20092 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20093 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20094 output_operand_lossage ("invalid %%K value");
20096 print_operand_address (file, x);
20097 fputs ("@l", file);
20098 return;
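/* e.g. (const (plus (symbol_ref "x") (const_int 4))) prints as "x+4@l",
   ready to be the immediate of an addi. */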
20100 /* %l is output_asm_label. */
20102 case 'L':
20103 /* Write second word of DImode or DFmode reference. Works on register
20104 or non-indexed memory only. */
20105 if (REG_P (x))
20106 fputs (reg_names[REGNO (x) + 1], file);
20107 else if (MEM_P (x))
20109 machine_mode mode = GET_MODE (x);
20110 /* Handle possible auto-increment. Since it is pre-increment and
20111 we have already done it, we can just use an offset of word. */
20112 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20113 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20114 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20115 UNITS_PER_WORD));
20116 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20117 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20118 UNITS_PER_WORD));
20119 else
20120 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20121 UNITS_PER_WORD),
20122 0));
20124 if (small_data_operand (x, GET_MODE (x)))
20125 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20126 reg_names[SMALL_DATA_REG]);
20128 return;
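/* e.g. on a 32-bit target %L of (reg:DI 3) prints "4", the register
   holding the second word, and %L of a non-indexed mem prints the same
   address displaced by UNITS_PER_WORD. */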
20130 case 'N':
20131 /* Write the number of elements in the vector times 4. */
20132 if (GET_CODE (x) != PARALLEL)
20133 output_operand_lossage ("invalid %%N value");
20134 else
20135 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20136 return;
20138 case 'O':
20139 /* Similar, but subtract 1 first. */
20140 if (GET_CODE (x) != PARALLEL)
20141 output_operand_lossage ("invalid %%O value");
20142 else
20143 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20144 return;
20146 case 'p':
20147 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20148 if (! INT_P (x)
20149 || INTVAL (x) < 0
20150 || (i = exact_log2 (INTVAL (x))) < 0)
20151 output_operand_lossage ("invalid %%p value");
20152 else
20153 fprintf (file, "%d", i);
20154 return;
20156 case 'P':
20157 /* The operand must be an indirect memory reference. The result
20158 is the register name. */
20159 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20160 || REGNO (XEXP (x, 0)) >= 32)
20161 output_operand_lossage ("invalid %%P value");
20162 else
20163 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20164 return;
20166 case 'q':
20167 /* This outputs the logical code corresponding to a boolean
20168 expression. The expression may have one or both operands
20169 negated (if one, only the first one). For condition register
20170 logical operations, it will also treat the negated
20171 CR codes as NOTs, but not handle NOTs of them. */
20173 const char *const *t = 0;
20174 const char *s;
20175 enum rtx_code code = GET_CODE (x);
20176 static const char * const tbl[3][3] = {
20177 { "and", "andc", "nor" },
20178 { "or", "orc", "nand" },
20179 { "xor", "eqv", "xor" } };
20181 if (code == AND)
20182 t = tbl[0];
20183 else if (code == IOR)
20184 t = tbl[1];
20185 else if (code == XOR)
20186 t = tbl[2];
20187 else
20188 output_operand_lossage ("invalid %%q value");
20190 if (GET_CODE (XEXP (x, 0)) != NOT)
20191 s = t[0];
20192 else
20194 if (GET_CODE (XEXP (x, 1)) == NOT)
20195 s = t[2];
20196 else
20197 s = t[1];
20200 fputs (s, file);
20202 return;
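/* e.g. (and (not (reg)) (reg)) selects "andc" from the table above, and
   (ior (not (reg)) (not (reg))) selects "nand". */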
20204 case 'Q':
20205 if (! TARGET_MFCRF)
20206 return;
20207 fputc (',', file);
20208 /* FALLTHRU */
20210 case 'R':
20211 /* X is a CR register. Print the mask for `mtcrf'. */
20212 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20213 output_operand_lossage ("invalid %%R value");
20214 else
20215 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20216 return;
20218 case 's':
20219 /* Low 5 bits of 32 - value. */
20220 if (! INT_P (x))
20221 output_operand_lossage ("invalid %%s value");
20222 else
20223 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20224 return;
20226 case 't':
20227 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20228 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20230 /* Bit 3 is OV bit. */
20231 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20233 /* If we want bit 31, write a shift count of zero, not 32. */
20234 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20235 return;
20237 case 'T':
20238 /* Print the symbolic name of a branch target register. */
20239 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20240 && REGNO (x) != CTR_REGNO))
20241 output_operand_lossage ("invalid %%T value");
20242 else if (REGNO (x) == LR_REGNO)
20243 fputs ("lr", file);
20244 else
20245 fputs ("ctr", file);
20246 return;
20248 case 'u':
20249 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20250 for use in unsigned operand. */
20251 if (! INT_P (x))
20253 output_operand_lossage ("invalid %%u value");
20254 return;
20257 uval = INTVAL (x);
20258 if ((uval & 0xffff) == 0)
20259 uval >>= 16;
20261 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20262 return;
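/* e.g. %u of 0x12340000 prints 0x1234 (the high half is the non-zero
   one), while %u of 0x5678 prints 0x5678 itself. */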
20264 case 'v':
20265 /* High-order 16 bits of constant for use in signed operand. */
20266 if (! INT_P (x))
20267 output_operand_lossage ("invalid %%v value");
20268 else
20269 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20270 (INTVAL (x) >> 16) & 0xffff);
20271 return;
20273 case 'U':
20274 /* Print `u' if this has an auto-increment or auto-decrement. */
20275 if (MEM_P (x)
20276 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20277 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20278 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20279 putc ('u', file);
20280 return;
20282 case 'V':
20283 /* Print the trap code for this operand. */
20284 switch (GET_CODE (x))
20286 case EQ:
20287 fputs ("eq", file); /* 4 */
20288 break;
20289 case NE:
20290 fputs ("ne", file); /* 24 */
20291 break;
20292 case LT:
20293 fputs ("lt", file); /* 16 */
20294 break;
20295 case LE:
20296 fputs ("le", file); /* 20 */
20297 break;
20298 case GT:
20299 fputs ("gt", file); /* 8 */
20300 break;
20301 case GE:
20302 fputs ("ge", file); /* 12 */
20303 break;
20304 case LTU:
20305 fputs ("llt", file); /* 2 */
20306 break;
20307 case LEU:
20308 fputs ("lle", file); /* 6 */
20309 break;
20310 case GTU:
20311 fputs ("lgt", file); /* 1 */
20312 break;
20313 case GEU:
20314 fputs ("lge", file); /* 5 */
20315 break;
20316 default:
20317 gcc_unreachable ();
20319 break;
20321 case 'w':
20322 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20323 normally. */
20324 if (INT_P (x))
20325 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20326 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20327 else
20328 print_operand (file, x, 0);
20329 return;
20331 case 'x':
20332 /* X is a FPR or Altivec register used in a VSX context. */
20333 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
20334 output_operand_lossage ("invalid %%x value");
20335 else
20337 int reg = REGNO (x);
20338 int vsx_reg = (FP_REGNO_P (reg)
20339 ? reg - 32
20340 : reg - FIRST_ALTIVEC_REGNO + 32);
20342 #ifdef TARGET_REGNAMES
20343 if (TARGET_REGNAMES)
20344 fprintf (file, "%%vs%d", vsx_reg);
20345 else
20346 #endif
20347 fprintf (file, "%d", vsx_reg);
20349 return;
20351 case 'X':
20352 if (MEM_P (x)
20353 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20354 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20355 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20356 putc ('x', file);
20357 return;
20359 case 'Y':
20360 /* Like 'L', for third word of TImode/PTImode. */
20361 if (REG_P (x))
20362 fputs (reg_names[REGNO (x) + 2], file);
20363 else if (MEM_P (x))
20365 machine_mode mode = GET_MODE (x);
20366 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20367 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20368 output_address (mode, plus_constant (Pmode,
20369 XEXP (XEXP (x, 0), 0), 8));
20370 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20371 output_address (mode, plus_constant (Pmode,
20372 XEXP (XEXP (x, 0), 0), 8));
20373 else
20374 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20375 if (small_data_operand (x, GET_MODE (x)))
20376 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20377 reg_names[SMALL_DATA_REG]);
20379 return;
20381 case 'z':
20382 /* X is a SYMBOL_REF. Write out the name preceded by a
20383 period and without any trailing data in brackets. Used for function
20384 names. If we are configured for System V (or the embedded ABI) on
20385 the PowerPC, do not emit the period, since those systems do not use
20386 TOCs and the like. */
20387 gcc_assert (GET_CODE (x) == SYMBOL_REF);
20389 /* For macho, check to see if we need a stub. */
20390 if (TARGET_MACHO)
20392 const char *name = XSTR (x, 0);
20393 #if TARGET_MACHO
20394 if (darwin_emit_branch_islands
20395 && MACHOPIC_INDIRECT
20396 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20397 name = machopic_indirection_name (x, /*stub_p=*/true);
20398 #endif
20399 assemble_name (file, name);
20401 else if (!DOT_SYMBOLS)
20402 assemble_name (file, XSTR (x, 0));
20403 else
20404 rs6000_output_function_entry (file, XSTR (x, 0));
20405 return;
20407 case 'Z':
20408 /* Like 'L', for last word of TImode/PTImode. */
20409 if (REG_P (x))
20410 fputs (reg_names[REGNO (x) + 3], file);
20411 else if (MEM_P (x))
20413 machine_mode mode = GET_MODE (x);
20414 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20415 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20416 output_address (mode, plus_constant (Pmode,
20417 XEXP (XEXP (x, 0), 0), 12));
20418 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20419 output_address (mode, plus_constant (Pmode,
20420 XEXP (XEXP (x, 0), 0), 12));
20421 else
20422 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20423 if (small_data_operand (x, GET_MODE (x)))
20424 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20425 reg_names[SMALL_DATA_REG]);
20427 return;
20429 /* Print AltiVec or SPE memory operand. */
20430 case 'y':
20432 rtx tmp;
20434 gcc_assert (MEM_P (x));
20436 tmp = XEXP (x, 0);
20438 /* Ugly hack because %y is overloaded. */
20439 if ((TARGET_SPE || TARGET_E500_DOUBLE)
20440 && (GET_MODE_SIZE (GET_MODE (x)) == 8
20441 || FLOAT128_2REG_P (GET_MODE (x))
20442 || GET_MODE (x) == TImode
20443 || GET_MODE (x) == PTImode))
20445 /* Handle [reg]. */
20446 if (REG_P (tmp))
20448 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
20449 break;
20451 /* Handle [reg+UIMM]. */
20452 else if (GET_CODE (tmp) == PLUS
20453 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
20455 int x;
20457 gcc_assert (REG_P (XEXP (tmp, 0)));
20459 x = INTVAL (XEXP (tmp, 1));
20460 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
20461 break;
20464 /* Fall through. Must be [reg+reg]. */
20466 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
20467 && GET_CODE (tmp) == AND
20468 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
20469 && INTVAL (XEXP (tmp, 1)) == -16)
20470 tmp = XEXP (tmp, 0);
20471 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20472 && GET_CODE (tmp) == PRE_MODIFY)
20473 tmp = XEXP (tmp, 1);
20474 if (REG_P (tmp))
20475 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20476 else
20478 if (GET_CODE (tmp) != PLUS
20479 || !REG_P (XEXP (tmp, 0))
20480 || !REG_P (XEXP (tmp, 1)))
20482 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20483 break;
20486 if (REGNO (XEXP (tmp, 0)) == 0)
20487 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20488 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20489 else
20490 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20491 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20493 break;
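/* e.g. for an AltiVec operand (mem:V4SI (and:SI (reg 9) (const_int -16)))
   %y prints "0,9", the RA,RB form that lvx and stvx expect. */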
20496 case 0:
20497 if (REG_P (x))
20498 fprintf (file, "%s", reg_names[REGNO (x)]);
20499 else if (MEM_P (x))
20501 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20502 know the width from the mode. */
20503 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20504 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20505 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20506 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20507 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20508 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20509 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20510 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20511 else
20512 output_address (GET_MODE (x), XEXP (x, 0));
20514 else
20516 if (toc_relative_expr_p (x, false))
20517 /* This hack along with a corresponding hack in
20518 rs6000_output_addr_const_extra arranges to output addends
20519 where the assembler expects to find them. eg.
20520 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20521 without this hack would be output as "x@toc+4". We
20522 want "x+4@toc". */
20523 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20524 else
20525 output_addr_const (file, x);
20527 return;
20529 case '&':
20530 if (const char *name = get_some_local_dynamic_name ())
20531 assemble_name (file, name);
20532 else
20533 output_operand_lossage ("'%%&' used without any "
20534 "local dynamic TLS references");
20535 return;
20537 default:
20538 output_operand_lossage ("invalid %%xn code");
20542 /* Print the address of an operand. */
20544 void
20545 print_operand_address (FILE *file, rtx x)
20547 if (REG_P (x))
20548 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20549 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
20550 || GET_CODE (x) == LABEL_REF)
20552 output_addr_const (file, x);
20553 if (small_data_operand (x, GET_MODE (x)))
20554 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20555 reg_names[SMALL_DATA_REG]);
20556 else
20557 gcc_assert (!TARGET_TOC);
20559 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20560 && REG_P (XEXP (x, 1)))
20562 if (REGNO (XEXP (x, 0)) == 0)
20563 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20564 reg_names[ REGNO (XEXP (x, 0)) ]);
20565 else
20566 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20567 reg_names[ REGNO (XEXP (x, 1)) ]);
20569 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20570 && GET_CODE (XEXP (x, 1)) == CONST_INT)
20571 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20572 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20573 #if TARGET_MACHO
20574 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20575 && CONSTANT_P (XEXP (x, 1)))
20577 fprintf (file, "lo16(");
20578 output_addr_const (file, XEXP (x, 1));
20579 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20581 #endif
20582 #if TARGET_ELF
20583 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20584 && CONSTANT_P (XEXP (x, 1)))
20586 output_addr_const (file, XEXP (x, 1));
20587 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20589 #endif
20590 else if (toc_relative_expr_p (x, false))
20592 /* This hack along with a corresponding hack in
20593 rs6000_output_addr_const_extra arranges to output addends
20594 where the assembler expects to find them. eg.
20595 (lo_sum (reg 9)
20596 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20597 without this hack would be output as "x@toc+8@l(9)". We
20598 want "x+8@toc@l(9)". */
20599 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20600 if (GET_CODE (x) == LO_SUM)
20601 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20602 else
20603 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
20605 else
20606 gcc_unreachable ();
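/* e.g. (plus (reg 9) (const_int 16)) prints as "16(9)" and an indexed
   (plus (reg 9) (reg 10)) as "9,10"; when r0 appears first it is swapped
   into the second slot, since r0 in the RA position reads as zero. */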
20609 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
20611 static bool
20612 rs6000_output_addr_const_extra (FILE *file, rtx x)
20614 if (GET_CODE (x) == UNSPEC)
20615 switch (XINT (x, 1))
20617 case UNSPEC_TOCREL:
20618 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
20619 && REG_P (XVECEXP (x, 0, 1))
20620 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
20621 output_addr_const (file, XVECEXP (x, 0, 0));
20622 if (x == tocrel_base && tocrel_offset != const0_rtx)
20624 if (INTVAL (tocrel_offset) >= 0)
20625 fprintf (file, "+");
20626 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
20628 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
20630 putc ('-', file);
20631 assemble_name (file, toc_label_name);
20632 need_toc_init = 1;
20634 else if (TARGET_ELF)
20635 fputs ("@toc", file);
20636 return true;
20638 #if TARGET_MACHO
20639 case UNSPEC_MACHOPIC_OFFSET:
20640 output_addr_const (file, XVECEXP (x, 0, 0));
20641 putc ('-', file);
20642 machopic_output_function_base_name (file);
20643 return true;
20644 #endif
20646 return false;
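/* e.g. a UNSPEC_TOCREL wrapping (symbol_ref "x") with a tocrel_offset of
   4 prints as "x+4@toc" on ELF, or as "x+4" minus the TOC base label when
   -mminimal-toc requires explicit TOC-relative arithmetic. */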
20649 /* Target hook for assembling integer objects. The PowerPC version has
20650 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
20651 is defined. It also needs to handle DI-mode objects on 64-bit
20652 targets. */
20654 static bool
20655 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
20657 #ifdef RELOCATABLE_NEEDS_FIXUP
20658 /* Special handling for SI values. */
20659 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
20661 static int recurse = 0;
20663 /* For -mrelocatable, we mark all addresses that need to be fixed up in
20664 the .fixup section. Since the TOC section is already relocated, we
20665 don't need to mark it here. We used to skip the text section, but it
20666 should never be valid for relocated addresses to be placed in the text
20667 section. */
20668 if (TARGET_RELOCATABLE
20669 && in_section != toc_section
20670 && !recurse
20671 && !CONST_SCALAR_INT_P (x)
20672 && CONSTANT_P (x))
20674 char buf[256];
20676 recurse = 1;
20677 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
20678 fixuplabelno++;
20679 ASM_OUTPUT_LABEL (asm_out_file, buf);
20680 fprintf (asm_out_file, "\t.long\t(");
20681 output_addr_const (asm_out_file, x);
20682 fprintf (asm_out_file, ")@fixup\n");
20683 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
20684 ASM_OUTPUT_ALIGN (asm_out_file, 2);
20685 fprintf (asm_out_file, "\t.long\t");
20686 assemble_name (asm_out_file, buf);
20687 fprintf (asm_out_file, "\n\t.previous\n");
20688 recurse = 0;
20689 return true;
20691 /* Remove initial .'s to turn a -mcall-aixdesc function
20692 address into the address of the descriptor, not the function
20693 itself. */
20694 else if (GET_CODE (x) == SYMBOL_REF
20695 && XSTR (x, 0)[0] == '.'
20696 && DEFAULT_ABI == ABI_AIX)
20698 const char *name = XSTR (x, 0);
20699 while (*name == '.')
20700 name++;
20702 fprintf (asm_out_file, "\t.long\t%s\n", name);
20703 return true;
20706 #endif /* RELOCATABLE_NEEDS_FIXUP */
20707 return default_assemble_integer (x, size, aligned_p);
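/* With -mrelocatable, a 4-byte constant address "x" is thus emitted
   roughly as:
   .LCP0: .long (x)@fixup
   .section ".fixup","aw"
   .long .LCP0
   so the startup code can find and relocate it at load time. */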
20710 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
20711 /* Emit an assembler directive to set symbol visibility for DECL to
20712 VISIBILITY_TYPE. */
20714 static void
20715 rs6000_assemble_visibility (tree decl, int vis)
20717 if (TARGET_XCOFF)
20718 return;
20720 /* Functions need to have their entry point symbol visibility set as
20721 well as their descriptor symbol visibility. */
20722 if (DEFAULT_ABI == ABI_AIX
20723 && DOT_SYMBOLS
20724 && TREE_CODE (decl) == FUNCTION_DECL)
20726 static const char * const visibility_types[] = {
20727 NULL, "internal", "hidden", "protected"
20730 const char *name, *type;
20732 name = ((* targetm.strip_name_encoding)
20733 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
20734 type = visibility_types[vis];
20736 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
20737 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
20739 else
20740 default_assemble_visibility (decl, vis);
20742 #endif
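/* e.g. a hidden function "foo" under the AIX ABI with dot-symbols gets
   both ".hidden foo" and ".hidden .foo", covering the descriptor and the
   code entry symbol. */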
20744 enum rtx_code
20745 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
20747 /* Reversal of FP compares takes care -- an ordered compare
20748 becomes an unordered compare and vice versa. */
20749 if (mode == CCFPmode
20750 && (!flag_finite_math_only
20751 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
20752 || code == UNEQ || code == LTGT))
20753 return reverse_condition_maybe_unordered (code);
20754 else
20755 return reverse_condition (code);
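/* e.g. reversing LT in CCFPmode yields UNGE rather than GE, so operands
   that compared unordered (a NaN) still take the correct arm of the
   reversed branch. */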
20758 /* Generate a compare for CODE. Return a brand-new rtx that
20759 represents the result of the compare. */
20761 static rtx
20762 rs6000_generate_compare (rtx cmp, machine_mode mode)
20764 machine_mode comp_mode;
20765 rtx compare_result;
20766 enum rtx_code code = GET_CODE (cmp);
20767 rtx op0 = XEXP (cmp, 0);
20768 rtx op1 = XEXP (cmp, 1);
20770 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20771 comp_mode = CCmode;
20772 else if (FLOAT_MODE_P (mode))
20773 comp_mode = CCFPmode;
20774 else if (code == GTU || code == LTU
20775 || code == GEU || code == LEU)
20776 comp_mode = CCUNSmode;
20777 else if ((code == EQ || code == NE)
20778 && unsigned_reg_p (op0)
20779 && (unsigned_reg_p (op1)
20780 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
20781 /* These are unsigned values, perhaps there will be a later
20782 ordering compare that can be shared with this one. */
20783 comp_mode = CCUNSmode;
20784 else
20785 comp_mode = CCmode;
20787 /* If we have an unsigned compare, make sure we don't have a signed value as
20788 an immediate. */
20789 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
20790 && INTVAL (op1) < 0)
20792 op0 = copy_rtx_if_shared (op0);
20793 op1 = force_reg (GET_MODE (op0), op1);
20794 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
20797 /* First, the compare. */
20798 compare_result = gen_reg_rtx (comp_mode);
20800 /* E500 FP compare instructions on the GPRs. Yuck! */
20801 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
20802 && FLOAT_MODE_P (mode))
20804 rtx cmp, or_result, compare_result2;
20805 machine_mode op_mode = GET_MODE (op0);
20806 bool reverse_p;
20808 if (op_mode == VOIDmode)
20809 op_mode = GET_MODE (op1);
20811 /* First reverse the condition codes that aren't directly supported. */
20812 switch (code)
20814 case NE:
20815 case UNLT:
20816 case UNLE:
20817 case UNGT:
20818 case UNGE:
20819 code = reverse_condition_maybe_unordered (code);
20820 reverse_p = true;
20821 break;
20823 case EQ:
20824 case LT:
20825 case LE:
20826 case GT:
20827 case GE:
20828 reverse_p = false;
20829 break;
20831 default:
20832 gcc_unreachable ();
20835 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
20836 This explains the following mess. */
20838 switch (code)
20840 case EQ:
20841 switch (op_mode)
20843 case SFmode:
20844 cmp = (flag_finite_math_only && !flag_trapping_math)
20845 ? gen_tstsfeq_gpr (compare_result, op0, op1)
20846 : gen_cmpsfeq_gpr (compare_result, op0, op1);
20847 break;
20849 case DFmode:
20850 cmp = (flag_finite_math_only && !flag_trapping_math)
20851 ? gen_tstdfeq_gpr (compare_result, op0, op1)
20852 : gen_cmpdfeq_gpr (compare_result, op0, op1);
20853 break;
20855 case TFmode:
20856 case IFmode:
20857 case KFmode:
20858 cmp = (flag_finite_math_only && !flag_trapping_math)
20859 ? gen_tsttfeq_gpr (compare_result, op0, op1)
20860 : gen_cmptfeq_gpr (compare_result, op0, op1);
20861 break;
20863 default:
20864 gcc_unreachable ();
20866 break;
20868 case GT:
20869 case GE:
20870 switch (op_mode)
20872 case SFmode:
20873 cmp = (flag_finite_math_only && !flag_trapping_math)
20874 ? gen_tstsfgt_gpr (compare_result, op0, op1)
20875 : gen_cmpsfgt_gpr (compare_result, op0, op1);
20876 break;
20878 case DFmode:
20879 cmp = (flag_finite_math_only && !flag_trapping_math)
20880 ? gen_tstdfgt_gpr (compare_result, op0, op1)
20881 : gen_cmpdfgt_gpr (compare_result, op0, op1);
20882 break;
20884 case TFmode:
20885 case IFmode:
20886 case KFmode:
20887 cmp = (flag_finite_math_only && !flag_trapping_math)
20888 ? gen_tsttfgt_gpr (compare_result, op0, op1)
20889 : gen_cmptfgt_gpr (compare_result, op0, op1);
20890 break;
20892 default:
20893 gcc_unreachable ();
20895 break;
20897 case LT:
20898 case LE:
20899 switch (op_mode)
20901 case SFmode:
20902 cmp = (flag_finite_math_only && !flag_trapping_math)
20903 ? gen_tstsflt_gpr (compare_result, op0, op1)
20904 : gen_cmpsflt_gpr (compare_result, op0, op1);
20905 break;
20907 case DFmode:
20908 cmp = (flag_finite_math_only && !flag_trapping_math)
20909 ? gen_tstdflt_gpr (compare_result, op0, op1)
20910 : gen_cmpdflt_gpr (compare_result, op0, op1);
20911 break;
20913 case TFmode:
20914 case IFmode:
20915 case KFmode:
20916 cmp = (flag_finite_math_only && !flag_trapping_math)
20917 ? gen_tsttflt_gpr (compare_result, op0, op1)
20918 : gen_cmptflt_gpr (compare_result, op0, op1);
20919 break;
20921 default:
20922 gcc_unreachable ();
20924 break;
20926 default:
20927 gcc_unreachable ();
20930 /* Synthesize LE and GE from LT/GT || EQ. */
20931 if (code == LE || code == GE)
20933 emit_insn (cmp);
20935 compare_result2 = gen_reg_rtx (CCFPmode);
20937 /* Do the EQ. */
20938 switch (op_mode)
20940 case SFmode:
20941 cmp = (flag_finite_math_only && !flag_trapping_math)
20942 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
20943 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
20944 break;
20946 case DFmode:
20947 cmp = (flag_finite_math_only && !flag_trapping_math)
20948 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
20949 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
20950 break;
20952 case TFmode:
20953 case IFmode:
20954 case KFmode:
20955 cmp = (flag_finite_math_only && !flag_trapping_math)
20956 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
20957 : gen_cmptfeq_gpr (compare_result2, op0, op1);
20958 break;
20960 default:
20961 gcc_unreachable ();
20964 emit_insn (cmp);
20966 /* OR them together. */
20967 or_result = gen_reg_rtx (CCFPmode);
20968 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
20969 compare_result2);
20970 compare_result = or_result;
20973 code = reverse_p ? NE : EQ;
20975 emit_insn (cmp);
20978 /* IEEE 128-bit support in VSX registers when we do not have hardware
20979 support. */
20980 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20982 rtx libfunc = NULL_RTX;
20983 bool uneq_or_ltgt = false;
20984 rtx dest = gen_reg_rtx (SImode);
20986 switch (code)
20988 case EQ:
20989 case NE:
20990 libfunc = optab_libfunc (eq_optab, mode);
20991 break;
20993 case GT:
20994 case GE:
20995 libfunc = optab_libfunc (ge_optab, mode);
20996 break;
20998 case LT:
20999 case LE:
21000 libfunc = optab_libfunc (le_optab, mode);
21001 break;
21003 case UNORDERED:
21004 case ORDERED:
21005 libfunc = optab_libfunc (unord_optab, mode);
21006 code = (code == UNORDERED) ? NE : EQ;
21007 break;
21009 case UNGE:
21010 case UNGT:
21011 libfunc = optab_libfunc (le_optab, mode);
21012 code = (code == UNGE) ? GE : GT;
21013 break;
21015 case UNLE:
21016 case UNLT:
21017 libfunc = optab_libfunc (ge_optab, mode);
21018 code = (code == UNLE) ? LE : LT;
21019 break;
21021 case UNEQ:
21022 case LTGT:
21023 libfunc = optab_libfunc (le_optab, mode);
21024 uneq_or_ltgt = true;
21025 code = (code == UNEQ) ? NE : EQ;
21026 break;
21028 default:
21029 gcc_unreachable ();
21032 gcc_assert (libfunc);
21033 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21034 SImode, 2, op0, mode, op1, mode);
21036 /* If this is UNEQ or LTGT, we call __lekf2, which returns -1 for less
21037 than, 0 for equal, +1 for greater, and +2 for NaN. We add 1, to give
21038 a value of 0..3, and then do an AND immediate of 1 to isolate whether
21039 it is equal/NaN (i.e. bottom bit is 1), or less than/greater than
21040 (i.e. bottom bit is 0). */
21041 if (uneq_or_ltgt)
21043 rtx add_result = gen_reg_rtx (SImode);
21044 rtx and_result = gen_reg_rtx (SImode);
21045 emit_insn (gen_addsi3 (add_result, dest, GEN_INT (1)));
21046 emit_insn (gen_andsi3 (and_result, add_result, GEN_INT (1)));
21047 dest = and_result;
21050 emit_insn (gen_rtx_SET (compare_result,
21051 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21054 else
21056 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21057 CLOBBERs to match cmptf_internal2 pattern. */
21058 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21059 && FLOAT128_IBM_P (GET_MODE (op0))
21060 && TARGET_HARD_FLOAT && TARGET_FPRS)
21061 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21062 gen_rtvec (10,
21063 gen_rtx_SET (compare_result,
21064 gen_rtx_COMPARE (comp_mode, op0, op1)),
21065 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21066 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21067 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21068 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21069 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21070 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21071 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21072 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21073 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21074 else if (GET_CODE (op1) == UNSPEC
21075 && XINT (op1, 1) == UNSPEC_SP_TEST)
21077 rtx op1b = XVECEXP (op1, 0, 0);
21078 comp_mode = CCEQmode;
21079 compare_result = gen_reg_rtx (CCEQmode);
21080 if (TARGET_64BIT)
21081 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21082 else
21083 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21085 else
21086 emit_insn (gen_rtx_SET (compare_result,
21087 gen_rtx_COMPARE (comp_mode, op0, op1)));
21090 /* Some kinds of FP comparisons need an OR operation;
21091 under flag_finite_math_only we don't bother. */
21092 if (FLOAT_MODE_P (mode)
21093 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21094 && !flag_finite_math_only
21095 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
21096 && (code == LE || code == GE
21097 || code == UNEQ || code == LTGT
21098 || code == UNGT || code == UNLT))
21100 enum rtx_code or1, or2;
21101 rtx or1_rtx, or2_rtx, compare2_rtx;
21102 rtx or_result = gen_reg_rtx (CCEQmode);
21104 switch (code)
21106 case LE: or1 = LT; or2 = EQ; break;
21107 case GE: or1 = GT; or2 = EQ; break;
21108 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21109 case LTGT: or1 = LT; or2 = GT; break;
21110 case UNGT: or1 = UNORDERED; or2 = GT; break;
21111 case UNLT: or1 = UNORDERED; or2 = LT; break;
21112 default: gcc_unreachable ();
21114 validate_condition_mode (or1, comp_mode);
21115 validate_condition_mode (or2, comp_mode);
21116 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21117 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21118 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21119 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21120 const_true_rtx);
21121 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21123 compare_result = or_result;
21124 code = EQ;
21127 validate_condition_mode (code, GET_MODE (compare_result));
21129 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
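/* e.g. a DFmode LE compare without -ffinite-math-only comes back from
   here as an EQ test of a CCEQ register whose bit was formed by cror-ing
   the LT and EQ bits of the underlying fcmpu result. */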
21133 /* Return the diagnostic message string if the binary operation OP is
21134 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21136 static const char*
21137 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21138 const_tree type1,
21139 const_tree type2)
21141 enum machine_mode mode1 = TYPE_MODE (type1);
21142 enum machine_mode mode2 = TYPE_MODE (type2);
21144 /* For complex modes, use the inner type. */
21145 if (COMPLEX_MODE_P (mode1))
21146 mode1 = GET_MODE_INNER (mode1);
21148 if (COMPLEX_MODE_P (mode2))
21149 mode2 = GET_MODE_INNER (mode2);
21151 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21152 double to intermix unless -mfloat128-convert. */
21153 if (mode1 == mode2)
21154 return NULL;
21156 if (!TARGET_FLOAT128_CVT)
21158 if ((mode1 == KFmode && mode2 == IFmode)
21159 || (mode1 == IFmode && mode2 == KFmode))
21160 return N_("__float128 and __ibm128 cannot be used in the same "
21161 "expression");
21163 if (TARGET_IEEEQUAD
21164 && ((mode1 == IFmode && mode2 == TFmode)
21165 || (mode1 == TFmode && mode2 == IFmode)))
21166 return N_("__ibm128 and long double cannot be used in the same "
21167 "expression");
21169 if (!TARGET_IEEEQUAD
21170 && ((mode1 == KFmode && mode2 == TFmode)
21171 || (mode1 == TFmode && mode2 == KFmode)))
21172 return N_("__float128 and long double cannot be used in the same "
21173 "expression");
21176 return NULL;
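/* e.g. without -mfloat128-convert, "__float128 a; __ibm128 b; a + b"
   trips the first message above, while a + a is accepted since the modes
   already match. */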
21180 /* Expand floating point conversion to/from __float128 and __ibm128. */
21182 void
21183 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21185 machine_mode dest_mode = GET_MODE (dest);
21186 machine_mode src_mode = GET_MODE (src);
21187 convert_optab cvt = unknown_optab;
21188 bool do_move = false;
21189 rtx libfunc = NULL_RTX;
21190 rtx dest2;
21191 typedef rtx (*rtx_2func_t) (rtx, rtx);
21192 rtx_2func_t hw_convert = (rtx_2func_t)0;
21193 size_t kf_or_tf;
21195 struct hw_conv_t {
21196 rtx_2func_t from_df;
21197 rtx_2func_t from_sf;
21198 rtx_2func_t from_si_sign;
21199 rtx_2func_t from_si_uns;
21200 rtx_2func_t from_di_sign;
21201 rtx_2func_t from_di_uns;
21202 rtx_2func_t to_df;
21203 rtx_2func_t to_sf;
21204 rtx_2func_t to_si_sign;
21205 rtx_2func_t to_si_uns;
21206 rtx_2func_t to_di_sign;
21207 rtx_2func_t to_di_uns;
21208 } hw_conversions[2] = {
21209 /* conversions to/from KFmode */
21211 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21212 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21213 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21214 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21215 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21216 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21217 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21218 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21219 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21220 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21221 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21222 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21225 /* conversions to/from TFmode */
21227 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21228 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21229 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21230 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21231 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21232 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21233 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21234 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21235 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21236 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21237 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21238 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21242 if (dest_mode == src_mode)
21243 gcc_unreachable ();
21245 /* Eliminate memory operations. */
21246 if (MEM_P (src))
21247 src = force_reg (src_mode, src);
21249 if (MEM_P (dest))
21251 rtx tmp = gen_reg_rtx (dest_mode);
21252 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21253 rs6000_emit_move (dest, tmp, dest_mode);
21254 return;
21257 /* Convert to IEEE 128-bit floating point. */
21258 if (FLOAT128_IEEE_P (dest_mode))
21260 if (dest_mode == KFmode)
21261 kf_or_tf = 0;
21262 else if (dest_mode == TFmode)
21263 kf_or_tf = 1;
21264 else
21265 gcc_unreachable ();
21267 switch (src_mode)
21269 case DFmode:
21270 cvt = sext_optab;
21271 hw_convert = hw_conversions[kf_or_tf].from_df;
21272 break;
21274 case SFmode:
21275 cvt = sext_optab;
21276 hw_convert = hw_conversions[kf_or_tf].from_sf;
21277 break;
21279 case KFmode:
21280 case IFmode:
21281 case TFmode:
21282 if (FLOAT128_IBM_P (src_mode))
21283 cvt = sext_optab;
21284 else
21285 do_move = true;
21286 break;
21288 case SImode:
21289 if (unsigned_p)
21291 cvt = ufloat_optab;
21292 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21294 else
21296 cvt = sfloat_optab;
21297 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21299 break;
21301 case DImode:
21302 if (unsigned_p)
21304 cvt = ufloat_optab;
21305 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21307 else
21309 cvt = sfloat_optab;
21310 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21312 break;
21314 default:
21315 gcc_unreachable ();
21319 /* Convert from IEEE 128-bit floating point. */
21320 else if (FLOAT128_IEEE_P (src_mode))
21322 if (src_mode == KFmode)
21323 kf_or_tf = 0;
21324 else if (src_mode == TFmode)
21325 kf_or_tf = 1;
21326 else
21327 gcc_unreachable ();
21329 switch (dest_mode)
21331 case DFmode:
21332 cvt = trunc_optab;
21333 hw_convert = hw_conversions[kf_or_tf].to_df;
21334 break;
21336 case SFmode:
21337 cvt = trunc_optab;
21338 hw_convert = hw_conversions[kf_or_tf].to_sf;
21339 break;
21341 case KFmode:
21342 case IFmode:
21343 case TFmode:
21344 if (FLOAT128_IBM_P (dest_mode))
21345 cvt = trunc_optab;
21346 else
21347 do_move = true;
21348 break;
21350 case SImode:
21351 if (unsigned_p)
21353 cvt = ufix_optab;
21354 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21356 else
21358 cvt = sfix_optab;
21359 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
21361 break;
21363 case DImode:
21364 if (unsigned_p)
21366 cvt = ufix_optab;
21367 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
21369 else
21371 cvt = sfix_optab;
21372 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
21374 break;
21376 default:
21377 gcc_unreachable ();
21381 /* Both IBM format. */
21382 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
21383 do_move = true;
21385 else
21386 gcc_unreachable ();
21388 /* Handle conversion between TFmode/KFmode. */
21389 if (do_move)
21390 emit_move_insn (dest, gen_lowpart (dest_mode, src));
21392 /* Handle conversion if we have hardware support. */
21393 else if (TARGET_FLOAT128_HW && hw_convert)
21394 emit_insn ((hw_convert) (dest, src));
21396 /* Call an external function to do the conversion. */
21397 else if (cvt != unknown_optab)
21399 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
21400 gcc_assert (libfunc != NULL_RTX);
21402 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
21403 src_mode);
21405 gcc_assert (dest2 != NULL_RTX);
21406 if (!rtx_equal_p (dest, dest2))
21407 emit_move_insn (dest, dest2);
21410 else
21411 gcc_unreachable ();
21413 return;
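/* e.g. a signed DImode to KFmode conversion uses gen_float_kfdi2_hw when
   ISA 3.0 float128 hardware is available, and otherwise calls the
   library routine registered for sfloat_optab (__floatdikf). */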
21416 /* Split a conversion from __float128 to an integer type into separate insns.
21417 OPERANDS points to the destination, source, and V2DI temporary
21418 register. CODE is either FIX or UNSIGNED_FIX. */
21420 void
21421 convert_float128_to_int (rtx *operands, enum rtx_code code)
21423 rtx dest = operands[0];
21424 rtx src = operands[1];
21425 rtx tmp = operands[2];
21426 rtx cvt;
21427 rtvec cvt_vec;
21428 rtx cvt_unspec;
21429 rtvec move_vec;
21430 rtx move_unspec;
21432 if (GET_CODE (tmp) == SCRATCH)
21433 tmp = gen_reg_rtx (V2DImode);
21435 if (MEM_P (dest))
21436 dest = rs6000_address_for_fpconvert (dest);
21438 /* Generate the actual convert insn of the form:
21439 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
21440 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
21441 cvt_vec = gen_rtvec (1, cvt);
21442 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
21443 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
21445 /* Generate the move insn of the form:
21446 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
21447 move_vec = gen_rtvec (1, tmp);
21448 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
21449 emit_insn (gen_rtx_SET (dest, move_unspec));
21452 /* Split a conversion from an integer type to __float128 into separate insns.
21453 OPERANDS points to the destination, source, and V2DI temporary
21454 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
21456 void
21457 convert_int_to_float128 (rtx *operands, enum rtx_code code)
21459 rtx dest = operands[0];
21460 rtx src = operands[1];
21461 rtx tmp = operands[2];
21462 rtx cvt;
21463 rtvec cvt_vec;
21464 rtx cvt_unspec;
21465 rtvec move_vec;
21466 rtx move_unspec;
21467 rtx unsigned_flag;
21469 if (GET_CODE (tmp) == SCRATCH)
21470 tmp = gen_reg_rtx (V2DImode);
21472 if (MEM_P (src))
21473 src = rs6000_address_for_fpconvert (src);
21475 /* Generate the move of the integer into the Altivec register of the form:
21476 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
21477 (const_int 0)] UNSPEC_IEEE128_MOVE)).
21479 or:
21480 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
21482 if (GET_MODE (src) == SImode)
21484 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
21485 move_vec = gen_rtvec (2, src, unsigned_flag);
21487 else
21488 move_vec = gen_rtvec (1, src);
21490 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
21491 emit_insn (gen_rtx_SET (tmp, move_unspec));
21493 /* Generate the actual convert insn of the form:
21494 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
21495 UNSPEC_IEEE128_CONVERT))). */
21496 cvt_vec = gen_rtvec (1, tmp);
21497 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
21498 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
21499 emit_insn (gen_rtx_SET (dest, cvt));
21503 /* Emit the RTL for an sISEL pattern. */
21505 void
21506 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
21508 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
21511 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
21512 can be used as that dest register. Return the dest register. */
21514 rtx
21515 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21517 if (op2 == const0_rtx)
21518 return op1;
21520 if (GET_CODE (scratch) == SCRATCH)
21521 scratch = gen_reg_rtx (mode);
21523 if (logical_operand (op2, mode))
21524 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21525 else
21526 emit_insn (gen_rtx_SET (scratch,
21527 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21529 return scratch;
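/* e.g. comparing op1 against 7 (a logical_operand) emits
   scratch = op1 ^ 7, which is zero exactly when they are equal, while
   comparing against -3 emits scratch = op1 + 3 instead. */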
21532 void
21533 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21535 rtx condition_rtx;
21536 machine_mode op_mode;
21537 enum rtx_code cond_code;
21538 rtx result = operands[0];
21540 condition_rtx = rs6000_generate_compare (operands[1], mode);
21541 cond_code = GET_CODE (condition_rtx);
21543 if (FLOAT_MODE_P (mode)
21544 && !TARGET_FPRS && TARGET_HARD_FLOAT)
21546 rtx t;
21548 PUT_MODE (condition_rtx, SImode);
21549 t = XEXP (condition_rtx, 0);
21551 gcc_assert (cond_code == NE || cond_code == EQ);
21553 if (cond_code == NE)
21554 emit_insn (gen_e500_flip_gt_bit (t, t));
21556 emit_insn (gen_move_from_CR_gt_bit (result, t));
21557 return;
21560 if (cond_code == NE
21561 || cond_code == GE || cond_code == LE
21562 || cond_code == GEU || cond_code == LEU
21563 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21565 rtx not_result = gen_reg_rtx (CCEQmode);
21566 rtx not_op, rev_cond_rtx;
21567 machine_mode cc_mode;
21569 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21571 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21572 SImode, XEXP (condition_rtx, 0), const0_rtx);
21573 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21574 emit_insn (gen_rtx_SET (not_result, not_op));
21575 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21578 op_mode = GET_MODE (XEXP (operands[1], 0));
21579 if (op_mode == VOIDmode)
21580 op_mode = GET_MODE (XEXP (operands[1], 1));
21582 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21584 PUT_MODE (condition_rtx, DImode);
21585 convert_move (result, condition_rtx, 0);
21587 else
21589 PUT_MODE (condition_rtx, SImode);
21590 emit_insn (gen_rtx_SET (result, condition_rtx));
21594 /* Emit a branch of kind CODE to location LOC. */
21596 void
21597 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21599 rtx condition_rtx, loc_ref;
21601 condition_rtx = rs6000_generate_compare (operands[0], mode);
21602 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21603 emit_jump_insn (gen_rtx_SET (pc_rtx,
21604 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21605 loc_ref, pc_rtx)));
21608 /* Return the string to output a conditional branch to LABEL, which is
21609 the operand template of the label, or NULL if the branch is really a
21610 conditional return.
21612 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21613 condition code register and its mode specifies what kind of
21614 comparison we made.
21616 REVERSED is nonzero if we should reverse the sense of the comparison.
21618 INSN is the insn. */
21620 char *
21621 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21623 static char string[64];
21624 enum rtx_code code = GET_CODE (op);
21625 rtx cc_reg = XEXP (op, 0);
21626 machine_mode mode = GET_MODE (cc_reg);
21627 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21628 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21629 int really_reversed = reversed ^ need_longbranch;
21630 char *s = string;
21631 const char *ccode;
21632 const char *pred;
21633 rtx note;
21635 validate_condition_mode (code, mode);
21637 /* Work out which way this really branches. We could use
21638 reverse_condition_maybe_unordered here in all cases, but using
21639 reverse_condition for the non-FP cases makes the resulting assembler clearer. */
21640 if (really_reversed)
21642 /* Reversal of FP compares takes care -- an ordered compare
21643 becomes an unordered compare and vice versa. */
21644 if (mode == CCFPmode)
21645 code = reverse_condition_maybe_unordered (code);
21646 else
21647 code = reverse_condition (code);
21650 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
21652 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
21653 to the GT bit. */
21654 switch (code)
21656 case EQ:
21657 /* Opposite of GT. */
21658 code = GT;
21659 break;
21661 case NE:
21662 code = UNLE;
21663 break;
21665 default:
21666 gcc_unreachable ();
21670 switch (code)
21672 /* Not all of these are actually distinct opcodes, but
21673 we distinguish them for clarity of the resulting assembler. */
21674 case NE: case LTGT:
21675 ccode = "ne"; break;
21676 case EQ: case UNEQ:
21677 ccode = "eq"; break;
21678 case GE: case GEU:
21679 ccode = "ge"; break;
21680 case GT: case GTU: case UNGT:
21681 ccode = "gt"; break;
21682 case LE: case LEU:
21683 ccode = "le"; break;
21684 case LT: case LTU: case UNLT:
21685 ccode = "lt"; break;
21686 case UNORDERED: ccode = "un"; break;
21687 case ORDERED: ccode = "nu"; break;
21688 case UNGE: ccode = "nl"; break;
21689 case UNLE: ccode = "ng"; break;
21690 default:
21691 gcc_unreachable ();
21694 /* Maybe we have a guess as to how likely the branch is. */
21695 pred = "";
21696 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21697 if (note != NULL_RTX)
21699 /* PROB is the difference from 50%. */
21700 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
21702 /* Only hint for highly probable/improbable branches on newer cpus when
21703 we have real profile data, as static prediction overrides processor
21704 dynamic prediction. For older cpus we may as well always hint, but
21705 assume not taken for branches that are very close to 50% as a
21706 mispredicted taken branch is more expensive than a
21707 mispredicted not-taken branch. */
21708 if (rs6000_always_hint
21709 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21710 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
21711 && br_prob_note_reliable_p (note)))
21713 if (abs (prob) > REG_BR_PROB_BASE / 20
21714 && ((prob > 0) ^ need_longbranch))
21715 pred = "+";
21716 else
21717 pred = "-";
21721 if (label == NULL)
21722 s += sprintf (s, "b%slr%s ", ccode, pred);
21723 else
21724 s += sprintf (s, "b%s%s ", ccode, pred);
21726 /* We need to escape any '%' characters in the reg_names string.
21727 Assume they'd only be the first character.... */
21728 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21729 *s++ = '%';
21730 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21732 if (label != NULL)
21734 /* If the branch distance was too far, we may have to use an
21735 unconditional branch to go the distance. */
21736 if (need_longbranch)
21737 s += sprintf (s, ",$+8\n\tb %s", label);
21738 else
21739 s += sprintf (s, ",%s", label);
21742 return string;
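/* Example outputs (editorial sketch, not in the original source; the
   exact CR register spelling depends on the assembler dialect): for
   (eq (reg:CC cr0) (const_int 0)) and a nearby label this returns text like
     beq 0,.L3
   a label beyond conditional-branch range instead produces the inverted
   long form
     bne 0,$+8
     b .L3
   and a reliable branch-probability note appends a "+" or "-" hint,
   e.g. "beq+ 0,.L3".  */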
21745 /* Return the string to flip the GT bit on a CR. */
21746 char *
21747 output_e500_flip_gt_bit (rtx dst, rtx src)
21749 static char string[64];
21750 int a, b;
21752 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
21753 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
21755 /* GT bit. */
21756 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
21757 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
21759 sprintf (string, "crnot %d,%d", a, b);
21760 return string;
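/* Worked example (editorial, not in the original source): with DST and
   SRC both cr1, the GT bit is 4*1 + 1 = 5, so the returned string is
   "crnot 5,5", complementing cr1's GT bit in place.  */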
21763 /* Return insn for VSX or Altivec comparisons. */
21765 static rtx
21766 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21768 rtx mask;
21769 machine_mode mode = GET_MODE (op0);
21771 switch (code)
21773 default:
21774 break;
21776 case GE:
21777 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21778 return NULL_RTX;
21780 case EQ:
21781 case GT:
21782 case GTU:
21783 case ORDERED:
21784 case UNORDERED:
21785 case UNEQ:
21786 case LTGT:
21787 mask = gen_reg_rtx (mode);
21788 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21789 return mask;
21792 return NULL_RTX;
21795 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21796 DMODE is expected destination mode. This is a recursive function. */
21798 static rtx
21799 rs6000_emit_vector_compare (enum rtx_code rcode,
21800 rtx op0, rtx op1,
21801 machine_mode dmode)
21803 rtx mask;
21804 bool swap_operands = false;
21805 bool try_again = false;
21807 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
21808 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
21810 /* See if the comparison works as is. */
21811 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21812 if (mask)
21813 return mask;
21815 switch (rcode)
21817 case LT:
21818 rcode = GT;
21819 swap_operands = true;
21820 try_again = true;
21821 break;
21822 case LTU:
21823 rcode = GTU;
21824 swap_operands = true;
21825 try_again = true;
21826 break;
21827 case NE:
21828 case UNLE:
21829 case UNLT:
21830 case UNGE:
21831 case UNGT:
21832 /* Invert condition and try again.
21833 e.g., A != B becomes ~(A==B). */
21835 enum rtx_code rev_code;
21836 enum insn_code nor_code;
21837 rtx mask2;
21839 rev_code = reverse_condition_maybe_unordered (rcode);
21840 if (rev_code == UNKNOWN)
21841 return NULL_RTX;
21843 nor_code = optab_handler (one_cmpl_optab, dmode);
21844 if (nor_code == CODE_FOR_nothing)
21845 return NULL_RTX;
21847 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
21848 if (!mask2)
21849 return NULL_RTX;
21851 mask = gen_reg_rtx (dmode);
21852 emit_insn (GEN_FCN (nor_code) (mask, mask2));
21853 return mask;
21855 break;
21856 case GE:
21857 case GEU:
21858 case LE:
21859 case LEU:
21860 /* Try GT/GTU/LT/LTU OR EQ */
21862 rtx c_rtx, eq_rtx;
21863 enum insn_code ior_code;
21864 enum rtx_code new_code;
21866 switch (rcode)
21868 case GE:
21869 new_code = GT;
21870 break;
21872 case GEU:
21873 new_code = GTU;
21874 break;
21876 case LE:
21877 new_code = LT;
21878 break;
21880 case LEU:
21881 new_code = LTU;
21882 break;
21884 default:
21885 gcc_unreachable ();
21888 ior_code = optab_handler (ior_optab, dmode);
21889 if (ior_code == CODE_FOR_nothing)
21890 return NULL_RTX;
21892 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
21893 if (!c_rtx)
21894 return NULL_RTX;
21896 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
21897 if (!eq_rtx)
21898 return NULL_RTX;
21900 mask = gen_reg_rtx (dmode);
21901 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
21902 return mask;
21904 break;
21905 default:
21906 return NULL_RTX;
21909 if (try_again)
21911 if (swap_operands)
21912 std::swap (op0, op1);
21914 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21915 if (mask)
21916 return mask;
21919 /* You only get two chances. */
21920 return NULL_RTX;
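/* Illustrative decomposition (editorial sketch, not in the original
   source): a V4SI "a LE b" has no single instruction, so the recursion
   above builds
     t1 = (lt a b)   via the swap-and-retry path, i.e. (gt b a), e.g. vcmpgtsw
     t2 = (eq a b)   e.g. vcmpequw
     mask = t1 | t2  e.g. vor
   while a plain "a LT b" needs only the first step.  */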
21923 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
21924 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
21925 operands for the relation operation COND. */
21927 int
21928 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
21929 rtx cond, rtx cc_op0, rtx cc_op1)
21931 machine_mode dest_mode = GET_MODE (dest);
21932 machine_mode mask_mode = GET_MODE (cc_op0);
21933 enum rtx_code rcode = GET_CODE (cond);
21934 machine_mode cc_mode = CCmode;
21935 rtx mask;
21936 rtx cond2;
21937 rtx tmp;
21938 bool invert_move = false;
21940 if (VECTOR_UNIT_NONE_P (dest_mode))
21941 return 0;
21943 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
21944 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
21946 switch (rcode)
21948 /* Swap operands if we can, and fall back to doing the operation as
21949 specified, and doing a NOR to invert the test. */
21950 case NE:
21951 case UNLE:
21952 case UNLT:
21953 case UNGE:
21954 case UNGT:
21955 /* Invert condition and try again.
21956 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
21957 invert_move = true;
21958 rcode = reverse_condition_maybe_unordered (rcode);
21959 if (rcode == UNKNOWN)
21960 return 0;
21961 break;
21963 /* Mark unsigned tests with CCUNSmode. */
21964 case GTU:
21965 case GEU:
21966 case LTU:
21967 case LEU:
21968 cc_mode = CCUNSmode;
21969 break;
21971 default:
21972 break;
21975 /* Get the vector mask for the given relational operations. */
21976 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
21978 if (!mask)
21979 return 0;
21981 if (invert_move)
21983 tmp = op_true;
21984 op_true = op_false;
21985 op_false = tmp;
21988 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
21989 CONST0_RTX (dest_mode));
21990 emit_insn (gen_rtx_SET (dest,
21991 gen_rtx_IF_THEN_ELSE (dest_mode,
21992 cond2,
21993 op_true,
21994 op_false)));
21995 return 1;
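/* Sketch of the final RTL (editorial, not in the original source;
   modes and register numbers illustrative only):
     (set (reg:V4SI dest)
          (if_then_else:V4SI (ne (reg:V4SI mask) (const_vector [0 ...]))
                             (reg:V4SI op_true)
                             (reg:V4SI op_false)))
   which the vector-select patterns typically match as vsel or xxsel.  */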
21998 /* Emit a conditional move: move TRUE_COND to DEST if OP, applied to the
21999 operands of the last comparison, is nonzero/true, and FALSE_COND if it
22000 is zero/false. Return 0 if the hardware has no such operation. */
22002 int
22003 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22005 enum rtx_code code = GET_CODE (op);
22006 rtx op0 = XEXP (op, 0);
22007 rtx op1 = XEXP (op, 1);
22008 machine_mode compare_mode = GET_MODE (op0);
22009 machine_mode result_mode = GET_MODE (dest);
22010 rtx temp;
22011 bool is_against_zero;
22013 /* These modes should always match. */
22014 if (GET_MODE (op1) != compare_mode
22015 /* In the isel case however, we can use a compare immediate, so
22016 op1 may be a small constant. */
22017 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22018 return 0;
22019 if (GET_MODE (true_cond) != result_mode)
22020 return 0;
22021 if (GET_MODE (false_cond) != result_mode)
22022 return 0;
22024 /* Don't allow using floating point comparisons for integer results for
22025 now. */
22026 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22027 return 0;
22029 /* First, work out if the hardware can do this at all, or
22030 if it's too slow.... */
22031 if (!FLOAT_MODE_P (compare_mode))
22033 if (TARGET_ISEL)
22034 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22035 return 0;
22037 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
22038 && SCALAR_FLOAT_MODE_P (compare_mode))
22039 return 0;
22041 is_against_zero = op1 == CONST0_RTX (compare_mode);
22043 /* A floating-point subtract might overflow, underflow, or produce
22044 an inexact result, thus changing the floating-point flags, so it
22045 can't be generated if we care about that. It's safe if one side
22046 of the construct is zero, since then no subtract will be
22047 generated. */
22048 if (SCALAR_FLOAT_MODE_P (compare_mode)
22049 && flag_trapping_math && ! is_against_zero)
22050 return 0;
22052 /* Eliminate half of the comparisons by switching operands, this
22053 makes the remaining code simpler. */
22054 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22055 || code == LTGT || code == LT || code == UNLE)
22057 code = reverse_condition_maybe_unordered (code);
22058 temp = true_cond;
22059 true_cond = false_cond;
22060 false_cond = temp;
22063 /* UNEQ and LTGT take four instructions for a comparison with zero,
22064 so it'll probably be faster to use a branch here too. */
22065 if (code == UNEQ && HONOR_NANS (compare_mode))
22066 return 0;
22068 /* We're going to try to implement comparisons by performing
22069 a subtract, then comparing against zero. Unfortunately,
22070 Inf - Inf is NaN which is not zero, and so if we don't
22071 know that the operand is finite and the comparison
22072 would treat EQ different to UNORDERED, we can't do it. */
22073 if (HONOR_INFINITIES (compare_mode)
22074 && code != GT && code != UNGE
22075 && (GET_CODE (op1) != CONST_DOUBLE
22076 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22077 /* Constructs of the form (a OP b ? a : b) are safe. */
22078 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22079 || (! rtx_equal_p (op0, true_cond)
22080 && ! rtx_equal_p (op1, true_cond))))
22081 return 0;
22083 /* At this point we know we can use fsel. */
22085 /* Reduce the comparison to a comparison against zero. */
22086 if (! is_against_zero)
22088 temp = gen_reg_rtx (compare_mode);
22089 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22090 op0 = temp;
22091 op1 = CONST0_RTX (compare_mode);
22094 /* If we don't care about NaNs we can reduce some of the comparisons
22095 down to faster ones. */
22096 if (! HONOR_NANS (compare_mode))
22097 switch (code)
22099 case GT:
22100 code = LE;
22101 temp = true_cond;
22102 true_cond = false_cond;
22103 false_cond = temp;
22104 break;
22105 case UNGE:
22106 code = GE;
22107 break;
22108 case UNEQ:
22109 code = EQ;
22110 break;
22111 default:
22112 break;
22115 /* Now, reduce everything down to a GE. */
22116 switch (code)
22118 case GE:
22119 break;
22121 case LE:
22122 temp = gen_reg_rtx (compare_mode);
22123 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22124 op0 = temp;
22125 break;
22127 case ORDERED:
22128 temp = gen_reg_rtx (compare_mode);
22129 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22130 op0 = temp;
22131 break;
22133 case EQ:
22134 temp = gen_reg_rtx (compare_mode);
22135 emit_insn (gen_rtx_SET (temp,
22136 gen_rtx_NEG (compare_mode,
22137 gen_rtx_ABS (compare_mode, op0))));
22138 op0 = temp;
22139 break;
22141 case UNGE:
22142 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22143 temp = gen_reg_rtx (result_mode);
22144 emit_insn (gen_rtx_SET (temp,
22145 gen_rtx_IF_THEN_ELSE (result_mode,
22146 gen_rtx_GE (VOIDmode,
22147 op0, op1),
22148 true_cond, false_cond)));
22149 false_cond = true_cond;
22150 true_cond = temp;
22152 temp = gen_reg_rtx (compare_mode);
22153 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22154 op0 = temp;
22155 break;
22157 case GT:
22158 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22159 temp = gen_reg_rtx (result_mode);
22160 emit_insn (gen_rtx_SET (temp,
22161 gen_rtx_IF_THEN_ELSE (result_mode,
22162 gen_rtx_GE (VOIDmode,
22163 op0, op1),
22164 true_cond, false_cond)));
22165 true_cond = false_cond;
22166 false_cond = temp;
22168 temp = gen_reg_rtx (compare_mode);
22169 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22170 op0 = temp;
22171 break;
22173 default:
22174 gcc_unreachable ();
22177 emit_insn (gen_rtx_SET (dest,
22178 gen_rtx_IF_THEN_ELSE (result_mode,
22179 gen_rtx_GE (VOIDmode,
22180 op0, op1),
22181 true_cond, false_cond)));
22182 return 1;
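/* Illustrative fsel lowering (editorial sketch, not in the original
   source): for "d = (a >= b) ? x : y" in DFmode without trapping math,
   the code above emits roughly
     tmp = a - b
     (set d (if_then_else (ge tmp 0) x y))
   which assembles to something like
     fsub f0,f1,f2
     fsel f3,f0,f4,f5
   with hypothetical register numbers.  */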
22185 /* Same as above, but for ints (isel). */
22187 static int
22188 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22190 rtx condition_rtx, cr;
22191 machine_mode mode = GET_MODE (dest);
22192 enum rtx_code cond_code;
22193 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22194 bool signedp;
22196 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22197 return 0;
22199 /* We still have to do the compare, because isel doesn't do a
22200 compare, it just looks at the CRx bits set by a previous compare
22201 instruction. */
22202 condition_rtx = rs6000_generate_compare (op, mode);
22203 cond_code = GET_CODE (condition_rtx);
22204 cr = XEXP (condition_rtx, 0);
22205 signedp = GET_MODE (cr) == CCmode;
22207 isel_func = (mode == SImode
22208 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22209 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22211 switch (cond_code)
22213 case LT: case GT: case LTU: case GTU: case EQ:
22214 /* isel handles these directly. */
22215 break;
22217 default:
22218 /* We need to swap the sense of the comparison. */
22220 std::swap (false_cond, true_cond);
22221 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22223 break;
22226 false_cond = force_reg (mode, false_cond);
22227 if (true_cond != const0_rtx)
22228 true_cond = force_reg (mode, true_cond);
22230 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22232 return 1;
22235 const char *
22236 output_isel (rtx *operands)
22238 enum rtx_code code;
22240 code = GET_CODE (operands[1]);
22242 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
22244 gcc_assert (GET_CODE (operands[2]) == REG
22245 && GET_CODE (operands[3]) == REG);
22246 PUT_CODE (operands[1], reverse_condition (code));
22247 return "isel %0,%3,%2,%j1";
22250 return "isel %0,%2,%3,%j1";
22253 void
22254 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22256 machine_mode mode = GET_MODE (op0);
22257 enum rtx_code c;
22258 rtx target;
22260 /* VSX/altivec have direct min/max insns. */
22261 if ((code == SMAX || code == SMIN)
22262 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22263 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22265 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22266 return;
22269 if (code == SMAX || code == SMIN)
22270 c = GE;
22271 else
22272 c = GEU;
22274 if (code == SMAX || code == UMAX)
22275 target = emit_conditional_move (dest, c, op0, op1, mode,
22276 op0, op1, mode, 0);
22277 else
22278 target = emit_conditional_move (dest, c, op0, op1, mode,
22279 op1, op0, mode, 0);
22280 gcc_assert (target);
22281 if (target != dest)
22282 emit_move_insn (dest, target);
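/* Sketch (editorial, not in the original source): without a direct
   min/max instruction, "smax (a, b)" becomes the conditional move
     dest = (a >= b) ? a : b
   and "smin (a, b)" uses the same compare with the arms swapped.  */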
22285 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22286 COND is true. Mark the jump as unlikely to be taken. */
22288 static void
22289 emit_unlikely_jump (rtx cond, rtx label)
22291 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
22292 rtx x;
22294 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22295 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22296 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
22299 /* A subroutine of the atomic operation splitters. Emit a load-locked
22300 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22301 the zero_extend operation. */
22303 static void
22304 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22306 rtx (*fn) (rtx, rtx) = NULL;
22308 switch (mode)
22310 case QImode:
22311 fn = gen_load_lockedqi;
22312 break;
22313 case HImode:
22314 fn = gen_load_lockedhi;
22315 break;
22316 case SImode:
22317 if (GET_MODE (mem) == QImode)
22318 fn = gen_load_lockedqi_si;
22319 else if (GET_MODE (mem) == HImode)
22320 fn = gen_load_lockedhi_si;
22321 else
22322 fn = gen_load_lockedsi;
22323 break;
22324 case DImode:
22325 fn = gen_load_lockeddi;
22326 break;
22327 case TImode:
22328 fn = gen_load_lockedti;
22329 break;
22330 default:
22331 gcc_unreachable ();
22333 emit_insn (fn (reg, mem));
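/* Editorial note: the patterns above emit lbarx/lharx/lwarx/ldarx/lqarx
   for QI/HI/SI/DI/TImode respectively, e.g. for SImode roughly
     lwarx r10,0,r9
   with hypothetical register numbers.  */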
22336 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22337 instruction in MODE. */
22339 static void
22340 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22342 rtx (*fn) (rtx, rtx, rtx) = NULL;
22344 switch (mode)
22346 case QImode:
22347 fn = gen_store_conditionalqi;
22348 break;
22349 case HImode:
22350 fn = gen_store_conditionalhi;
22351 break;
22352 case SImode:
22353 fn = gen_store_conditionalsi;
22354 break;
22355 case DImode:
22356 fn = gen_store_conditionaldi;
22357 break;
22358 case TImode:
22359 fn = gen_store_conditionalti;
22360 break;
22361 default:
22362 gcc_unreachable ();
22365 /* Emit sync before stwcx. to address PPC405 Erratum. */
22366 if (PPC405_ERRATUM77)
22367 emit_insn (gen_hwsync ());
22369 emit_insn (fn (res, mem, val));
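/* Editorial note: the patterns above emit stbcx./sthcx./stwcx./stdcx./
   stqcx. according to MODE; the instruction sets the EQ bit of CR0 when
   the reservation still holds and the store succeeds, e.g.
     stwcx. r10,0,r9
   with hypothetical register numbers.  */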
22372 /* Expand barriers before and after a load_locked/store_cond sequence. */
22374 static rtx
22375 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
22377 rtx addr = XEXP (mem, 0);
22378 int strict_p = (reload_in_progress || reload_completed);
22380 if (!legitimate_indirect_address_p (addr, strict_p)
22381 && !legitimate_indexed_address_p (addr, strict_p))
22383 addr = force_reg (Pmode, addr);
22384 mem = replace_equiv_address_nv (mem, addr);
22387 switch (model)
22389 case MEMMODEL_RELAXED:
22390 case MEMMODEL_CONSUME:
22391 case MEMMODEL_ACQUIRE:
22392 break;
22393 case MEMMODEL_RELEASE:
22394 case MEMMODEL_ACQ_REL:
22395 emit_insn (gen_lwsync ());
22396 break;
22397 case MEMMODEL_SEQ_CST:
22398 emit_insn (gen_hwsync ());
22399 break;
22400 default:
22401 gcc_unreachable ();
22403 return mem;
22406 static void
22407 rs6000_post_atomic_barrier (enum memmodel model)
22409 switch (model)
22411 case MEMMODEL_RELAXED:
22412 case MEMMODEL_CONSUME:
22413 case MEMMODEL_RELEASE:
22414 break;
22415 case MEMMODEL_ACQUIRE:
22416 case MEMMODEL_ACQ_REL:
22417 case MEMMODEL_SEQ_CST:
22418 emit_insn (gen_isync ());
22419 break;
22420 default:
22421 gcc_unreachable ();
22425 /* A subroutine of the various atomic expanders. For sub-word operations,
22426 we must adjust things to operate on SImode. Given the original MEM,
22427 return a new aligned memory. Also build and return the quantities by
22428 which to shift and mask. */
22430 static rtx
22431 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
22433 rtx addr, align, shift, mask, mem;
22434 HOST_WIDE_INT shift_mask;
22435 machine_mode mode = GET_MODE (orig_mem);
22437 /* For smaller modes, we have to implement this via SImode. */
22438 shift_mask = (mode == QImode ? 0x18 : 0x10);
22440 addr = XEXP (orig_mem, 0);
22441 addr = force_reg (GET_MODE (addr), addr);
22443 /* Aligned memory containing subword. Generate a new memory. We
22444 do not want any of the existing MEM_ATTR data, as we're now
22445 accessing memory outside the original object. */
22446 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
22447 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22448 mem = gen_rtx_MEM (SImode, align);
22449 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22450 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22451 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22453 /* Shift amount for subword relative to aligned word. */
22454 shift = gen_reg_rtx (SImode);
22455 addr = gen_lowpart (SImode, addr);
22456 rtx tmp = gen_reg_rtx (SImode);
22457 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22458 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
22459 if (BYTES_BIG_ENDIAN)
22460 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22461 shift, 1, OPTAB_LIB_WIDEN);
22462 *pshift = shift;
22464 /* Mask for insertion. */
22465 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22466 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22467 *pmask = mask;
22469 return mem;
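/* Worked example (editorial, not in the original source): for a HImode
   access at address A on a big-endian target this computes
     align = A & -4
     shift = ((A << 3) & 0x10) ^ 0x10
     mask  = 0xffff << shift
   so the halfword can be updated inside its containing SImode word;
   QImode uses 0x18 and 0xff instead.  */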
22472 /* A subroutine of the various atomic expanders. For sub-word operands,
22473 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
22475 static rtx
22476 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22478 rtx x;
22480 x = gen_reg_rtx (SImode);
22481 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22482 gen_rtx_NOT (SImode, mask),
22483 oldval)));
22485 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22487 return x;
22490 /* A subroutine of the various atomic expanders. For sub-word operands,
22491 extract WIDE to NARROW via SHIFT. */
22493 static void
22494 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22496 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22497 wide, 1, OPTAB_LIB_WIDEN);
22498 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22501 /* Expand an atomic compare and swap operation. */
22503 void
22504 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22506 rtx boolval, retval, mem, oldval, newval, cond;
22507 rtx label1, label2, x, mask, shift;
22508 machine_mode mode, orig_mode;
22509 enum memmodel mod_s, mod_f;
22510 bool is_weak;
22512 boolval = operands[0];
22513 retval = operands[1];
22514 mem = operands[2];
22515 oldval = operands[3];
22516 newval = operands[4];
22517 is_weak = (INTVAL (operands[5]) != 0);
22518 mod_s = memmodel_base (INTVAL (operands[6]));
22519 mod_f = memmodel_base (INTVAL (operands[7]));
22520 orig_mode = mode = GET_MODE (mem);
22522 mask = shift = NULL_RTX;
22523 if (mode == QImode || mode == HImode)
22525 /* Before power8, we didn't have access to lbarx/lharx, so generate a
22526 lwarx and shift/mask operations. With power8, we need to do the
22527 comparison in SImode, but the store is still done in QI/HImode. */
22528 oldval = convert_modes (SImode, mode, oldval, 1);
22530 if (!TARGET_SYNC_HI_QI)
22532 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22534 /* Shift and mask OLDVAL into position within the word. */
22535 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22536 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22538 /* Shift and mask NEWVAL into position within the word. */
22539 newval = convert_modes (SImode, mode, newval, 1);
22540 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22544 /* Prepare to adjust the return value. */
22545 retval = gen_reg_rtx (SImode);
22546 mode = SImode;
22548 else if (reg_overlap_mentioned_p (retval, oldval))
22549 oldval = copy_to_reg (oldval);
22551 if (mode != TImode && !reg_or_short_operand (oldval, mode))
22552 oldval = copy_to_mode_reg (mode, oldval);
22554 if (reg_overlap_mentioned_p (retval, newval))
22555 newval = copy_to_reg (newval);
22557 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22559 label1 = NULL_RTX;
22560 if (!is_weak)
22562 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22563 emit_label (XEXP (label1, 0));
22565 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22567 emit_load_locked (mode, retval, mem);
22569 x = retval;
22570 if (mask)
22571 x = expand_simple_binop (SImode, AND, retval, mask,
22572 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22574 cond = gen_reg_rtx (CCmode);
22575 /* If we have TImode, synthesize a comparison. */
22576 if (mode != TImode)
22577 x = gen_rtx_COMPARE (CCmode, x, oldval);
22578 else
22580 rtx xor1_result = gen_reg_rtx (DImode);
22581 rtx xor2_result = gen_reg_rtx (DImode);
22582 rtx or_result = gen_reg_rtx (DImode);
22583 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22584 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22585 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22586 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22588 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22589 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22590 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22591 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22594 emit_insn (gen_rtx_SET (cond, x));
22596 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22597 emit_unlikely_jump (x, label2);
22599 x = newval;
22600 if (mask)
22601 x = rs6000_mask_atomic_subword (retval, newval, mask);
22603 emit_store_conditional (orig_mode, cond, mem, x);
22605 if (!is_weak)
22607 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22608 emit_unlikely_jump (x, label1);
22611 if (!is_mm_relaxed (mod_f))
22612 emit_label (XEXP (label2, 0));
22614 rs6000_post_atomic_barrier (mod_s);
22616 if (is_mm_relaxed (mod_f))
22617 emit_label (XEXP (label2, 0));
22619 if (shift)
22620 rs6000_finish_atomic_subword (operands[1], retval, shift);
22621 else if (mode != GET_MODE (operands[1]))
22622 convert_move (operands[1], retval, 1);
22624 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22625 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22626 emit_insn (gen_rtx_SET (boolval, x));
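/* Illustrative SImode sequence for a strong compare-and-swap with
   seq_cst ordering (editorial sketch, not in the original source;
   register numbers hypothetical):
       hwsync
   .L1: lwarx  r9,0,r3
       cmpw   cr0,r9,r4
       bne-   0,.L2
       stwcx. r5,0,r3
       bne-   0,.L1
   .L2: isync
   leaving CR0 holding EQ on success, which BOOLVAL then reflects.  */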
22629 /* Expand an atomic exchange operation. */
22631 void
22632 rs6000_expand_atomic_exchange (rtx operands[])
22634 rtx retval, mem, val, cond;
22635 machine_mode mode;
22636 enum memmodel model;
22637 rtx label, x, mask, shift;
22639 retval = operands[0];
22640 mem = operands[1];
22641 val = operands[2];
22642 model = memmodel_base (INTVAL (operands[3]));
22643 mode = GET_MODE (mem);
22645 mask = shift = NULL_RTX;
22646 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22648 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22650 /* Shift and mask VAL into position within the word. */
22651 val = convert_modes (SImode, mode, val, 1);
22652 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22655 /* Prepare to adjust the return value. */
22656 retval = gen_reg_rtx (SImode);
22657 mode = SImode;
22660 mem = rs6000_pre_atomic_barrier (mem, model);
22662 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22663 emit_label (XEXP (label, 0));
22665 emit_load_locked (mode, retval, mem);
22667 x = val;
22668 if (mask)
22669 x = rs6000_mask_atomic_subword (retval, val, mask);
22671 cond = gen_reg_rtx (CCmode);
22672 emit_store_conditional (mode, cond, mem, x);
22674 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22675 emit_unlikely_jump (x, label);
22677 rs6000_post_atomic_barrier (model);
22679 if (shift)
22680 rs6000_finish_atomic_subword (operands[0], retval, shift);
22683 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
22684 to perform. MEM is the memory on which to operate. VAL is the second
22685 operand of the binary operator. BEFORE and AFTER are optional locations to
22686 return the value of MEM either before or after the operation. MODEL_RTX
22687 is a CONST_INT containing the memory model to use. */
22689 void
22690 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
22691 rtx orig_before, rtx orig_after, rtx model_rtx)
22693 enum memmodel model = memmodel_base (INTVAL (model_rtx));
22694 machine_mode mode = GET_MODE (mem);
22695 machine_mode store_mode = mode;
22696 rtx label, x, cond, mask, shift;
22697 rtx before = orig_before, after = orig_after;
22699 mask = shift = NULL_RTX;
22700 /* On power8, we want to use SImode for the operation. On previous systems,
22701 use the operation in a subword and shift/mask to get the proper byte or
22702 halfword. */
22703 if (mode == QImode || mode == HImode)
22705 if (TARGET_SYNC_HI_QI)
22707 val = convert_modes (SImode, mode, val, 1);
22709 /* Prepare to adjust the return value. */
22710 before = gen_reg_rtx (SImode);
22711 if (after)
22712 after = gen_reg_rtx (SImode);
22713 mode = SImode;
22715 else
22717 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22719 /* Shift and mask VAL into position within the word. */
22720 val = convert_modes (SImode, mode, val, 1);
22721 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22724 switch (code)
22726 case IOR:
22727 case XOR:
22728 /* We've already zero-extended VAL. That is sufficient to
22729 make certain that it does not affect other bits. */
22730 mask = NULL;
22731 break;
22733 case AND:
22734 /* If we make certain that all of the other bits in VAL are
22735 set, that will be sufficient to not affect other bits. */
22736 x = gen_rtx_NOT (SImode, mask);
22737 x = gen_rtx_IOR (SImode, x, val);
22738 emit_insn (gen_rtx_SET (val, x));
22739 mask = NULL;
22740 break;
22742 case NOT:
22743 case PLUS:
22744 case MINUS:
22745 /* These will all affect bits outside the field and need
22746 adjustment via MASK within the loop. */
22747 break;
22749 default:
22750 gcc_unreachable ();
22753 /* Prepare to adjust the return value. */
22754 before = gen_reg_rtx (SImode);
22755 if (after)
22756 after = gen_reg_rtx (SImode);
22757 store_mode = mode = SImode;
22761 mem = rs6000_pre_atomic_barrier (mem, model);
22763 label = gen_label_rtx ();
22764 emit_label (label);
22765 label = gen_rtx_LABEL_REF (VOIDmode, label);
22767 if (before == NULL_RTX)
22768 before = gen_reg_rtx (mode);
22770 emit_load_locked (mode, before, mem);
22772 if (code == NOT)
22774 x = expand_simple_binop (mode, AND, before, val,
22775 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22776 after = expand_simple_unop (mode, NOT, x, after, 1);
22778 else
22780 after = expand_simple_binop (mode, code, before, val,
22781 after, 1, OPTAB_LIB_WIDEN);
22784 x = after;
22785 if (mask)
22787 x = expand_simple_binop (SImode, AND, after, mask,
22788 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22789 x = rs6000_mask_atomic_subword (before, x, mask);
22791 else if (store_mode != mode)
22792 x = convert_modes (store_mode, mode, x, 1);
22794 cond = gen_reg_rtx (CCmode);
22795 emit_store_conditional (store_mode, cond, mem, x);
22797 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22798 emit_unlikely_jump (x, label);
22800 rs6000_post_atomic_barrier (model);
22802 if (shift)
22804 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22805 then do the calculations in a SImode register. */
22806 if (orig_before)
22807 rs6000_finish_atomic_subword (orig_before, before, shift);
22808 if (orig_after)
22809 rs6000_finish_atomic_subword (orig_after, after, shift);
22811 else if (store_mode != mode)
22813 /* QImode/HImode on machines with lbarx/lharx where we do the native
22814 operation and then do the calculations in a SImode register. */
22815 if (orig_before)
22816 convert_move (orig_before, before, 1);
22817 if (orig_after)
22818 convert_move (orig_after, after, 1);
22820 else if (orig_after && after != orig_after)
22821 emit_move_insn (orig_after, after);
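/* Illustrative SImode fetch-and-add loop (editorial sketch, not in the
   original source; register numbers hypothetical):
   .L1: lwarx  r9,0,r3
       add    r10,r9,r4
       stwcx. r10,0,r3
       bne-   0,.L1
   BEFORE ends up in r9 and AFTER in r10, with barriers inserted around
   the loop according to MODEL.  */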
22824 /* Emit instructions to move SRC to DST. Called by splitters for
22825 multi-register moves. It will emit at most one instruction for
22826 each register that is accessed; that is, it won't emit li/lis pairs
22827 (or equivalent for 64-bit code). One of SRC or DST must be a hard
22828 register. */
22830 void
22831 rs6000_split_multireg_move (rtx dst, rtx src)
22833 /* The register number of the first register being moved. */
22834 int reg;
22835 /* The mode that is to be moved. */
22836 machine_mode mode;
22837 /* The mode that the move is being done in, and its size. */
22838 machine_mode reg_mode;
22839 int reg_mode_size;
22840 /* The number of registers that will be moved. */
22841 int nregs;
22843 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
22844 mode = GET_MODE (dst);
22845 nregs = hard_regno_nregs[reg][mode];
22846 if (FP_REGNO_P (reg))
22847 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
22848 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
22849 else if (ALTIVEC_REGNO_P (reg))
22850 reg_mode = V16QImode;
22851 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
22852 reg_mode = DFmode;
22853 else
22854 reg_mode = word_mode;
22855 reg_mode_size = GET_MODE_SIZE (reg_mode);
22857 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22859 /* TDmode residing in FP registers is special, since the ISA requires that
22860 the lower-numbered word of a register pair is always the most significant
22861 word, even in little-endian mode. This does not match the usual subreg
22862 semantics, so we cannot use simplify_gen_subreg in those cases. Access
22863 the appropriate constituent registers "by hand" in little-endian mode.
22865 Note we do not need to check for destructive overlap here since TDmode
22866 can only reside in even/odd register pairs. */
22867 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22869 rtx p_src, p_dst;
22870 int i;
22872 for (i = 0; i < nregs; i++)
22874 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22875 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22876 else
22877 p_src = simplify_gen_subreg (reg_mode, src, mode,
22878 i * reg_mode_size);
22880 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22881 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22882 else
22883 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22884 i * reg_mode_size);
22886 emit_insn (gen_rtx_SET (p_dst, p_src));
22889 return;
22892 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22894 /* Move register range backwards, if we might have destructive
22895 overlap. */
22896 int i;
22897 for (i = nregs - 1; i >= 0; i--)
22898 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22899 i * reg_mode_size),
22900 simplify_gen_subreg (reg_mode, src, mode,
22901 i * reg_mode_size)));
22903 else
22905 int i;
22906 int j = -1;
22907 bool used_update = false;
22908 rtx restore_basereg = NULL_RTX;
22910 if (MEM_P (src) && INT_REGNO_P (reg))
22912 rtx breg;
22914 if (GET_CODE (XEXP (src, 0)) == PRE_INC
22915 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
22917 rtx delta_rtx;
22918 breg = XEXP (XEXP (src, 0), 0);
22919 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
22920 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
22921 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
22922 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22923 src = replace_equiv_address (src, breg);
22925 else if (! rs6000_offsettable_memref_p (src, reg_mode))
22927 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
22929 rtx basereg = XEXP (XEXP (src, 0), 0);
22930 if (TARGET_UPDATE)
22932 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
22933 emit_insn (gen_rtx_SET (ndst,
22934 gen_rtx_MEM (reg_mode,
22935 XEXP (src, 0))));
22936 used_update = true;
22938 else
22939 emit_insn (gen_rtx_SET (basereg,
22940 XEXP (XEXP (src, 0), 1)));
22941 src = replace_equiv_address (src, basereg);
22943 else
22945 rtx basereg = gen_rtx_REG (Pmode, reg);
22946 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
22947 src = replace_equiv_address (src, basereg);
22951 breg = XEXP (src, 0);
22952 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
22953 breg = XEXP (breg, 0);
22955 /* If the base register we are using to address memory is
22956 also a destination reg, then change that register last. */
22957 if (REG_P (breg)
22958 && REGNO (breg) >= REGNO (dst)
22959 && REGNO (breg) < REGNO (dst) + nregs)
22960 j = REGNO (breg) - REGNO (dst);
22962 else if (MEM_P (dst) && INT_REGNO_P (reg))
22964 rtx breg;
22966 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
22967 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
22969 rtx delta_rtx;
22970 breg = XEXP (XEXP (dst, 0), 0);
22971 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
22972 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
22973 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
22975 /* We have to update the breg before doing the store.
22976 Use store with update, if available. */
22978 if (TARGET_UPDATE)
22980 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22981 emit_insn (TARGET_32BIT
22982 ? (TARGET_POWERPC64
22983 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
22984 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
22985 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
22986 used_update = true;
22988 else
22989 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22990 dst = replace_equiv_address (dst, breg);
22992 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
22993 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
22995 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
22997 rtx basereg = XEXP (XEXP (dst, 0), 0);
22998 if (TARGET_UPDATE)
23000 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23001 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23002 XEXP (dst, 0)),
23003 nsrc));
23004 used_update = true;
23006 else
23007 emit_insn (gen_rtx_SET (basereg,
23008 XEXP (XEXP (dst, 0), 1)));
23009 dst = replace_equiv_address (dst, basereg);
23011 else
23013 rtx basereg = XEXP (XEXP (dst, 0), 0);
23014 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23015 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23016 && REG_P (basereg)
23017 && REG_P (offsetreg)
23018 && REGNO (basereg) != REGNO (offsetreg));
23019 if (REGNO (basereg) == 0)
23021 rtx tmp = offsetreg;
23022 offsetreg = basereg;
23023 basereg = tmp;
23025 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23026 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23027 dst = replace_equiv_address (dst, basereg);
23030 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23031 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
23034 for (i = 0; i < nregs; i++)
23036 /* Calculate index to next subword. */
23037 ++j;
23038 if (j == nregs)
23039 j = 0;
23041 /* If compiler already emitted move of first word by
23042 store with update, no need to do anything. */
23043 if (j == 0 && used_update)
23044 continue;
23046 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23047 j * reg_mode_size),
23048 simplify_gen_subreg (reg_mode, src, mode,
23049 j * reg_mode_size)));
23051 if (restore_basereg != NULL_RTX)
23052 emit_insn (restore_basereg);
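/* Worked example (editorial, not in the original source): a TImode copy
   between 64-bit GPRs, say r5:r6 = r4:r5, overlaps with REGNO (src) <
   REGNO (dst), so the loop above moves backwards:
     r6 = r5
     r5 = r4
   copying each word before it is overwritten.  */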
23057 /* This page contains routines that are used to determine what the
23058 function prologue and epilogue code will do and write them out. */
23060 static inline bool
23061 save_reg_p (int r)
23063 return !call_used_regs[r] && df_regs_ever_live_p (r);
23066 /* Determine whether the gp REG is really used. */
23068 static bool
23069 rs6000_reg_live_or_pic_offset_p (int reg)
23071 /* We need to mark the PIC offset register live under the same conditions
23072 under which it is set up; otherwise it won't be saved before we clobber it. */
23074 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23076 if (TARGET_TOC && TARGET_MINIMAL_TOC
23077 && (crtl->calls_eh_return
23078 || df_regs_ever_live_p (reg)
23079 || get_pool_size ()))
23080 return true;
23082 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23083 && flag_pic)
23084 return true;
23087 /* If the function calls eh_return, claim used all the registers that would
23088 be checked for liveness otherwise. */
23090 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23091 && !call_used_regs[reg]);
23094 /* Return the first fixed-point register that is required to be
23095 saved. 32 if none. */
23097 int
23098 first_reg_to_save (void)
23100 int first_reg;
23102 /* Find lowest numbered live register. */
23103 for (first_reg = 13; first_reg <= 31; first_reg++)
23104 if (save_reg_p (first_reg))
23105 break;
23107 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
23108 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23109 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23110 || (TARGET_TOC && TARGET_MINIMAL_TOC))
23111 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23112 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
23114 #if TARGET_MACHO
23115 if (flag_pic
23116 && crtl->uses_pic_offset_table
23117 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
23118 return RS6000_PIC_OFFSET_TABLE_REGNUM;
23119 #endif
23121 return first_reg;
23124 /* Similar, for FP regs. */
23126 int
23127 first_fp_reg_to_save (void)
23129 int first_reg;
23131 /* Find lowest numbered live register. */
23132 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
23133 if (save_reg_p (first_reg))
23134 break;
23136 return first_reg;
23139 /* Similar, for AltiVec regs. */
23141 static int
23142 first_altivec_reg_to_save (void)
23144 int i;
23146 /* Stack frame remains as is unless we are in AltiVec ABI. */
23147 if (! TARGET_ALTIVEC_ABI)
23148 return LAST_ALTIVEC_REGNO + 1;
23150 /* On Darwin, the unwind routines are compiled without
23151 TARGET_ALTIVEC, and use save_world to save/restore the
23152 altivec registers when necessary. */
23153 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23154 && ! TARGET_ALTIVEC)
23155 return FIRST_ALTIVEC_REGNO + 20;
23157 /* Find lowest numbered live register. */
23158 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
23159 if (save_reg_p (i))
23160 break;
23162 return i;
23165 /* Return a 32-bit mask of the AltiVec registers we need to set in
23166 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
23167 the 32-bit word is 0. */
23169 static unsigned int
23170 compute_vrsave_mask (void)
23172 unsigned int i, mask = 0;
23174 /* On Darwin, the unwind routines are compiled without
23175 TARGET_ALTIVEC, and use save_world to save/restore the
23176 call-saved altivec registers when necessary. */
23177 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23178 && ! TARGET_ALTIVEC)
23179 mask |= 0xFFF;
23181 /* First, find out if we use _any_ altivec registers. */
23182 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23183 if (df_regs_ever_live_p (i))
23184 mask |= ALTIVEC_REG_BIT (i);
23186 if (mask == 0)
23187 return mask;
23189 /* Next, remove the argument registers from the set. These must
23190 be in the VRSAVE mask set by the caller, so we don't need to add
23191 them in again. More importantly, the mask we compute here is
23192 used to generate CLOBBERs in the set_vrsave insn, and we do not
23193 wish the argument registers to die. */
23194 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
23195 mask &= ~ALTIVEC_REG_BIT (i);
23197 /* Similarly, remove the return value from the set. */
23199 bool yes = false;
23200 diddle_return_value (is_altivec_return_reg, &yes);
23201 if (yes)
23202 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
23205 return mask;
23208 /* For a very restricted set of circumstances, we can cut down the
23209 size of prologues/epilogues by calling our own save/restore-the-world
23210 routines. */
23212 static void
23213 compute_save_world_info (rs6000_stack_t *info)
23215 info->world_save_p = 1;
23216 info->world_save_p
23217 = (WORLD_SAVE_P (info)
23218 && DEFAULT_ABI == ABI_DARWIN
23219 && !cfun->has_nonlocal_label
23220 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
23221 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
23222 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
23223 && info->cr_save_p);
23225 /* This will not work in conjunction with sibcalls. Make sure there
23226 are none. (This check is expensive, but seldom executed.) */
23227 if (WORLD_SAVE_P (info))
23229 rtx_insn *insn;
23230 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
23231 if (CALL_P (insn) && SIBLING_CALL_P (insn))
23233 info->world_save_p = 0;
23234 break;
23238 if (WORLD_SAVE_P (info))
23240 /* Even if we're not touching VRsave, make sure there's room on the
23241 stack for it, if it looks like we're calling SAVE_WORLD, which
23242 will attempt to save it. */
23243 info->vrsave_size = 4;
23245 /* If we are going to save the world, we need to save the link register too. */
23246 info->lr_save_p = 1;
23248 /* "Save" the VRsave register too if we're saving the world. */
23249 if (info->vrsave_mask == 0)
23250 info->vrsave_mask = compute_vrsave_mask ();
23252 /* Because the Darwin register save/restore routines only handle
23253 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
23254 check. */
23255 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
23256 && (info->first_altivec_reg_save
23257 >= FIRST_SAVED_ALTIVEC_REGNO));
23260 return;
23264 static void
23265 is_altivec_return_reg (rtx reg, void *xyes)
23267 bool *yes = (bool *) xyes;
23268 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
23269 *yes = true;
23273 /* Return whether REG is a global user reg or has been specified by
23274 -ffixed-REG. We should not restore these, and so cannot use
23275 lmw or out-of-line restore functions if there are any. We also
23276 can't save them (well, emit frame notes for them), because frame
23277 unwinding during exception handling will restore saved registers. */
23279 static bool
23280 fixed_reg_p (int reg)
23282 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
23283 backend sets it, overriding anything the user might have given. */
23284 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23285 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
23286 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23287 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
23288 return false;
23290 return fixed_regs[reg];
23293 /* Determine the strategy for savings/restoring registers. */
23295 enum {
23296 SAVE_MULTIPLE = 0x1,
23297 SAVE_INLINE_GPRS = 0x2,
23298 SAVE_INLINE_FPRS = 0x4,
23299 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
23300 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
23301 SAVE_INLINE_VRS = 0x20,
23302 REST_MULTIPLE = 0x100,
23303 REST_INLINE_GPRS = 0x200,
23304 REST_INLINE_FPRS = 0x400,
23305 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
23306 REST_INLINE_VRS = 0x1000
23309 static int
23310 rs6000_savres_strategy (rs6000_stack_t *info,
23311 bool using_static_chain_p)
23313 int strategy = 0;
23315 /* Select between in-line and out-of-line save and restore of regs.
23316 First, all the obvious cases where we don't use out-of-line. */
23317 if (crtl->calls_eh_return
23318 || cfun->machine->ra_need_lr)
23319 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
23320 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
23321 | SAVE_INLINE_VRS | REST_INLINE_VRS);
23323 if (info->first_gp_reg_save == 32)
23324 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23326 if (info->first_fp_reg_save == 64
23327 /* The out-of-line FP routines use double-precision stores;
23328 we can't use those routines if we don't have such stores. */
23329 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
23330 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23332 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
23333 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23335 /* Define cutoff for using out-of-line functions to save registers. */
23336 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
23338 if (!optimize_size)
23340 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23341 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23342 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23344 else
23346 /* Prefer out-of-line restore if it will exit. */
23347 if (info->first_fp_reg_save > 61)
23348 strategy |= SAVE_INLINE_FPRS;
23349 if (info->first_gp_reg_save > 29)
23351 if (info->first_fp_reg_save == 64)
23352 strategy |= SAVE_INLINE_GPRS;
23353 else
23354 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23356 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
23357 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23360 else if (DEFAULT_ABI == ABI_DARWIN)
23362 if (info->first_fp_reg_save > 60)
23363 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23364 if (info->first_gp_reg_save > 29)
23365 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23366 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23368 else
23370 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23371 if (info->first_fp_reg_save > 61)
23372 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23373 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23374 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23377 /* Don't bother to try to save things out-of-line if r11 is occupied
23378 by the static chain. It would require too much fiddling and the
23379 static chain is rarely used anyway. FPRs are saved w.r.t the stack
23380 pointer on Darwin, and AIX uses r1 or r12. */
23381 if (using_static_chain_p
23382 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
23383 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
23384 | SAVE_INLINE_GPRS
23385 | SAVE_INLINE_VRS);
23387 /* Saving CR interferes with the exit routines used on the SPE, so
23388 just punt here. */
23389 if (TARGET_SPE_ABI
23390 && info->spe_64bit_regs_used
23391 && info->cr_save_p)
23392 strategy |= REST_INLINE_GPRS;
23394 /* We can only use the out-of-line routines to restore fprs if we've
23395 saved all the registers from first_fp_reg_save in the prologue.
23396 Otherwise, we risk loading garbage. Of course, if we have saved
23397 out-of-line then we know we haven't skipped any fprs. */
23398 if ((strategy & SAVE_INLINE_FPRS)
23399 && !(strategy & REST_INLINE_FPRS))
23401 int i;
23403 for (i = info->first_fp_reg_save; i < 64; i++)
23404 if (fixed_regs[i] || !save_reg_p (i))
23406 strategy |= REST_INLINE_FPRS;
23407 break;
23411 /* Similarly, for altivec regs. */
23412 if ((strategy & SAVE_INLINE_VRS)
23413 && !(strategy & REST_INLINE_VRS))
23415 int i;
23417 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23418 if (fixed_regs[i] || !save_reg_p (i))
23420 strategy |= REST_INLINE_VRS;
23421 break;
23425 if (TARGET_MULTIPLE
23426 && !TARGET_POWERPC64
23427 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
23428 && info->first_gp_reg_save < 31)
23430 /* Prefer store multiple for saves over out-of-line routines,
23431 since the store-multiple instruction will always be smaller. */
23432 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
23434 /* info->lr_save_p isn't yet set if the only reason lr needs to be
23435 saved is an out-of-line save or restore. Set up the value for
23436 the next test (excluding out-of-line gprs). */
23437 bool lr_save_p = (info->lr_save_p
23438 || !(strategy & SAVE_INLINE_FPRS)
23439 || !(strategy & SAVE_INLINE_VRS)
23440 || !(strategy & REST_INLINE_FPRS)
23441 || !(strategy & REST_INLINE_VRS));
23443 /* The situation is more complicated with load multiple. We'd
23444 prefer to use the out-of-line routines for restores, since the
23445 "exit" out-of-line routines can handle the restore of LR and the
23446 frame teardown. However, it doesn't make sense to use the
23447 out-of-line routine if that is the only reason we'd need to save
23448 LR, and we can't use the "exit" out-of-line gpr restore if we
23449 have saved some fprs; in those cases it is advantageous to use
23450 load multiple when available. */
23451 if (info->first_fp_reg_save != 64 || !lr_save_p)
23452 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
23455 /* We can only use load multiple or the out-of-line routines to
23456 restore gprs if we've saved all the registers from
23457 first_gp_reg_save. Otherwise, we risk loading garbage.
23458 Of course, if we have saved out-of-line or used stmw then we know
23459 we haven't skipped any gprs. */
23460 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
23461 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23463 int i;
23465 for (i = info->first_gp_reg_save; i < 32; i++)
23466 if (fixed_reg_p (i) || !save_reg_p (i))
23468 strategy |= REST_INLINE_GPRS;
23469 strategy &= ~REST_MULTIPLE;
23470 break;
23474 if (TARGET_ELF && TARGET_64BIT)
23476 if (!(strategy & SAVE_INLINE_FPRS))
23477 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23478 else if (!(strategy & SAVE_INLINE_GPRS)
23479 && info->first_fp_reg_save == 64)
23480 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23482 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23483 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23485 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23486 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23488 return strategy;
23491 /* Calculate the stack information for the current function. This is
23492 complicated by having two separate calling sequences, the AIX calling
23493 sequence and the V.4 calling sequence.
23495 AIX (and Darwin/Mac OS X) stack frames look like:
23496 32-bit 64-bit
23497 SP----> +---------------------------------------+
23498 | back chain to caller | 0 0
23499 +---------------------------------------+
23500 | saved CR | 4 8 (8-11)
23501 +---------------------------------------+
23502 | saved LR | 8 16
23503 +---------------------------------------+
23504 | reserved for compilers | 12 24
23505 +---------------------------------------+
23506 | reserved for binders | 16 32
23507 +---------------------------------------+
23508 | saved TOC pointer | 20 40
23509 +---------------------------------------+
23510 | Parameter save area (P) | 24 48
23511 +---------------------------------------+
23512 | Alloca space (A) | 24+P etc.
23513 +---------------------------------------+
23514 | Local variable space (L) | 24+P+A
23515 +---------------------------------------+
23516 | Float/int conversion temporary (X) | 24+P+A+L
23517 +---------------------------------------+
23518 | Save area for AltiVec registers (W) | 24+P+A+L+X
23519 +---------------------------------------+
23520 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23521 +---------------------------------------+
23522 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23523 +---------------------------------------+
23524 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23525 +---------------------------------------+
23526 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23527 +---------------------------------------+
23528 old SP->| back chain to caller's caller |
23529 +---------------------------------------+
23531 The required alignment for AIX configurations is two words (i.e., 8
23532 or 16 bytes).
23534 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23536 SP----> +---------------------------------------+
23537 | Back chain to caller | 0
23538 +---------------------------------------+
23539 | Save area for CR | 8
23540 +---------------------------------------+
23541 | Saved LR | 16
23542 +---------------------------------------+
23543 | Saved TOC pointer | 24
23544 +---------------------------------------+
23545 | Parameter save area (P) | 32
23546 +---------------------------------------+
23547 | Alloca space (A) | 32+P
23548 +---------------------------------------+
23549 | Local variable space (L) | 32+P+A
23550 +---------------------------------------+
23551 | Save area for AltiVec registers (W) | 32+P+A+L
23552 +---------------------------------------+
23553 | AltiVec alignment padding (Y) | 32+P+A+L+W
23554 +---------------------------------------+
23555 | Save area for GP registers (G) | 32+P+A+L+W+Y
23556 +---------------------------------------+
23557 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23558 +---------------------------------------+
23559 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23560 +---------------------------------------+
23563 V.4 stack frames look like:
23565 SP----> +---------------------------------------+
23566 | back chain to caller | 0
23567 +---------------------------------------+
23568 | caller's saved LR | 4
23569 +---------------------------------------+
23570 | Parameter save area (P) | 8
23571 +---------------------------------------+
23572 | Alloca space (A) | 8+P
23573 +---------------------------------------+
23574 | Varargs save area (V) | 8+P+A
23575 +---------------------------------------+
23576 | Local variable space (L) | 8+P+A+V
23577 +---------------------------------------+
23578 | Float/int conversion temporary (X) | 8+P+A+V+L
23579 +---------------------------------------+
23580 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23581 +---------------------------------------+
23582 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23583 +---------------------------------------+
23584 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23585 +---------------------------------------+
23586 | SPE: area for 64-bit GP registers |
23587 +---------------------------------------+
23588 | SPE alignment padding |
23589 +---------------------------------------+
23590 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23591 +---------------------------------------+
23592 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23593 +---------------------------------------+
23594 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23595 +---------------------------------------+
23596 old SP->| back chain to caller's caller |
23597 +---------------------------------------+
23599 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23600 given. (But note below and in sysv4.h that we require only 8 and
23601 may round up the size of our stack frame anyway. The historical
23602 reason is that early versions of powerpc-linux did not properly
23603 align the stack at program startup. A happy side-effect is that
23604 -mno-eabi libraries can be used with -meabi programs.)
23606 The EABI configuration defaults to the V.4 layout. However,
23607 the stack alignment requirements may differ. If -mno-eabi is not
23608 given, the required stack alignment is 8 bytes; if -mno-eabi is
23609 given, the required alignment is 16 bytes. (But see V.4 comment
23610 above.) */
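/* Editorial sketch, not part of GCC: a standalone program that walks the
   ELFv2 table above with hypothetical area sizes, showing that each save
   area's offset is just the running sum of the areas before it.  ALIGN_UP
   mirrors the RS6000_ALIGN rounding used later in this file; the real
   computation in rs6000_stack_info works from the top of the frame down
   with negative offsets, so this is only the table's algebra.  */
#if 0
#include <stdio.h>

#define ALIGN_UP(n, a) (((n) + (a) - 1) & ~((long) (a) - 1))

int
main (void)
{
  /* Hypothetical sizes in bytes: parameters, alloca, locals, AltiVec
     saves, three GPRs and two FPRs.  */
  long P = 64, A = 0, L = 40, W = 32, G = 3 * 8, F = 2 * 8;
  long header = 32;			/* back chain, CR, LR, TOC */
  long below_pad = header + P + A + L + W;
  long Y = ALIGN_UP (below_pad, 16) - below_pad;

  printf ("AltiVec save area (W) at SP+%ld\n", header + P + A + L);
  printf ("GP save area (G)      at SP+%ld\n", header + P + A + L + W + Y);
  printf ("FP save area (F)      at SP+%ld\n", header + P + A + L + W + Y + G);
  printf ("old SP                at SP+%ld\n",
	  header + P + A + L + W + Y + G + F);
  return 0;
}
#endif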
23612 #ifndef ABI_STACK_BOUNDARY
23613 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
23614 #endif
23616 static rs6000_stack_t *
23617 rs6000_stack_info (void)
23619 /* We should never be called for thunks; we are not set up for that. */
23620 gcc_assert (!cfun->is_thunk);
23622 rs6000_stack_t *info = &stack_info;
23623 int reg_size = TARGET_32BIT ? 4 : 8;
23624 int ehrd_size;
23625 int ehcr_size;
23626 int save_align;
23627 int first_gp;
23628 HOST_WIDE_INT non_fixed_size;
23629 bool using_static_chain_p;
23631 if (reload_completed && info->reload_completed)
23632 return info;
23634 memset (info, 0, sizeof (*info));
23635 info->reload_completed = reload_completed;
23637 if (TARGET_SPE)
23639 /* Cache value so we don't rescan instruction chain over and over. */
23640 if (cfun->machine->spe_insn_chain_scanned_p == 0)
23641 cfun->machine->spe_insn_chain_scanned_p
23642 = spe_func_has_64bit_regs_p () + 1;
23643 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
23646 /* Select which calling sequence. */
23647 info->abi = DEFAULT_ABI;
23649 /* Calculate which registers need to be saved & save area size. */
23650 info->first_gp_reg_save = first_reg_to_save ();
23651 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
23652 even if it currently looks like we won't. Reload may need it to
23653 get at a constant; if so, it will have already created a constant
23654 pool entry for it. */
23655 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
23656 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
23657 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
23658 && crtl->uses_const_pool
23659 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
23660 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
23661 else
23662 first_gp = info->first_gp_reg_save;
23664 info->gp_size = reg_size * (32 - first_gp);
23666 /* For the SPE, we have an additional upper 32-bits on each GPR.
23667 Ideally we should save the entire 64-bits only when the upper
23668 half is used in SIMD instructions. Since we only record
23669 registers live (not the size they are used in), this proves
23670 difficult because we'd have to traverse the instruction chain at
23671 the right time, taking reload into account. This is a real pain,
23672 so we opt to always save the GPRs in 64 bits if even one register
23673 gets used in 64 bits. Otherwise, all the registers in the frame
23674 get saved in 32 bits.
23676 So, when we save all GPRs (except the SP) in 64 bits, the
23677 traditional GP save area will be empty. */
23678 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23679 info->gp_size = 0;
23681 info->first_fp_reg_save = first_fp_reg_to_save ();
23682 info->fp_size = 8 * (64 - info->first_fp_reg_save);
23684 info->first_altivec_reg_save = first_altivec_reg_to_save ();
23685 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
23686 - info->first_altivec_reg_save);
23688 /* Does this function call anything? */
23689 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
23691 /* Determine if we need to save the condition code registers. */
23692 if (save_reg_p (CR2_REGNO)
23693 || save_reg_p (CR3_REGNO)
23694 || save_reg_p (CR4_REGNO))
23696 info->cr_save_p = 1;
23697 if (DEFAULT_ABI == ABI_V4)
23698 info->cr_size = reg_size;
23701 /* If the current function calls __builtin_eh_return, then we need
23702 to allocate stack space for registers that will hold data for
23703 the exception handler. */
23704 if (crtl->calls_eh_return)
23706 unsigned int i;
23707 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
23708 continue;
23710 /* SPE saves EH registers in 64-bits. */
23711 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
23712 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
23714 else
23715 ehrd_size = 0;
23717 /* In the ELFv2 ABI, we also need to allocate space for separate
23718 CR field save areas if the function calls __builtin_eh_return. */
23719 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23721 /* This hard-codes that we have three call-saved CR fields. */
23722 ehcr_size = 3 * reg_size;
23723 /* We do *not* use the regular CR save mechanism. */
23724 info->cr_save_p = 0;
23726 else
23727 ehcr_size = 0;
23729 /* Determine various sizes. */
23730 info->reg_size = reg_size;
23731 info->fixed_size = RS6000_SAVE_AREA;
23732 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
23733 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
23734 TARGET_ALTIVEC ? 16 : 8);
23735 if (FRAME_GROWS_DOWNWARD)
23736 info->vars_size
23737 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
23738 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
23739 - (info->fixed_size + info->vars_size + info->parm_size);
23741 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23742 info->spe_gp_size = 8 * (32 - first_gp);
23744 if (TARGET_ALTIVEC_ABI)
23745 info->vrsave_mask = compute_vrsave_mask ();
23747 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
23748 info->vrsave_size = 4;
23750 compute_save_world_info (info);
23752 /* Calculate the offsets. */
23753 switch (DEFAULT_ABI)
23755 case ABI_NONE:
23756 default:
23757 gcc_unreachable ();
23759 case ABI_AIX:
23760 case ABI_ELFv2:
23761 case ABI_DARWIN:
23762 info->fp_save_offset = -info->fp_size;
23763 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23765 if (TARGET_ALTIVEC_ABI)
23767 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
23769 /* Align stack so vector save area is on a quadword boundary.
23770 The padding goes above the vectors. */
23771 if (info->altivec_size != 0)
23772 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
23774 info->altivec_save_offset = info->vrsave_save_offset
23775 - info->altivec_padding_size
23776 - info->altivec_size;
23777 gcc_assert (info->altivec_size == 0
23778 || info->altivec_save_offset % 16 == 0);
23780 /* Adjust for AltiVec case. */
23781 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
23783 else
23784 info->ehrd_offset = info->gp_save_offset - ehrd_size;
23786 info->ehcr_offset = info->ehrd_offset - ehcr_size;
23787 info->cr_save_offset = reg_size; /* first word when 64-bit. */
23788 info->lr_save_offset = 2*reg_size;
23789 break;
23791 case ABI_V4:
23792 info->fp_save_offset = -info->fp_size;
23793 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23794 info->cr_save_offset = info->gp_save_offset - info->cr_size;
23796 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23798 /* Align stack so SPE GPR save area is aligned on a
23799 double-word boundary. */
23800 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
23801 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
23802 else
23803 info->spe_padding_size = 0;
23805 info->spe_gp_save_offset = info->cr_save_offset
23806 - info->spe_padding_size
23807 - info->spe_gp_size;
23809 /* Adjust for SPE case. */
23810 info->ehrd_offset = info->spe_gp_save_offset;
23812 else if (TARGET_ALTIVEC_ABI)
23814 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
23816 /* Align stack so vector save area is on a quadword boundary. */
23817 if (info->altivec_size != 0)
23818 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
23820 info->altivec_save_offset = info->vrsave_save_offset
23821 - info->altivec_padding_size
23822 - info->altivec_size;
23824 /* Adjust for AltiVec case. */
23825 info->ehrd_offset = info->altivec_save_offset;
23827 else
23828 info->ehrd_offset = info->cr_save_offset;
23830 info->ehrd_offset -= ehrd_size;
23831 info->lr_save_offset = reg_size;
23834 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
23835 info->save_size = RS6000_ALIGN (info->fp_size
23836 + info->gp_size
23837 + info->altivec_size
23838 + info->altivec_padding_size
23839 + info->spe_gp_size
23840 + info->spe_padding_size
23841 + ehrd_size
23842 + ehcr_size
23843 + info->cr_size
23844 + info->vrsave_size,
23845 save_align);
23847 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
23849 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
23850 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
23852 /* Determine if we need to save the link register. */
23853 if (info->calls_p
23854 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23855 && crtl->profile
23856 && !TARGET_PROFILE_KERNEL)
23857 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
23858 #ifdef TARGET_RELOCATABLE
23859 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
23860 #endif
23861 || rs6000_ra_ever_killed ())
23862 info->lr_save_p = 1;
23864 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23865 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23866 && call_used_regs[STATIC_CHAIN_REGNUM]);
23867 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
23869 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
23870 || !(info->savres_strategy & SAVE_INLINE_FPRS)
23871 || !(info->savres_strategy & SAVE_INLINE_VRS)
23872 || !(info->savres_strategy & REST_INLINE_GPRS)
23873 || !(info->savres_strategy & REST_INLINE_FPRS)
23874 || !(info->savres_strategy & REST_INLINE_VRS))
23875 info->lr_save_p = 1;
23877 if (info->lr_save_p)
23878 df_set_regs_ever_live (LR_REGNO, true);
23880 /* Determine if we need to allocate any stack frame:
23882 For AIX we need to push the stack if a frame pointer is needed
23883 (because the stack might be dynamically adjusted), if we are
23884 debugging, if we make calls, or if the sum of fp_save, gp_save,
23885 and local variables is more than the space needed to save all
23886 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
23887 + 18*8 = 288 (GPR13 reserved).
23889 For V.4 we don't have the stack cushion that AIX uses, but assume
23890 that the debugger can handle stackless frames. */
23892 if (info->calls_p)
23893 info->push_p = 1;
23895 else if (DEFAULT_ABI == ABI_V4)
23896 info->push_p = non_fixed_size != 0;
23898 else if (frame_pointer_needed)
23899 info->push_p = 1;
23901 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
23902 info->push_p = 1;
23904 else
23905 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
23907 return info;
23910 /* Return true if the current function uses any GPRs in 64-bit SIMD
23911 mode. */
23913 static bool
23914 spe_func_has_64bit_regs_p (void)
23916 rtx_insn *insns, *insn;
23918 /* Functions that save and restore all the call-saved registers will
23919 need to save/restore the registers in 64-bits. */
23920 if (crtl->calls_eh_return
23921 || cfun->calls_setjmp
23922 || crtl->has_nonlocal_goto)
23923 return true;
23925 insns = get_insns ();
23927 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
23929 if (INSN_P (insn))
23931 rtx i;
23933 /* FIXME: This should be implemented with attributes...
23935 (set_attr "spe64" "true")....then,
23936 if (get_spe64(insn)) return true;
23938 It's the only reliable way to do the stuff below. */
23940 i = PATTERN (insn);
23941 if (GET_CODE (i) == SET)
23943 machine_mode mode = GET_MODE (SET_SRC (i));
23945 if (SPE_VECTOR_MODE (mode))
23946 return true;
23947 if (TARGET_E500_DOUBLE
23948 && (mode == DFmode || FLOAT128_2REG_P (mode)))
23949 return true;
23954 return false;
23957 static void
23958 debug_stack_info (rs6000_stack_t *info)
23960 const char *abi_string;
23962 if (! info)
23963 info = rs6000_stack_info ();
23965 fprintf (stderr, "\nStack information for function %s:\n",
23966 ((current_function_decl && DECL_NAME (current_function_decl))
23967 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
23968 : "<unknown>"));
23970 switch (info->abi)
23972 default: abi_string = "Unknown"; break;
23973 case ABI_NONE: abi_string = "NONE"; break;
23974 case ABI_AIX: abi_string = "AIX"; break;
23975 case ABI_ELFv2: abi_string = "ELFv2"; break;
23976 case ABI_DARWIN: abi_string = "Darwin"; break;
23977 case ABI_V4: abi_string = "V.4"; break;
23980 fprintf (stderr, "\tABI = %5s\n", abi_string);
23982 if (TARGET_ALTIVEC_ABI)
23983 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
23985 if (TARGET_SPE_ABI)
23986 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
23988 if (info->first_gp_reg_save != 32)
23989 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
23991 if (info->first_fp_reg_save != 64)
23992 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
23994 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
23995 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
23996 info->first_altivec_reg_save);
23998 if (info->lr_save_p)
23999 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24001 if (info->cr_save_p)
24002 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24004 if (info->vrsave_mask)
24005 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24007 if (info->push_p)
24008 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24010 if (info->calls_p)
24011 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
24013 if (info->gp_size)
24014 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24016 if (info->fp_size)
24017 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24019 if (info->altivec_size)
24020 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24021 info->altivec_save_offset);
24023 if (info->spe_gp_size)
24024 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
24025 info->spe_gp_save_offset);
24027 if (info->vrsave_size)
24028 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24029 info->vrsave_save_offset);
24031 if (info->lr_save_p)
24032 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24034 if (info->cr_save_p)
24035 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24037 if (info->varargs_save_offset)
24038 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24040 if (info->total_size)
24041 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24042 info->total_size);
24044 if (info->vars_size)
24045 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24046 info->vars_size);
24048 if (info->parm_size)
24049 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24051 if (info->fixed_size)
24052 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24054 if (info->gp_size)
24055 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24057 if (info->spe_gp_size)
24058 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
24060 if (info->fp_size)
24061 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24063 if (info->altivec_size)
24064 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24066 if (info->vrsave_size)
24067 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24069 if (info->altivec_padding_size)
24070 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24071 info->altivec_padding_size);
24073 if (info->spe_padding_size)
24074 fprintf (stderr, "\tspe_padding_size = %5d\n",
24075 info->spe_padding_size);
24077 if (info->cr_size)
24078 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24080 if (info->save_size)
24081 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24083 if (info->reg_size != 4)
24084 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24086 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24088 fprintf (stderr, "\n");
24091 rtx
24092 rs6000_return_addr (int count, rtx frame)
24094 /* Currently we don't optimize very well between prolog and body
24095 code, and for PIC the generated code can actually be quite bad, so
24096 don't try to be too clever here. */
24097 if (count != 0
24098 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
24100 cfun->machine->ra_needs_full_frame = 1;
24102 return
24103 gen_rtx_MEM
24104 (Pmode,
24105 memory_address
24106 (Pmode,
24107 plus_constant (Pmode,
24108 copy_to_reg
24109 (gen_rtx_MEM (Pmode,
24110 memory_address (Pmode, frame))),
24111 RETURN_ADDRESS_OFFSET)));
24114 cfun->machine->ra_need_lr = 1;
24115 return get_hard_reg_initial_val (Pmode, LR_REGNO);
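/* Editorial note, not part of GCC: this expander sits behind
   __builtin_return_address via the RETURN_ADDR_RTX macro.  A hypothetical
   use and how it maps onto the code above:  */
#if 0
void *
return_address_example (void)
{
  /* COUNT == 0 with no PIC complication: the saved-LR pseudo.  */
  return __builtin_return_address (0);
  /* COUNT > 0 would instead chase the back chain and load at
     RETURN_ADDRESS_OFFSET, as in the MEM built above.  */
}
#endif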
24118 /* Say whether a function is a candidate for sibcall handling or not. */
24120 static bool
24121 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24123 tree fntype;
24125 if (decl)
24126 fntype = TREE_TYPE (decl);
24127 else
24128 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24130 /* We can't do it if the called function has more vector parameters
24131 than the current function; there's nowhere to put the VRsave code. */
24132 if (TARGET_ALTIVEC_ABI
24133 && TARGET_ALTIVEC_VRSAVE
24134 && !(decl && decl == current_function_decl))
24136 function_args_iterator args_iter;
24137 tree type;
24138 int nvreg = 0;
24140 /* Functions with vector parameters are required to have a
24141 prototype, so the argument type info must be available
24142 here. */
24143 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
24144 if (TREE_CODE (type) == VECTOR_TYPE
24145 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24146 nvreg++;
24148 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
24149 if (TREE_CODE (type) == VECTOR_TYPE
24150 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24151 nvreg--;
24153 if (nvreg > 0)
24154 return false;
24157 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24158 functions, because the callee may have a different TOC pointer from
24159 the caller and there's no way to ensure we restore the TOC when
24160 we return. With the secure-plt SYSV ABI we can't make non-local
24161 calls when -fpic/-fPIC because the PLT call stubs use r30. */
24162 if (DEFAULT_ABI == ABI_DARWIN
24163 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24164 && decl
24165 && !DECL_EXTERNAL (decl)
24166 && !DECL_WEAK (decl)
24167 && (*targetm.binds_local_p) (decl))
24168 || (DEFAULT_ABI == ABI_V4
24169 && (!TARGET_SECURE_PLT
24170 || !flag_pic
24171 || (decl
24172 && (*targetm.binds_local_p) (decl)))))
24174 tree attr_list = TYPE_ATTRIBUTES (fntype);
24176 if (!lookup_attribute ("longcall", attr_list)
24177 || lookup_attribute ("shortcall", attr_list))
24178 return true;
24181 return false;
24184 static int
24185 rs6000_ra_ever_killed (void)
24187 rtx_insn *top;
24188 rtx reg;
24189 rtx_insn *insn;
24191 if (cfun->is_thunk)
24192 return 0;
24194 if (cfun->machine->lr_save_state)
24195 return cfun->machine->lr_save_state - 1;
24197 /* regs_ever_live has LR marked as used if any sibcalls are present,
24198 but this should not force saving and restoring in the
24199 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24200 clobbers LR, so that is inappropriate. */
24202 /* Also, the prologue can generate a store into LR that
24203 doesn't really count, like this:
24205 move LR->R0
24206 bcl to set PIC register
24207 move LR->R31
24208 move R0->LR
24210 When we're called from the epilogue, we need to avoid counting
24211 this as a store. */
24213 push_topmost_sequence ();
24214 top = get_insns ();
24215 pop_topmost_sequence ();
24216 reg = gen_rtx_REG (Pmode, LR_REGNO);
24218 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
24220 if (INSN_P (insn))
24222 if (CALL_P (insn))
24224 if (!SIBLING_CALL_P (insn))
24225 return 1;
24227 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24228 return 1;
24229 else if (set_of (reg, insn) != NULL_RTX
24230 && !prologue_epilogue_contains (insn))
24231 return 1;
24234 return 0;
24237 /* Emit instructions needed to load the TOC register.
24238 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
24239 a constant pool; or for SVR4 -fpic. */
24241 void
24242 rs6000_emit_load_toc_table (int fromprolog)
24244 rtx dest;
24245 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24247 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24249 char buf[30];
24250 rtx lab, tmp1, tmp2, got;
24252 lab = gen_label_rtx ();
24253 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24254 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24255 if (flag_pic == 2)
24257 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24258 need_toc_init = 1;
24260 else
24261 got = rs6000_got_sym ();
24262 tmp1 = tmp2 = dest;
24263 if (!fromprolog)
24265 tmp1 = gen_reg_rtx (Pmode);
24266 tmp2 = gen_reg_rtx (Pmode);
24268 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24269 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24270 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24271 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
24273 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24275 emit_insn (gen_load_toc_v4_pic_si ());
24276 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24278 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24280 char buf[30];
24281 rtx temp0 = (fromprolog
24282 ? gen_rtx_REG (Pmode, 0)
24283 : gen_reg_rtx (Pmode));
24285 if (fromprolog)
24287 rtx symF, symL;
24289 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24290 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24292 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24293 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24295 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24296 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24297 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24299 else
24301 rtx tocsym, lab;
24303 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24304 need_toc_init = 1;
24305 lab = gen_label_rtx ();
24306 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24307 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24308 if (TARGET_LINK_STACK)
24309 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24310 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24312 emit_insn (gen_addsi3 (dest, temp0, dest));
24314 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24316 /* This is for AIX code running in non-PIC ELF32. */
24317 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24319 need_toc_init = 1;
24320 emit_insn (gen_elf_high (dest, realsym));
24321 emit_insn (gen_elf_low (dest, dest, realsym));
24323 else
24325 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24327 if (TARGET_32BIT)
24328 emit_insn (gen_load_toc_aix_si (dest));
24329 else
24330 emit_insn (gen_load_toc_aix_di (dest));
24334 /* Emit instructions to restore the link register after determining where
24335 its value has been stored. */
24337 void
24338 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24340 rs6000_stack_t *info = rs6000_stack_info ();
24341 rtx operands[2];
24343 operands[0] = source;
24344 operands[1] = scratch;
24346 if (info->lr_save_p)
24348 rtx frame_rtx = stack_pointer_rtx;
24349 HOST_WIDE_INT sp_offset = 0;
24350 rtx tmp;
24352 if (frame_pointer_needed
24353 || cfun->calls_alloca
24354 || info->total_size > 32767)
24356 tmp = gen_frame_mem (Pmode, frame_rtx);
24357 emit_move_insn (operands[1], tmp);
24358 frame_rtx = operands[1];
24360 else if (info->push_p)
24361 sp_offset = info->total_size;
24363 tmp = plus_constant (Pmode, frame_rtx,
24364 info->lr_save_offset + sp_offset);
24365 tmp = gen_frame_mem (Pmode, tmp);
24366 emit_move_insn (tmp, operands[0]);
24368 else
24369 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
24371 /* Freeze lr_save_p. We've just emitted rtl that depends on the
24372 state of lr_save_p so any change from here on would be a bug. In
24373 particular, stop rs6000_ra_ever_killed from considering the SET
24374 of lr we may have added just above. */
24375 cfun->machine->lr_save_state = info->lr_save_p + 1;
24378 static GTY(()) alias_set_type set = -1;
24380 alias_set_type
24381 get_TOC_alias_set (void)
24383 if (set == -1)
24384 set = new_alias_set ();
24385 return set;
24388 /* This returns nonzero if the current function uses the TOC. This is
24389 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
24390 is generated by the ABI_V4 load_toc_* patterns. */
24391 #if TARGET_ELF
24392 static int
24393 uses_TOC (void)
24395 rtx_insn *insn;
24397 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24398 if (INSN_P (insn))
24400 rtx pat = PATTERN (insn);
24401 int i;
24403 if (GET_CODE (pat) == PARALLEL)
24404 for (i = 0; i < XVECLEN (pat, 0); i++)
24406 rtx sub = XVECEXP (pat, 0, i);
24407 if (GET_CODE (sub) == USE)
24409 sub = XEXP (sub, 0);
24410 if (GET_CODE (sub) == UNSPEC
24411 && XINT (sub, 1) == UNSPEC_TOC)
24412 return 1;
24416 return 0;
24418 #endif
24420 rtx
24421 create_TOC_reference (rtx symbol, rtx largetoc_reg)
24423 rtx tocrel, tocreg, hi;
24425 if (TARGET_DEBUG_ADDR)
24427 if (GET_CODE (symbol) == SYMBOL_REF)
24428 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
24429 XSTR (symbol, 0));
24430 else
24432 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
24433 GET_RTX_NAME (GET_CODE (symbol)));
24434 debug_rtx (symbol);
24438 if (!can_create_pseudo_p ())
24439 df_set_regs_ever_live (TOC_REGISTER, true);
24441 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
24442 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
24443 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
24444 return tocrel;
24446 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24447 if (largetoc_reg != NULL)
24449 emit_move_insn (largetoc_reg, hi);
24450 hi = largetoc_reg;
24452 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
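/* Editorial note, not part of GCC: roughly, the UNSPEC_TOCREL built above
   prints as a single TOC-relative reference for -mcmodel=small, e.g.

	ld 9,sym@toc(2)

   while the HIGH/LO_SUM pair used for the larger code models comes out
   along the lines of

	addis 9,2,sym@toc@ha
	ld 9,sym@toc@l(9)

   The exact mnemonics depend on the patterns that consume the rtl.  */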
24455 /* Issue assembly directives that create a reference to the given DWARF
24456 FRAME_TABLE_LABEL from the current function section. */
24457 void
24458 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24460 fprintf (asm_out_file, "\t.ref %s\n",
24461 (* targetm.strip_name_encoding) (frame_table_label));
24464 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24465 and the change to the stack pointer. */
24467 static void
24468 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24470 rtvec p;
24471 int i;
24472 rtx regs[3];
24474 i = 0;
24475 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24476 if (hard_frame_needed)
24477 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24478 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24479 || (hard_frame_needed
24480 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24481 regs[i++] = fp;
24483 p = rtvec_alloc (i);
24484 while (--i >= 0)
24486 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24487 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24490 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24493 /* Emit the correct code for allocating stack space, as insns.
24494 If COPY_REG, make sure a copy of the old frame is left in it.
24495 The generated code may use hard register 0 as a temporary. */
24497 static rtx_insn *
24498 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
24500 rtx_insn *insn;
24501 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24502 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24503 rtx todec = gen_int_mode (-size, Pmode);
24504 rtx par, set, mem;
24506 if (INTVAL (todec) != -size)
24508 warning (0, "stack frame too large");
24509 emit_insn (gen_trap ());
24510 return 0;
24513 if (crtl->limit_stack)
24515 if (REG_P (stack_limit_rtx)
24516 && REGNO (stack_limit_rtx) > 1
24517 && REGNO (stack_limit_rtx) <= 31)
24519 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
24520 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24521 const0_rtx));
24523 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
24524 && TARGET_32BIT
24525 && DEFAULT_ABI == ABI_V4)
24527 rtx toload = gen_rtx_CONST (VOIDmode,
24528 gen_rtx_PLUS (Pmode,
24529 stack_limit_rtx,
24530 GEN_INT (size)));
24532 emit_insn (gen_elf_high (tmp_reg, toload));
24533 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
24534 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24535 const0_rtx));
24537 else
24538 warning (0, "stack limit expression is not supported");
24541 if (copy_reg)
24543 if (copy_off != 0)
24544 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
24545 else
24546 emit_move_insn (copy_reg, stack_reg);
24549 if (size > 32767)
24551 /* Need a note here so that try_split doesn't get confused. */
24552 if (get_last_insn () == NULL_RTX)
24553 emit_note (NOTE_INSN_DELETED);
24554 insn = emit_move_insn (tmp_reg, todec);
24555 try_split (PATTERN (insn), insn, 0);
24556 todec = tmp_reg;
24559 insn = emit_insn (TARGET_32BIT
24560 ? gen_movsi_update_stack (stack_reg, stack_reg,
24561 todec, stack_reg)
24562 : gen_movdi_di_update_stack (stack_reg, stack_reg,
24563 todec, stack_reg));
24564 /* Since we didn't use gen_frame_mem to generate the MEM, grab
24565 it now and set the alias set/attributes. The above gen_*_update
24566 calls will generate a PARALLEL with the MEM set being the first
24567 operation. */
24568 par = PATTERN (insn);
24569 gcc_assert (GET_CODE (par) == PARALLEL);
24570 set = XVECEXP (par, 0, 0);
24571 gcc_assert (GET_CODE (set) == SET);
24572 mem = SET_DEST (set);
24573 gcc_assert (MEM_P (mem));
24574 MEM_NOTRAP_P (mem) = 1;
24575 set_mem_alias_set (mem, get_frame_alias_set ());
24577 RTX_FRAME_RELATED_P (insn) = 1;
24578 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24579 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
24580 GEN_INT (-size))));
24581 return insn;
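/* Editorial note, not part of GCC: for frames of 32767 bytes or less the
   insn emitted above is a single store-with-update that adjusts SP and
   stores the back chain atomically, roughly "stwu 1,-SIZE(1)" on 32-bit
   (stdu on 64-bit).  Larger frames first move the negated size into r0
   via the split above and use the indexed form, roughly "stwux 1,1,0".  */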
24584 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
24586 #if PROBE_INTERVAL > 32768
24587 #error Cannot use indexed addressing mode for stack probing
24588 #endif
24590 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
24591 inclusive. These are offsets from the current stack pointer. */
24593 static void
24594 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
24596 /* See if we have a constant small number of probes to generate. If so,
24597 that's the easy case. */
24598 if (first + size <= 32768)
24600 HOST_WIDE_INT i;
24602 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
24603 it exceeds SIZE. If only one probe is needed, this will not
24604 generate any code. Then probe at FIRST + SIZE. */
24605 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
24606 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24607 -(first + i)));
24609 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24610 -(first + size)));
24613 /* Otherwise, do the same as above, but in a loop. Note that we must be
24614 extra careful with variables wrapping around because we might be at
24615 the very top (or the very bottom) of the address space and we have
24616 to be able to handle this case properly; in particular, we use an
24617 equality test for the loop condition. */
24618 else
24620 HOST_WIDE_INT rounded_size;
24621 rtx r12 = gen_rtx_REG (Pmode, 12);
24622 rtx r0 = gen_rtx_REG (Pmode, 0);
24624 /* Sanity check for the addressing mode we're going to use. */
24625 gcc_assert (first <= 32768);
24627 /* Step 1: round SIZE to the previous multiple of the interval. */
24629 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
24632 /* Step 2: compute initial and final value of the loop counter. */
24634 /* TEST_ADDR = SP + FIRST. */
24635 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
24636 -first)));
24638 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
24639 if (rounded_size > 32768)
24641 emit_move_insn (r0, GEN_INT (-rounded_size));
24642 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
24644 else
24645 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
24646 -rounded_size)));
24649 /* Step 3: the loop
24653 do { TEST_ADDR = TEST_ADDR + PROBE_INTERVAL;
24654 probe at TEST_ADDR; }
24656 while (TEST_ADDR != LAST_ADDR)
24658 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
24659 until it is equal to ROUNDED_SIZE. */
24661 if (TARGET_64BIT)
24662 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
24663 else
24664 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
24667 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
24668 that SIZE is equal to ROUNDED_SIZE. */
24670 if (size != rounded_size)
24671 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
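/* Editorial sketch, not part of GCC: in the constant case above, probes
   land every PROBE_INTERVAL bytes below SP+FIRST, plus one final probe at
   FIRST + SIZE.  A standalone loop with hypothetical inputs:  */
#if 0
#include <stdio.h>

int
main (void)
{
  long probe_interval = 4096;	/* 1 << STACK_CHECK_PROBE_INTERVAL_EXP */
  long first = 16384, size = 10000, i;	/* hypothetical values */

  for (i = probe_interval; i < size; i += probe_interval)
    printf ("probe at sp%+ld\n", -(first + i));	  /* sp-20480, sp-24576 */
  printf ("probe at sp%+ld\n", -(first + size)); /* sp-26384 */
  return 0;
}
#endif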
24675 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
24676 absolute addresses. */
24678 const char *
24679 output_probe_stack_range (rtx reg1, rtx reg2)
24681 static int labelno = 0;
24682 char loop_lab[32];
24683 rtx xops[2];
24685 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
24687 /* Loop. */
24688 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24690 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
24691 xops[0] = reg1;
24692 xops[1] = GEN_INT (-PROBE_INTERVAL);
24693 output_asm_insn ("addi %0,%0,%1", xops);
24695 /* Probe at TEST_ADDR. */
24696 xops[1] = gen_rtx_REG (Pmode, 0);
24697 output_asm_insn ("stw %1,0(%0)", xops);
24699 /* Test if TEST_ADDR == LAST_ADDR. */
24700 xops[1] = reg2;
24701 if (TARGET_64BIT)
24702 output_asm_insn ("cmpd 0,%0,%1", xops);
24703 else
24704 output_asm_insn ("cmpw 0,%0,%1", xops);
24706 /* Branch. */
24707 fputs ("\tbne 0,", asm_out_file);
24708 assemble_name_raw (asm_out_file, loop_lab);
24709 fputc ('\n', asm_out_file);
24711 return "";
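/* Editorial note, not part of GCC: with the r12/r0 registers chosen by
   rs6000_emit_probe_stack_range above, the 32-bit loop printed here comes
   out roughly as

   .LPSRL0:
	addi 12,12,-4096
	stw 0,0(12)
	cmpw 0,12,0
	bne 0,.LPSRL0
   */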
24714 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
24715 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
24716 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
24717 deduce these equivalences by itself so it wasn't necessary to hold
24718 its hand so much. Don't be tempted to always supply d2_f_d_e with
24719 the actual CFA register, i.e. r31 when we are using a hard frame
24720 pointer. That fails when saving regs off r1, and sched moves the
24721 r31 setup past the reg saves. */
24723 static rtx
24724 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
24725 rtx reg2, rtx rreg)
24727 rtx real, temp;
24729 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
24731 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
24732 int i;
24734 gcc_checking_assert (val == 0);
24735 real = PATTERN (insn);
24736 if (GET_CODE (real) == PARALLEL)
24737 for (i = 0; i < XVECLEN (real, 0); i++)
24738 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24740 rtx set = XVECEXP (real, 0, i);
24742 /* If this PARALLEL has been emitted for out-of-line
24743 register save functions, or store multiple, then omit
24744 eh_frame info for any user-defined global regs. If
24745 eh_frame info is supplied, frame unwinding will
24746 restore a user reg. */
24747 if (!REG_P (SET_SRC (set))
24748 || !fixed_reg_p (REGNO (SET_SRC (set))))
24749 RTX_FRAME_RELATED_P (set) = 1;
24751 RTX_FRAME_RELATED_P (insn) = 1;
24752 return insn;
24755 /* copy_rtx will not make unique copies of registers, so we need to
24756 ensure we don't have unwanted sharing here. */
24757 if (reg == reg2)
24758 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24760 if (reg == rreg)
24761 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24763 real = copy_rtx (PATTERN (insn));
24765 if (reg2 != NULL_RTX)
24766 real = replace_rtx (real, reg2, rreg);
24768 if (REGNO (reg) == STACK_POINTER_REGNUM)
24769 gcc_checking_assert (val == 0);
24770 else
24771 real = replace_rtx (real, reg,
24772 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
24773 STACK_POINTER_REGNUM),
24774 GEN_INT (val)));
24776 /* We expect that 'real' is either a SET or a PARALLEL containing
24777 SETs (and possibly other stuff). In a PARALLEL, all the SETs
24778 are important so they all have to be marked RTX_FRAME_RELATED_P. */
24780 if (GET_CODE (real) == SET)
24782 rtx set = real;
24784 temp = simplify_rtx (SET_SRC (set));
24785 if (temp)
24786 SET_SRC (set) = temp;
24787 temp = simplify_rtx (SET_DEST (set));
24788 if (temp)
24789 SET_DEST (set) = temp;
24790 if (GET_CODE (SET_DEST (set)) == MEM)
24792 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24793 if (temp)
24794 XEXP (SET_DEST (set), 0) = temp;
24797 else
24799 int i;
24801 gcc_assert (GET_CODE (real) == PARALLEL);
24802 for (i = 0; i < XVECLEN (real, 0); i++)
24803 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24805 rtx set = XVECEXP (real, 0, i);
24807 temp = simplify_rtx (SET_SRC (set));
24808 if (temp)
24809 SET_SRC (set) = temp;
24810 temp = simplify_rtx (SET_DEST (set));
24811 if (temp)
24812 SET_DEST (set) = temp;
24813 if (GET_CODE (SET_DEST (set)) == MEM)
24815 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24816 if (temp)
24817 XEXP (SET_DEST (set), 0) = temp;
24819 /* Omit eh_frame info for any user-defined global regs. */
24820 if (!REG_P (SET_SRC (set))
24821 || !fixed_reg_p (REGNO (SET_SRC (set))))
24822 RTX_FRAME_RELATED_P (set) = 1;
24826 RTX_FRAME_RELATED_P (insn) = 1;
24827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
24829 return insn;
24832 /* Returns an insn that has a vrsave set operation with the
24833 appropriate CLOBBERs. */
24835 static rtx
24836 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
24838 int nclobs, i;
24839 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
24840 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24842 clobs[0]
24843 = gen_rtx_SET (vrsave,
24844 gen_rtx_UNSPEC_VOLATILE (SImode,
24845 gen_rtvec (2, reg, vrsave),
24846 UNSPECV_SET_VRSAVE));
24848 nclobs = 1;
24850 /* We need to clobber the registers in the mask so the scheduler
24851 does not move sets to VRSAVE before sets of AltiVec registers.
24853 However, if the function receives nonlocal gotos, reload will set
24854 all call saved registers live. We will end up with:
24856 (set (reg 999) (mem))
24857 (parallel [ (set (reg vrsave) (unspec blah))
24858 (clobber (reg 999))])
24860 The clobber will cause the store into reg 999 to be dead, and
24861 flow will attempt to delete an epilogue insn. In this case, we
24862 need an unspec use/set of the register. */
24864 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24865 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24867 if (!epiloguep || call_used_regs [i])
24868 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
24869 gen_rtx_REG (V4SImode, i));
24870 else
24872 rtx reg = gen_rtx_REG (V4SImode, i);
24874 clobs[nclobs++]
24875 = gen_rtx_SET (reg,
24876 gen_rtx_UNSPEC (V4SImode,
24877 gen_rtvec (1, reg), 27));
24881 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
24883 for (i = 0; i < nclobs; ++i)
24884 XVECEXP (insn, 0, i) = clobs[i];
24886 return insn;
24889 static rtx
24890 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
24892 rtx addr, mem;
24894 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
24895 mem = gen_frame_mem (GET_MODE (reg), addr);
24896 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
24899 static rtx
24900 gen_frame_load (rtx reg, rtx frame_reg, int offset)
24902 return gen_frame_set (reg, frame_reg, offset, false);
24905 static rtx
24906 gen_frame_store (rtx reg, rtx frame_reg, int offset)
24908 return gen_frame_set (reg, frame_reg, offset, true);
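/* Editorial note, not part of GCC: for illustration, on a 64-bit target
   gen_frame_store of r31 at FRAME_REG r1, offset 8 builds

	(set (mem/c:DI (plus:DI (reg 1) (const_int 8))) (reg 31))

   and gen_frame_load builds the mirror-image SET.  */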
24911 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
24912 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
24914 static rtx
24915 emit_frame_save (rtx frame_reg, machine_mode mode,
24916 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
24918 rtx reg, insn;
24920 /* Some cases that need register indexed addressing. */
24921 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
24922 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24923 || (TARGET_E500_DOUBLE && mode == DFmode)
24924 || (TARGET_SPE_ABI
24925 && SPE_VECTOR_MODE (mode)
24926 && !SPE_CONST_OFFSET_OK (offset))));
24928 reg = gen_rtx_REG (mode, regno);
24929 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
24930 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
24931 NULL_RTX, NULL_RTX);
24934 /* Emit an offset memory reference suitable for a frame store, while
24935 converting to a valid addressing mode. */
24937 static rtx
24938 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
24940 rtx int_rtx, offset_rtx;
24942 int_rtx = GEN_INT (offset);
24944 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
24945 || (TARGET_E500_DOUBLE && mode == DFmode))
24947 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
24948 emit_move_insn (offset_rtx, int_rtx);
24950 else
24951 offset_rtx = int_rtx;
24953 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
24956 #ifndef TARGET_FIX_AND_CONTINUE
24957 #define TARGET_FIX_AND_CONTINUE 0
24958 #endif
24960 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
24961 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
24962 #define LAST_SAVRES_REGISTER 31
24963 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
24965 enum {
24966 SAVRES_LR = 0x1,
24967 SAVRES_SAVE = 0x2,
24968 SAVRES_REG = 0x0c,
24969 SAVRES_GPR = 0,
24970 SAVRES_FPR = 4,
24971 SAVRES_VR = 8
24974 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
24976 /* Temporary holding space for an out-of-line register save/restore
24977 routine name. */
24978 static char savres_routine_name[30];
24980 /* Return the name for an out-of-line register save/restore routine.
24981 The SAVRES_* bits in SEL select the register class, direction, and LR handling. */
24983 static char *
24984 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
24986 const char *prefix = "";
24987 const char *suffix = "";
24989 /* Different targets are supposed to define
24990 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
24991 routine name could be defined with:
24993 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
24995 This is a nice idea in theory, but in reality, things are
24996 complicated in several ways:
24998 - ELF targets have save/restore routines for GPRs.
25000 - SPE targets use different prefixes for 32/64-bit registers, and
25001 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
25003 - PPC64 ELF targets have routines for save/restore of GPRs that
25004 differ in what they do with the link register, so having a set
25005 prefix doesn't work. (We only use one of the save routines at
25006 the moment, though.)
25008 - PPC32 elf targets have "exit" versions of the restore routines
25009 that restore the link register and can save some extra space.
25010 These require an extra suffix. (There are also "tail" versions
25011 of the restore routines and "GOT" versions of the save routines,
25012 but we don't generate those at present. Same problems apply,
25013 though.)
25015 We deal with all this by synthesizing our own prefix/suffix and
25016 using that for the simple sprintf call shown above. */
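/* Editorial note, not part of GCC: for illustration, the scheme below
   yields "_restgpr_29_x" on 32-bit SVR4 (restore r29..r31, then return)
   and "_savegpr0_29" or "_savegpr1_29" on AIX/ELFv2, depending on
   whether the routine also handles LR.  */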
25017 if (TARGET_SPE)
25019 /* No floating point saves on the SPE. */
25020 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
25022 if ((sel & SAVRES_SAVE))
25023 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
25024 else
25025 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
25027 if ((sel & SAVRES_LR))
25028 suffix = "_x";
25030 else if (DEFAULT_ABI == ABI_V4)
25032 if (TARGET_64BIT)
25033 goto aix_names;
25035 if ((sel & SAVRES_REG) == SAVRES_GPR)
25036 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25037 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25038 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25039 else if ((sel & SAVRES_REG) == SAVRES_VR)
25040 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25041 else
25042 abort ();
25044 if ((sel & SAVRES_LR))
25045 suffix = "_x";
25047 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25049 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25050 /* No out-of-line save/restore routines for GPRs on AIX. */
25051 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25052 #endif
25054 aix_names:
25055 if ((sel & SAVRES_REG) == SAVRES_GPR)
25056 prefix = ((sel & SAVRES_SAVE)
25057 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25058 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25059 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25061 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25062 if ((sel & SAVRES_LR))
25063 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25064 else
25065 #endif
25067 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25068 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25071 else if ((sel & SAVRES_REG) == SAVRES_VR)
25072 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25073 else
25074 abort ();
25077 if (DEFAULT_ABI == ABI_DARWIN)
25079 /* The Darwin approach is (slightly) different, in order to be
25080 compatible with code generated by the system toolchain. There is a
25081 single symbol for the start of save sequence, and the code here
25082 embeds an offset into that code on the basis of the first register
25083 to be saved. */
25084 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25085 if ((sel & SAVRES_REG) == SAVRES_GPR)
25086 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25087 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25088 (regno - 13) * 4, prefix, regno);
25089 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25090 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25091 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25092 else if ((sel & SAVRES_REG) == SAVRES_VR)
25093 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25094 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25095 else
25096 abort ();
25098 else
25099 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25101 return savres_routine_name;
25104 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25105 The SAVRES_* bits in SEL select the register class, direction, and LR handling. */
25107 static rtx
25108 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25110 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25111 ? info->first_gp_reg_save
25112 : (sel & SAVRES_REG) == SAVRES_FPR
25113 ? info->first_fp_reg_save - 32
25114 : (sel & SAVRES_REG) == SAVRES_VR
25115 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25116 : -1);
25117 rtx sym;
25118 int select = sel;
25120 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
25121 versions of the gpr routines. */
25122 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
25123 && info->spe_64bit_regs_used)
25124 select ^= SAVRES_FPR ^ SAVRES_GPR;
25126 /* Don't generate bogus routine names. */
25127 gcc_assert (FIRST_SAVRES_REGISTER <= regno
25128 && regno <= LAST_SAVRES_REGISTER
25129 && select >= 0 && select <= 12);
25131 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
25133 if (sym == NULL)
25135 char *name;
25137 name = rs6000_savres_routine_name (info, regno, sel);
25139 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
25140 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
25141 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
25144 return sym;
25147 /* Emit a sequence of insns, including a stack tie if needed, for
25148 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
25149 reset the stack pointer, but move the base of the frame into
25150 reg UPDT_REGNO for use by out-of-line register restore routines. */
25152 static rtx
25153 rs6000_emit_stack_reset (rs6000_stack_t *info,
25154 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
25155 unsigned updt_regno)
25157 rtx updt_reg_rtx;
25159 /* This blockage is needed so that sched doesn't decide to move
25160 the sp change before the register restores. */
25161 if (DEFAULT_ABI == ABI_V4
25162 || (TARGET_SPE_ABI
25163 && info->spe_64bit_regs_used != 0
25164 && info->first_gp_reg_save != 32))
25165 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
25167 /* If we are restoring registers out-of-line, we will be using the
25168 "exit" variants of the restore routines, which will reset the
25169 stack for us. But we do need to point updt_reg into the
25170 right place for those routines. */
25171 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
25173 if (frame_off != 0)
25174 return emit_insn (gen_add3_insn (updt_reg_rtx,
25175 frame_reg_rtx, GEN_INT (frame_off)));
25176 else if (REGNO (frame_reg_rtx) != updt_regno)
25177 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
25179 return NULL_RTX;
25182 /* Return the register number used as a pointer by out-of-line
25183 save/restore functions. */
25185 static inline unsigned
25186 ptr_regno_for_savres (int sel)
25188 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25189 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
25190 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
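/* Editorial note, not part of GCC: the selection above tabulates as

	ABI		FPR routines	GPR/VR routines
	AIX/ELFv2	r1		r12 (r1 for the LR variants)
	Darwin		r1		r11
	V.4		r11		r11

   i.e. only the AIX-family GPR/VR routines expect their pointer in r12.  */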
25193 /* Construct a parallel rtx describing the effect of a call to an
25194 out-of-line register save/restore routine, and emit the insn
25195 or jump_insn as appropriate. */
25197 static rtx
25198 rs6000_emit_savres_rtx (rs6000_stack_t *info,
25199 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
25200 machine_mode reg_mode, int sel)
25202 int i;
25203 int offset, start_reg, end_reg, n_regs, use_reg;
25204 int reg_size = GET_MODE_SIZE (reg_mode);
25205 rtx sym;
25206 rtvec p;
25207 rtx par, insn;
25209 offset = 0;
25210 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25211 ? info->first_gp_reg_save
25212 : (sel & SAVRES_REG) == SAVRES_FPR
25213 ? info->first_fp_reg_save
25214 : (sel & SAVRES_REG) == SAVRES_VR
25215 ? info->first_altivec_reg_save
25216 : -1);
25217 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25218 ? 32
25219 : (sel & SAVRES_REG) == SAVRES_FPR
25220 ? 64
25221 : (sel & SAVRES_REG) == SAVRES_VR
25222 ? LAST_ALTIVEC_REGNO + 1
25223 : -1);
25224 n_regs = end_reg - start_reg;
25225 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
25226 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
25227 + n_regs);
25229 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25230 RTVEC_ELT (p, offset++) = ret_rtx;
25232 RTVEC_ELT (p, offset++)
25233 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
25235 sym = rs6000_savres_routine_sym (info, sel);
25236 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
25238 use_reg = ptr_regno_for_savres (sel);
25239 if ((sel & SAVRES_REG) == SAVRES_VR)
25241 /* Vector regs are saved/restored using [reg+reg] addressing. */
25242 RTVEC_ELT (p, offset++)
25243 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25244 RTVEC_ELT (p, offset++)
25245 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
25247 else
25248 RTVEC_ELT (p, offset++)
25249 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25251 for (i = 0; i < end_reg - start_reg; i++)
25252 RTVEC_ELT (p, i + offset)
25253 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
25254 frame_reg_rtx, save_area_offset + reg_size * i,
25255 (sel & SAVRES_SAVE) != 0);
25257 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25258 RTVEC_ELT (p, i + offset)
25259 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
25261 par = gen_rtx_PARALLEL (VOIDmode, p);
25263 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25265 insn = emit_jump_insn (par);
25266 JUMP_LABEL (insn) = ret_rtx;
25268 else
25269 insn = emit_insn (par);
25270 return insn;
25273 /* Emit code to store CR fields that need to be saved into REG. */
25275 static void
25276 rs6000_emit_move_from_cr (rtx reg)
25278 /* Only the ELFv2 ABI allows storing just the selected fields. */
25279 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
25281 int i, cr_reg[8], count = 0;
25283 /* Collect CR fields that must be saved. */
25284 for (i = 0; i < 8; i++)
25285 if (save_reg_p (CR0_REGNO + i))
25286 cr_reg[count++] = i;
25288 /* If it's just a single one, use mfcrf. */
25289 if (count == 1)
25291 rtvec p = rtvec_alloc (1);
25292 rtvec r = rtvec_alloc (2);
25293 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
25294 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
25295 RTVEC_ELT (p, 0)
25296 = gen_rtx_SET (reg,
25297 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
25299 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25300 return;
25303 /* ??? It might be better to handle count == 2 / 3 cases here
25304 as well, using logical operations to combine the values. */
25307 emit_insn (gen_movesi_from_cr (reg));
25310 /* Return whether the split-stack arg pointer (r12) is used. */
25312 static bool
25313 split_stack_arg_pointer_used_p (void)
25315 /* If the pseudo holding the arg pointer is no longer a pseudo,
25316 then the arg pointer is used. */
25317 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
25318 && (!REG_P (cfun->machine->split_stack_arg_pointer)
25319 || (REGNO (cfun->machine->split_stack_arg_pointer)
25320 < FIRST_PSEUDO_REGISTER)))
25321 return true;
25323 /* Unfortunately we also need to do some code scanning, since
25324 r12 may have been substituted for the pseudo. */
25325 rtx_insn *insn;
25326 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
25327 FOR_BB_INSNS (bb, insn)
25328 if (NONDEBUG_INSN_P (insn))
25330 /* A call destroys r12. */
25331 if (CALL_P (insn))
25332 return false;
25334 df_ref use;
25335 FOR_EACH_INSN_USE (use, insn)
25337 rtx x = DF_REF_REG (use);
25338 if (REG_P (x) && REGNO (x) == 12)
25339 return true;
25341 df_ref def;
25342 FOR_EACH_INSN_DEF (def, insn)
25344 rtx x = DF_REF_REG (def);
25345 if (REG_P (x) && REGNO (x) == 12)
25346 return false;
25349 return bitmap_bit_p (DF_LR_OUT (bb), 12);
25352 /* Return whether we need to emit an ELFv2 global entry point prologue. */
25354 static bool
25355 rs6000_global_entry_point_needed_p (void)
25357 /* Only needed for the ELFv2 ABI. */
25358 if (DEFAULT_ABI != ABI_ELFv2)
25359 return false;
25361 /* With -msingle-pic-base, we assume the whole program shares the same
25362 TOC, so no global entry point prologues are needed anywhere. */
25363 if (TARGET_SINGLE_PIC_BASE)
25364 return false;
25366 /* Ensure we have a global entry point for thunks. ??? We could
25367 avoid that if the target routine doesn't need a global entry point,
25368 but we do not know whether this is the case at this point. */
25369 if (cfun->is_thunk)
25370 return true;
25372 /* For regular functions, rs6000_emit_prologue sets this flag if the
25373 routine ever uses the TOC pointer. */
25374 return cfun->machine->r2_setup_needed;
25377 /* Emit function prologue as insns. */
25379 void
25380 rs6000_emit_prologue (void)
25382 rs6000_stack_t *info = rs6000_stack_info ();
25383 machine_mode reg_mode = Pmode;
25384 int reg_size = TARGET_32BIT ? 4 : 8;
25385 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25386 rtx frame_reg_rtx = sp_reg_rtx;
25387 unsigned int cr_save_regno;
25388 rtx cr_save_rtx = NULL_RTX;
25389 rtx insn;
25390 int strategy;
25391 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25392 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25393 && call_used_regs[STATIC_CHAIN_REGNUM]);
25394 int using_split_stack = (flag_split_stack
25395 && (lookup_attribute ("no_split_stack",
25396 DECL_ATTRIBUTES (cfun->decl))
25397 == NULL));
25399 /* Offset to top of frame for frame_reg and sp respectively. */
25400 HOST_WIDE_INT frame_off = 0;
25401 HOST_WIDE_INT sp_off = 0;
25402 /* sp_adjust is the stack adjusting instruction, tracked so that the
25403 insn setting up the split-stack arg pointer can be emitted just
25404 prior to it, when r12 is not used here for other purposes. */
25405 rtx_insn *sp_adjust = 0;
25407 #if CHECKING_P
25408 /* Track and check usage of r0, r11, r12. */
25409 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
25410 #define START_USE(R) do \
25412 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
25413 reg_inuse |= 1 << (R); \
25414 } while (0)
25415 #define END_USE(R) do \
25417 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
25418 reg_inuse &= ~(1 << (R)); \
25419 } while (0)
25420 #define NOT_INUSE(R) do \
25422 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
25423 } while (0)
25424 #else
25425 #define START_USE(R) do {} while (0)
25426 #define END_USE(R) do {} while (0)
25427 #define NOT_INUSE(R) do {} while (0)
25428 #endif
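/* A typical use of these checking macros (illustrative):
START_USE (11);
... emit insns that keep a temporary in r11 ...
END_USE (11);
while NOT_INUSE (0) merely asserts that r0 is currently free. */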
25430 if (DEFAULT_ABI == ABI_ELFv2
25431 && !TARGET_SINGLE_PIC_BASE)
25433 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
25435 /* With -mminimal-toc we may generate an extra use of r2 below. */
25436 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
25437 cfun->machine->r2_setup_needed = true;
25441 if (flag_stack_usage_info)
25442 current_function_static_stack_size = info->total_size;
25444 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
25446 HOST_WIDE_INT size = info->total_size;
25448 if (crtl->is_leaf && !cfun->calls_alloca)
25450 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
25451 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
25452 size - STACK_CHECK_PROTECT);
25454 else if (size > 0)
25455 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
25458 if (TARGET_FIX_AND_CONTINUE)
25460 /* gdb on darwin arranges to forward a function from the old
25461 address by modifying the first 5 instructions of the function
25462 to branch to the overriding function. This is necessary to
25463 permit function pointers that point to the old function to
25464 actually forward to the new function. */
25465 emit_insn (gen_nop ());
25466 emit_insn (gen_nop ());
25467 emit_insn (gen_nop ());
25468 emit_insn (gen_nop ());
25469 emit_insn (gen_nop ());
25472 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25474 reg_mode = V2SImode;
25475 reg_size = 8;
25478 /* Handle world saves specially here. */
25479 if (WORLD_SAVE_P (info))
25481 int i, j, sz;
25482 rtx treg;
25483 rtvec p;
25484 rtx reg0;
25486 /* save_world expects lr in r0. */
25487 reg0 = gen_rtx_REG (Pmode, 0);
25488 if (info->lr_save_p)
25490 insn = emit_move_insn (reg0,
25491 gen_rtx_REG (Pmode, LR_REGNO));
25492 RTX_FRAME_RELATED_P (insn) = 1;
25495 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
25496 assumptions about the offsets of various bits of the stack
25497 frame. */
25498 gcc_assert (info->gp_save_offset == -220
25499 && info->fp_save_offset == -144
25500 && info->lr_save_offset == 8
25501 && info->cr_save_offset == 4
25502 && info->push_p
25503 && info->lr_save_p
25504 && (!crtl->calls_eh_return
25505 || info->ehrd_offset == -432)
25506 && info->vrsave_save_offset == -224
25507 && info->altivec_save_offset == -416);
25509 treg = gen_rtx_REG (SImode, 11);
25510 emit_move_insn (treg, GEN_INT (-info->total_size));
25512 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
25513 in R11. It also clobbers R12, so beware! */
25515 /* Preserve CR2 for save_world prologues. */
25516 sz = 5;
25517 sz += 32 - info->first_gp_reg_save;
25518 sz += 64 - info->first_fp_reg_save;
25519 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
25520 p = rtvec_alloc (sz);
25521 j = 0;
25522 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
25523 gen_rtx_REG (SImode,
25524 LR_REGNO));
25525 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
25526 gen_rtx_SYMBOL_REF (Pmode,
25527 "*save_world"));
25528 /* We do floats first so that the instruction pattern matches
25529 properly. */
25530 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25531 RTVEC_ELT (p, j++)
25532 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25533 ? DFmode : SFmode,
25534 info->first_fp_reg_save + i),
25535 frame_reg_rtx,
25536 info->fp_save_offset + frame_off + 8 * i);
25537 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
25538 RTVEC_ELT (p, j++)
25539 = gen_frame_store (gen_rtx_REG (V4SImode,
25540 info->first_altivec_reg_save + i),
25541 frame_reg_rtx,
25542 info->altivec_save_offset + frame_off + 16 * i);
25543 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25544 RTVEC_ELT (p, j++)
25545 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25546 frame_reg_rtx,
25547 info->gp_save_offset + frame_off + reg_size * i);
25549 /* CR register traditionally saved as CR2. */
25550 RTVEC_ELT (p, j++)
25551 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
25552 frame_reg_rtx, info->cr_save_offset + frame_off);
25553 /* Explain the use of R0. */
25554 if (info->lr_save_p)
25555 RTVEC_ELT (p, j++)
25556 = gen_frame_store (reg0,
25557 frame_reg_rtx, info->lr_save_offset + frame_off);
25558 /* Explain what happens to the stack pointer. */
25560 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
25561 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
25564 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25565 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25566 treg, GEN_INT (-info->total_size));
25567 sp_off = frame_off = info->total_size;
25570 strategy = info->savres_strategy;
25572 /* For V.4, update stack before we do any saving and set back pointer. */
25573 if (! WORLD_SAVE_P (info)
25574 && info->push_p
25575 && (DEFAULT_ABI == ABI_V4
25576 || crtl->calls_eh_return))
25578 bool need_r11 = (TARGET_SPE
25579 ? (!(strategy & SAVE_INLINE_GPRS)
25580 && info->spe_64bit_regs_used == 0)
25581 : (!(strategy & SAVE_INLINE_FPRS)
25582 || !(strategy & SAVE_INLINE_GPRS)
25583 || !(strategy & SAVE_INLINE_VRS)));
25584 int ptr_regno = -1;
25585 rtx ptr_reg = NULL_RTX;
25586 int ptr_off = 0;
25588 if (info->total_size < 32767)
25589 frame_off = info->total_size;
25590 else if (need_r11)
25591 ptr_regno = 11;
25592 else if (info->cr_save_p
25593 || info->lr_save_p
25594 || info->first_fp_reg_save < 64
25595 || info->first_gp_reg_save < 32
25596 || info->altivec_size != 0
25597 || info->vrsave_size != 0
25598 || crtl->calls_eh_return)
25599 ptr_regno = 12;
25600 else
25602 /* The prologue won't be saving any regs so there is no need
25603 to set up a frame register to access any frame save area.
25604 We also won't be using frame_off anywhere below, but set
25605 the correct value anyway to protect against future
25606 changes to this function. */
25607 frame_off = info->total_size;
25609 if (ptr_regno != -1)
25611 /* Set up the frame offset to that needed by the first
25612 out-of-line save function. */
25613 START_USE (ptr_regno);
25614 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25615 frame_reg_rtx = ptr_reg;
25616 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
25617 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
25618 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
25619 ptr_off = info->gp_save_offset + info->gp_size;
25620 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
25621 ptr_off = info->altivec_save_offset + info->altivec_size;
25622 frame_off = -ptr_off;
25624 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
25625 ptr_reg, ptr_off);
25626 if (REGNO (frame_reg_rtx) == 12)
25627 sp_adjust = 0;
25628 sp_off = info->total_size;
25629 if (frame_reg_rtx != sp_reg_rtx)
25630 rs6000_emit_stack_tie (frame_reg_rtx, false);
25633 /* If we use the link register, get it into r0. */
25634 if (!WORLD_SAVE_P (info) && info->lr_save_p)
25636 rtx addr, reg, mem;
25638 reg = gen_rtx_REG (Pmode, 0);
25639 START_USE (0);
25640 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
25641 RTX_FRAME_RELATED_P (insn) = 1;
25643 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
25644 | SAVE_NOINLINE_FPRS_SAVES_LR)))
25646 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25647 GEN_INT (info->lr_save_offset + frame_off));
25648 mem = gen_rtx_MEM (Pmode, addr);
25649 /* This should not use rs6000_sr_alias_set, because of
25650 __builtin_return_address. */
25652 insn = emit_move_insn (mem, reg);
25653 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25654 NULL_RTX, NULL_RTX);
25655 END_USE (0);
25659 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
25660 r12 will be needed by the out-of-line gpr save. */
25661 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25662 && !(strategy & (SAVE_INLINE_GPRS
25663 | SAVE_NOINLINE_GPRS_SAVES_LR))
25664 ? 11 : 12);
25665 if (!WORLD_SAVE_P (info)
25666 && info->cr_save_p
25667 && REGNO (frame_reg_rtx) != cr_save_regno
25668 && !(using_static_chain_p && cr_save_regno == 11)
25669 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
25671 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
25672 START_USE (cr_save_regno);
25673 rs6000_emit_move_from_cr (cr_save_rtx);
25676 /* Do any required saving of FPRs. If only one or two to save, do
25677 it ourselves. Otherwise, call an out-of-line function. */
25678 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
25680 int i;
25681 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25682 if (save_reg_p (info->first_fp_reg_save + i))
25683 emit_frame_save (frame_reg_rtx,
25684 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25685 ? DFmode : SFmode),
25686 info->first_fp_reg_save + i,
25687 info->fp_save_offset + frame_off + 8 * i,
25688 sp_off - frame_off);
25690 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
25692 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25693 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25694 unsigned ptr_regno = ptr_regno_for_savres (sel);
25695 rtx ptr_reg = frame_reg_rtx;
25697 if (REGNO (frame_reg_rtx) == ptr_regno)
25698 gcc_checking_assert (frame_off == 0);
25699 else
25701 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25702 NOT_INUSE (ptr_regno);
25703 emit_insn (gen_add3_insn (ptr_reg,
25704 frame_reg_rtx, GEN_INT (frame_off)));
25706 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25707 info->fp_save_offset,
25708 info->lr_save_offset,
25709 DFmode, sel);
25710 rs6000_frame_related (insn, ptr_reg, sp_off,
25711 NULL_RTX, NULL_RTX);
25712 if (lr)
25713 END_USE (0);
25716 /* Save GPRs. This is done as a PARALLEL if we are using
25717 the store-multiple instructions. */
25718 if (!WORLD_SAVE_P (info)
25719 && TARGET_SPE_ABI
25720 && info->spe_64bit_regs_used != 0
25721 && info->first_gp_reg_save != 32)
25723 int i;
25724 rtx spe_save_area_ptr;
25725 HOST_WIDE_INT save_off;
25726 int ool_adjust = 0;
25728 /* Determine whether we can address all of the registers that need
25729 to be saved with an offset from frame_reg_rtx that fits in
25730 the small const field for SPE memory instructions. */
25731 int spe_regs_addressable
25732 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25733 + reg_size * (32 - info->first_gp_reg_save - 1))
25734 && (strategy & SAVE_INLINE_GPRS));
25736 if (spe_regs_addressable)
25738 spe_save_area_ptr = frame_reg_rtx;
25739 save_off = frame_off;
25741 else
25743 /* Make r11 point to the start of the SPE save area. We need
25744 to be careful here if r11 is holding the static chain. If
25745 it is, then temporarily save it in r0. */
25746 HOST_WIDE_INT offset;
25748 if (!(strategy & SAVE_INLINE_GPRS))
25749 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25750 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
25751 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
25752 save_off = frame_off - offset;
25754 if (using_static_chain_p)
25756 rtx r0 = gen_rtx_REG (Pmode, 0);
25758 START_USE (0);
25759 gcc_assert (info->first_gp_reg_save > 11);
25761 emit_move_insn (r0, spe_save_area_ptr);
25763 else if (REGNO (frame_reg_rtx) != 11)
25764 START_USE (11);
25766 emit_insn (gen_addsi3 (spe_save_area_ptr,
25767 frame_reg_rtx, GEN_INT (offset)));
25768 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
25769 frame_off = -info->spe_gp_save_offset + ool_adjust;
25772 if ((strategy & SAVE_INLINE_GPRS))
25774 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25775 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25776 emit_frame_save (spe_save_area_ptr, reg_mode,
25777 info->first_gp_reg_save + i,
25778 (info->spe_gp_save_offset + save_off
25779 + reg_size * i),
25780 sp_off - save_off);
25782 else
25784 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
25785 info->spe_gp_save_offset + save_off,
25786 0, reg_mode,
25787 SAVRES_SAVE | SAVRES_GPR);
25789 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
25790 NULL_RTX, NULL_RTX);
25793 /* Move the static chain pointer back. */
25794 if (!spe_regs_addressable)
25796 if (using_static_chain_p)
25798 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
25799 END_USE (0);
25801 else if (REGNO (frame_reg_rtx) != 11)
25802 END_USE (11);
25805 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
25807 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
25808 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
25809 unsigned ptr_regno = ptr_regno_for_savres (sel);
25810 rtx ptr_reg = frame_reg_rtx;
25811 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
25812 int end_save = info->gp_save_offset + info->gp_size;
25813 int ptr_off;
25815 if (ptr_regno == 12)
25816 sp_adjust = 0;
25817 if (!ptr_set_up)
25818 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25820 /* Need to adjust r11 (r12) if we saved any FPRs. */
25821 if (end_save + frame_off != 0)
25823 rtx offset = GEN_INT (end_save + frame_off);
25825 if (ptr_set_up)
25826 frame_off = -end_save;
25827 else
25828 NOT_INUSE (ptr_regno);
25829 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25831 else if (!ptr_set_up)
25833 NOT_INUSE (ptr_regno);
25834 emit_move_insn (ptr_reg, frame_reg_rtx);
25836 ptr_off = -end_save;
25837 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25838 info->gp_save_offset + ptr_off,
25839 info->lr_save_offset + ptr_off,
25840 reg_mode, sel);
25841 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
25842 NULL_RTX, NULL_RTX);
25843 if (lr)
25844 END_USE (0);
25846 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
25848 rtvec p;
25849 int i;
25850 p = rtvec_alloc (32 - info->first_gp_reg_save);
25851 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25852 RTVEC_ELT (p, i)
25853 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25854 frame_reg_rtx,
25855 info->gp_save_offset + frame_off + reg_size * i);
25856 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25857 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25858 NULL_RTX, NULL_RTX);
25860 else if (!WORLD_SAVE_P (info))
25862 int i;
25863 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25864 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25865 emit_frame_save (frame_reg_rtx, reg_mode,
25866 info->first_gp_reg_save + i,
25867 info->gp_save_offset + frame_off + reg_size * i,
25868 sp_off - frame_off);
25871 if (crtl->calls_eh_return)
25873 unsigned int i;
25874 rtvec p;
25876 for (i = 0; ; ++i)
25878 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25879 if (regno == INVALID_REGNUM)
25880 break;
25883 p = rtvec_alloc (i);
25885 for (i = 0; ; ++i)
25887 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25888 if (regno == INVALID_REGNUM)
25889 break;
25891 insn
25892 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
25893 sp_reg_rtx,
25894 info->ehrd_offset + sp_off + reg_size * (int) i);
25895 RTVEC_ELT (p, i) = insn;
25896 RTX_FRAME_RELATED_P (insn) = 1;
25899 insn = emit_insn (gen_blockage ());
25900 RTX_FRAME_RELATED_P (insn) = 1;
25901 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
25904 /* In the AIX ABI we need to make sure r2 is really saved. */
25905 if (TARGET_AIX && crtl->calls_eh_return)
25907 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
25908 rtx save_insn, join_insn, note;
25909 long toc_restore_insn;
25911 tmp_reg = gen_rtx_REG (Pmode, 11);
25912 tmp_reg_si = gen_rtx_REG (SImode, 11);
25913 if (using_static_chain_p)
25915 START_USE (0);
25916 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
25918 else
25919 START_USE (11);
25920 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
25921 /* Peek at the instruction to which this function returns. If it's
25922 restoring r2, then we know we've already saved r2. We can't
25923 unconditionally save r2 because the value we have will already
25924 be updated if we arrived at this function via a plt call or
25925 toc adjusting stub. */
25926 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
25927 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
25928 + RS6000_TOC_SAVE_SLOT);
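/* 0x80410000 and 0xE8410000 are the opcode and register fields of
"lwz 2,0(1)" and "ld 2,0(1)" respectively; adding the ABI's TOC save
slot fills in the displacement, giving e.g. "ld 2,24(1)" under ELFv2
(illustrative; the slot offset differs between ABIs). */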
25929 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
25930 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
25931 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
25932 validate_condition_mode (EQ, CCUNSmode);
25933 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
25934 emit_insn (gen_rtx_SET (compare_result,
25935 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
25936 toc_save_done = gen_label_rtx ();
25937 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25938 gen_rtx_EQ (VOIDmode, compare_result,
25939 const0_rtx),
25940 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
25941 pc_rtx);
25942 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25943 JUMP_LABEL (jump) = toc_save_done;
25944 LABEL_NUSES (toc_save_done) += 1;
25946 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
25947 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
25948 sp_off - frame_off);
25950 emit_label (toc_save_done);
25952 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
25953 have a CFG that has different saves along different paths.
25954 Move the note to a dummy blockage insn, which describes that
25955 R2 is unconditionally saved after the label. */
25956 /* ??? An alternate representation might be a special insn pattern
25957 containing both the branch and the store. That might give the
25958 code that minimizes the number of DW_CFA_advance opcodes more
25959 freedom in placing the annotations. */
25960 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
25961 if (note)
25962 remove_note (save_insn, note);
25963 else
25964 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
25965 copy_rtx (PATTERN (save_insn)), NULL_RTX);
25966 RTX_FRAME_RELATED_P (save_insn) = 0;
25968 join_insn = emit_insn (gen_blockage ());
25969 REG_NOTES (join_insn) = note;
25970 RTX_FRAME_RELATED_P (join_insn) = 1;
25972 if (using_static_chain_p)
25974 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
25975 END_USE (0);
25977 else
25978 END_USE (11);
25981 /* Save CR if we use any that must be preserved. */
25982 if (!WORLD_SAVE_P (info) && info->cr_save_p)
25984 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25985 GEN_INT (info->cr_save_offset + frame_off));
25986 rtx mem = gen_frame_mem (SImode, addr);
25988 /* If we didn't copy cr before, do so now using r0. */
25989 if (cr_save_rtx == NULL_RTX)
25991 START_USE (0);
25992 cr_save_rtx = gen_rtx_REG (SImode, 0);
25993 rs6000_emit_move_from_cr (cr_save_rtx);
25996 /* Saving CR requires a two-instruction sequence: one instruction
25997 to move the CR to a general-purpose register, and a second
25998 instruction that stores the GPR to memory.
26000 We do not emit any DWARF CFI records for the first of these,
26001 because we cannot properly represent the fact that CR is saved in
26002 a register. One reason is that we cannot express that multiple
26003 CR fields are saved; another reason is that on 64-bit, the size
26004 of the CR register in DWARF (4 bytes) differs from the size of
26005 a general-purpose register.
26007 This means if any intervening instruction were to clobber one of
26008 the call-saved CR fields, we'd have incorrect CFI. To prevent
26009 this from happening, we mark the store to memory as a use of
26010 those CR fields, which prevents any such instruction from being
26011 scheduled in between the two instructions. */
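/* The emitted pair is thus roughly (offsets illustrative):
mfcr rN ; or mfocrf rN,FXM under ELFv2
stw rN,8(1) ; with USEs of the live CR fields attached */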
26012 rtx crsave_v[9];
26013 int n_crsave = 0;
26014 int i;
26016 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
26017 for (i = 0; i < 8; i++)
26018 if (save_reg_p (CR0_REGNO + i))
26019 crsave_v[n_crsave++]
26020 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26022 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26023 gen_rtvec_v (n_crsave, crsave_v)));
26024 END_USE (REGNO (cr_save_rtx));
26026 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26027 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26028 so we need to construct a frame expression manually. */
26029 RTX_FRAME_RELATED_P (insn) = 1;
26031 /* Update address to be stack-pointer relative, like
26032 rs6000_frame_related would do. */
26033 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26034 GEN_INT (info->cr_save_offset + sp_off));
26035 mem = gen_frame_mem (SImode, addr);
26037 if (DEFAULT_ABI == ABI_ELFv2)
26039 /* In the ELFv2 ABI we generate separate CFI records for each
26040 CR field that was actually saved. They all point to the
26041 same 32-bit stack slot. */
26042 rtx crframe[8];
26043 int n_crframe = 0;
26045 for (i = 0; i < 8; i++)
26046 if (save_reg_p (CR0_REGNO + i))
26048 crframe[n_crframe]
26049 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26051 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26052 n_crframe++;
26055 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26056 gen_rtx_PARALLEL (VOIDmode,
26057 gen_rtvec_v (n_crframe, crframe)));
26059 else
26061 /* In other ABIs, by convention, we use a single CR regnum to
26062 represent the fact that all call-saved CR fields are saved.
26063 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26064 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26065 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26069 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26070 *separate* slots if the routine calls __builtin_eh_return, so
26071 that they can be independently restored by the unwinder. */
26072 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26074 int i, cr_off = info->ehcr_offset;
26075 rtx crsave;
26077 /* ??? We might get better performance by using multiple mfocrf
26078 instructions. */
26079 crsave = gen_rtx_REG (SImode, 0);
26080 emit_insn (gen_movesi_from_cr (crsave));
26082 for (i = 0; i < 8; i++)
26083 if (!call_used_regs[CR0_REGNO + i])
26085 rtvec p = rtvec_alloc (2);
26086 RTVEC_ELT (p, 0)
26087 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26088 RTVEC_ELT (p, 1)
26089 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26091 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26093 RTX_FRAME_RELATED_P (insn) = 1;
26094 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26095 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26096 sp_reg_rtx, cr_off + sp_off));
26098 cr_off += reg_size;
26102 /* Update stack and set back pointer unless this is V.4,
26103 for which it was done previously. */
26104 if (!WORLD_SAVE_P (info) && info->push_p
26105 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26107 rtx ptr_reg = NULL;
26108 int ptr_off = 0;
26110 /* If saving altivec regs we need to be able to address all save
26111 locations using a 16-bit offset. */
26112 if ((strategy & SAVE_INLINE_VRS) == 0
26113 || (info->altivec_size != 0
26114 && (info->altivec_save_offset + info->altivec_size - 16
26115 + info->total_size - frame_off) > 32767)
26116 || (info->vrsave_size != 0
26117 && (info->vrsave_save_offset
26118 + info->total_size - frame_off) > 32767))
26120 int sel = SAVRES_SAVE | SAVRES_VR;
26121 unsigned ptr_regno = ptr_regno_for_savres (sel);
26123 if (using_static_chain_p
26124 && ptr_regno == STATIC_CHAIN_REGNUM)
26125 ptr_regno = 12;
26126 if (REGNO (frame_reg_rtx) != ptr_regno)
26127 START_USE (ptr_regno);
26128 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26129 frame_reg_rtx = ptr_reg;
26130 ptr_off = info->altivec_save_offset + info->altivec_size;
26131 frame_off = -ptr_off;
26133 else if (REGNO (frame_reg_rtx) == 1)
26134 frame_off = info->total_size;
26135 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26136 ptr_reg, ptr_off);
26137 if (REGNO (frame_reg_rtx) == 12)
26138 sp_adjust = 0;
26139 sp_off = info->total_size;
26140 if (frame_reg_rtx != sp_reg_rtx)
26141 rs6000_emit_stack_tie (frame_reg_rtx, false);
26144 /* Set frame pointer, if needed. */
26145 if (frame_pointer_needed)
26147 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
26148 sp_reg_rtx);
26149 RTX_FRAME_RELATED_P (insn) = 1;
26152 /* Save AltiVec registers if needed. Save here because the red zone does
26153 not always include AltiVec registers. */
26154 if (!WORLD_SAVE_P (info)
26155 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
26157 int end_save = info->altivec_save_offset + info->altivec_size;
26158 int ptr_off;
26159 /* Oddly, the vector save/restore functions point r0 at the end
26160 of the save area, then use r11 or r12 to load offsets for
26161 [reg+reg] addressing. */
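/* So the out-of-line call emitted here is a sketch along the lines of
addi 0,FRAME,END_OFF ; r0 -> end of the VR save area
bl _savevr_20 ; routine name illustrative */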
26162 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26163 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
26164 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26166 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26167 NOT_INUSE (0);
26168 if (scratch_regno == 12)
26169 sp_adjust = 0;
26170 if (end_save + frame_off != 0)
26172 rtx offset = GEN_INT (end_save + frame_off);
26174 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26176 else
26177 emit_move_insn (ptr_reg, frame_reg_rtx);
26179 ptr_off = -end_save;
26180 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26181 info->altivec_save_offset + ptr_off,
26182 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
26183 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
26184 NULL_RTX, NULL_RTX);
26185 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26187 /* The oddity mentioned above clobbered our frame reg. */
26188 emit_move_insn (frame_reg_rtx, ptr_reg);
26189 frame_off = ptr_off;
26192 else if (!WORLD_SAVE_P (info)
26193 && info->altivec_size != 0)
26195 int i;
26197 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26198 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26200 rtx areg, savereg, mem;
26201 int offset;
26203 offset = (info->altivec_save_offset + frame_off
26204 + 16 * (i - info->first_altivec_reg_save));
26206 savereg = gen_rtx_REG (V4SImode, i);
26208 NOT_INUSE (0);
26209 areg = gen_rtx_REG (Pmode, 0);
26210 emit_move_insn (areg, GEN_INT (offset));
26212 /* AltiVec addressing mode is [reg+reg]. */
26213 mem = gen_frame_mem (V4SImode,
26214 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
26216 /* Rather than emitting a generic move, force use of the stvx
26217 instruction, which we always want. In particular we don't
26218 want xxpermdi/stxvd2x for little endian. */
26219 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
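/* I.e. roughly "li 0,OFFSET" followed by "stvx vN,FRAME,0"
(a sketch; the operand order is fixed by the insn pattern). */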
26221 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26222 areg, GEN_INT (offset));
26226 /* VRSAVE is a bit vector representing which AltiVec registers
26227 are used. The OS uses this to determine which vector
26228 registers to save on a context switch. We need to save
26229 VRSAVE on the stack frame, add whatever AltiVec registers we
26230 used in this function, and do the corresponding magic in the
26231 epilogue. */
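/* A sketch of the code emitted below (illustrative):
mfspr rN,256 ; read VRSAVE (SPR 256)
stw rN,OFF(FRAME) ; save the incoming mask
oris/ori rN,rN,MASK ; or in the VRs used by this function
mtspr 256,rN ; update VRSAVE */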
26233 if (!WORLD_SAVE_P (info)
26234 && info->vrsave_size != 0)
26236 rtx reg, vrsave;
26237 int offset;
26238 int save_regno;
26240 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
26241 be using r12 as frame_reg_rtx and r11 as the static chain
26242 pointer for nested functions. */
26243 save_regno = 12;
26244 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26245 && !using_static_chain_p)
26246 save_regno = 11;
26247 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
26249 save_regno = 11;
26250 if (using_static_chain_p)
26251 save_regno = 0;
26254 NOT_INUSE (save_regno);
26255 reg = gen_rtx_REG (SImode, save_regno);
26256 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26257 if (TARGET_MACHO)
26258 emit_insn (gen_get_vrsave_internal (reg));
26259 else
26260 emit_insn (gen_rtx_SET (reg, vrsave));
26262 /* Save VRSAVE. */
26263 offset = info->vrsave_save_offset + frame_off;
26264 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26266 /* Include the registers in the mask. */
26267 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
26269 insn = emit_insn (generate_set_vrsave (reg, info, 0));
26272 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
26273 if (!TARGET_SINGLE_PIC_BASE
26274 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
26275 || (DEFAULT_ABI == ABI_V4
26276 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
26277 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
26279 /* If emit_load_toc_table will use the link register, we need to save
26280 it. We use R12 for this purpose because emit_load_toc_table
26281 can use register 0. This allows us to use a plain 'blr' to return
26282 from the procedure more often. */
26283 int save_LR_around_toc_setup = (TARGET_ELF
26284 && DEFAULT_ABI == ABI_V4
26285 && flag_pic
26286 && ! info->lr_save_p
26287 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
26288 if (save_LR_around_toc_setup)
26290 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26291 rtx tmp = gen_rtx_REG (Pmode, 12);
26293 sp_adjust = 0;
26294 insn = emit_move_insn (tmp, lr);
26295 RTX_FRAME_RELATED_P (insn) = 1;
26297 rs6000_emit_load_toc_table (TRUE);
26299 insn = emit_move_insn (lr, tmp);
26300 add_reg_note (insn, REG_CFA_RESTORE, lr);
26301 RTX_FRAME_RELATED_P (insn) = 1;
26303 else
26304 rs6000_emit_load_toc_table (TRUE);
26307 #if TARGET_MACHO
26308 if (!TARGET_SINGLE_PIC_BASE
26309 && DEFAULT_ABI == ABI_DARWIN
26310 && flag_pic && crtl->uses_pic_offset_table)
26312 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26313 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
26315 /* Save and restore LR locally around this call (in R0). */
26316 if (!info->lr_save_p)
26317 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
26319 emit_insn (gen_load_macho_picbase (src));
26321 emit_move_insn (gen_rtx_REG (Pmode,
26322 RS6000_PIC_OFFSET_TABLE_REGNUM),
26323 lr);
26325 if (!info->lr_save_p)
26326 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
26328 #endif
26330 /* If we need to, save the TOC register after doing the stack setup.
26331 Do not emit eh frame info for this save. The unwinder wants info,
26332 conceptually attached to instructions in this function, about
26333 register values in the caller of this function. This R2 may have
26334 already been changed from the value in the caller.
26335 We don't attempt to write accurate DWARF EH frame info for R2
26336 because code emitted by gcc for a (non-pointer) function call
26337 doesn't save and restore R2. Instead, R2 is managed out-of-line
26338 by a linker generated plt call stub when the function resides in
26339 a shared library. This behavior is costly to describe in DWARF,
26340 both in terms of the size of DWARF info and the time taken in the
26341 unwinder to interpret it. R2 changes, apart from the
26342 calls_eh_return case earlier in this function, are handled by
26343 linux-unwind.h frob_update_context. */
26344 if (rs6000_save_toc_in_prologue_p ())
26346 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26347 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
26350 if (using_split_stack && split_stack_arg_pointer_used_p ())
26352 /* Set up the arg pointer (r12) for -fsplit-stack code. If
26353 __morestack was called, it left the arg pointer to the old
26354 stack in r29. Otherwise, the arg pointer is the top of the
26355 current frame. */
26356 cfun->machine->split_stack_argp_used = true;
26357 if (sp_adjust)
26359 rtx r12 = gen_rtx_REG (Pmode, 12);
26360 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26361 emit_insn_before (set_r12, sp_adjust);
26363 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26365 rtx r12 = gen_rtx_REG (Pmode, 12);
26366 if (frame_off == 0)
26367 emit_move_insn (r12, frame_reg_rtx);
26368 else
26369 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26371 if (info->push_p)
26373 rtx r12 = gen_rtx_REG (Pmode, 12);
26374 rtx r29 = gen_rtx_REG (Pmode, 29);
26375 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26376 rtx not_more = gen_label_rtx ();
26377 rtx jump;
26379 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26380 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26381 gen_rtx_LABEL_REF (VOIDmode, not_more),
26382 pc_rtx);
26383 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26384 JUMP_LABEL (jump) = not_more;
26385 LABEL_NUSES (not_more) += 1;
26386 emit_move_insn (r12, r29);
26387 emit_label (not_more);
26392 /* Output .extern statements for the save/restore routines we use. */
26394 static void
26395 rs6000_output_savres_externs (FILE *file)
26397 rs6000_stack_t *info = rs6000_stack_info ();
26399 if (TARGET_DEBUG_STACK)
26400 debug_stack_info (info);
26402 /* Write .extern for any function we will call to save and restore
26403 fp values. */
26404 if (info->first_fp_reg_save < 64
26405 && !TARGET_MACHO
26406 && !TARGET_ELF)
26408 char *name;
26409 int regno = info->first_fp_reg_save - 32;
26411 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
26413 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26414 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26415 name = rs6000_savres_routine_name (info, regno, sel);
26416 fprintf (file, "\t.extern %s\n", name);
26418 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
26420 bool lr = (info->savres_strategy
26421 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26422 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26423 name = rs6000_savres_routine_name (info, regno, sel);
26424 fprintf (file, "\t.extern %s\n", name);
26429 /* Write function prologue. */
26431 static void
26432 rs6000_output_function_prologue (FILE *file,
26433 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
26435 if (!cfun->is_thunk)
26436 rs6000_output_savres_externs (file);
26438 /* ELFv2 ABI r2 setup code and local entry point. This must follow
26439 immediately after the global entry point label. */
26440 if (rs6000_global_entry_point_needed_p ())
26442 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26444 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
26446 if (TARGET_CMODEL != CMODEL_LARGE)
26448 /* In the small and medium code models, we assume the TOC is less
26449 than 2 GB away from the text section, so it can be computed via the
26450 following two-instruction sequence. */
26451 char buf[256];
26453 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26454 fprintf (file, "0:\taddis 2,12,.TOC.-");
26455 assemble_name (file, buf);
26456 fprintf (file, "@ha\n");
26457 fprintf (file, "\taddi 2,2,.TOC.-");
26458 assemble_name (file, buf);
26459 fprintf (file, "@l\n");
26461 else
26463 /* In the large code model, we allow arbitrary offsets between the
26464 TOC and the text section, so we have to load the offset from
26465 memory. The data field is emitted directly before the global
26466 entry point in rs6000_elf_declare_function_name. */
26467 char buf[256];
26469 #ifdef HAVE_AS_ENTRY_MARKERS
26470 /* If supported by the linker, emit a marker relocation. If the
26471 total code size of the final executable or shared library
26472 happens to fit into 2 GB after all, the linker will replace
26473 this code sequence with the sequence for the small or medium
26474 code model. */
26475 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
26476 #endif
26477 fprintf (file, "\tld 2,");
26478 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26479 assemble_name (file, buf);
26480 fprintf (file, "-");
26481 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26482 assemble_name (file, buf);
26483 fprintf (file, "(12)\n");
26484 fprintf (file, "\tadd 2,2,12\n");
26487 fputs ("\t.localentry\t", file);
26488 assemble_name (file, name);
26489 fputs (",.-", file);
26490 assemble_name (file, name);
26491 fputs ("\n", file);
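/* For a function "foo" under the medium code model the output is thus
roughly (name and label numbering illustrative):
.LCF0:
0: addis 2,12,.TOC.-.LCF0@ha
addi 2,2,.TOC.-.LCF0@l
.localentry foo,.-foo */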
26494 /* Output -mprofile-kernel code. This needs to be done here instead of
26495 in output_function_profile since it must go after the ELFv2 ABI
26496 local entry point. */
26497 if (TARGET_PROFILE_KERNEL && crtl->profile)
26499 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26500 gcc_assert (!TARGET_32BIT);
26502 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
26504 /* In the ELFv2 ABI we have no compiler stack word. It must be
26505 the responsibility of _mcount to preserve the static chain
26506 register if required. */
26507 if (DEFAULT_ABI != ABI_ELFv2
26508 && cfun->static_chain_decl != NULL)
26510 asm_fprintf (file, "\tstd %s,24(%s)\n",
26511 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26512 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26513 asm_fprintf (file, "\tld %s,24(%s)\n",
26514 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26516 else
26517 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
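/* Under the ELFv2 ABI this reduces to "mflr 0" followed by
"bl _mcount" ("_mcount" being the usual RS6000_MCOUNT expansion;
illustrative). */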
26520 rs6000_pic_labelno++;
26523 /* -mprofile-kernel code calls mcount before the function prologue,
26524 so a profiled leaf function should stay a leaf function. */
26525 static bool
26526 rs6000_keep_leaf_when_profiled ()
26528 return TARGET_PROFILE_KERNEL;
26531 /* Non-zero if vmx regs are restored before the frame pop, zero if
26532 we restore after the pop when possible. */
26533 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
26535 /* Restoring cr is a two step process: loading a reg from the frame
26536 save, then moving the reg to cr. For ABI_V4 we must let the
26537 unwinder know that the stack location is no longer valid at or
26538 before the stack deallocation, but we can't emit a cfa_restore for
26539 cr at the stack deallocation like we do for other registers.
26540 The trouble is that it is possible for the move to cr to be
26541 scheduled after the stack deallocation. So say exactly where cr
26542 is located on each of the two insns. */
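/* Roughly (offsets and mask illustrative):
lwz rN,OFF(1) ; annotated: cr now lives in rN
mtcrf MASK,rN ; annotated: cr restored */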
26544 static rtx
26545 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
26547 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
26548 rtx reg = gen_rtx_REG (SImode, regno);
26549 rtx_insn *insn = emit_move_insn (reg, mem);
26551 if (!exit_func && DEFAULT_ABI == ABI_V4)
26553 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26554 rtx set = gen_rtx_SET (reg, cr);
26556 add_reg_note (insn, REG_CFA_REGISTER, set);
26557 RTX_FRAME_RELATED_P (insn) = 1;
26559 return reg;
26562 /* Reload CR from REG. */
26564 static void
26565 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
26567 int count = 0;
26568 int i;
26570 if (using_mfcr_multiple)
26572 for (i = 0; i < 8; i++)
26573 if (save_reg_p (CR0_REGNO + i))
26574 count++;
26575 gcc_assert (count);
26578 if (using_mfcr_multiple && count > 1)
26580 rtx_insn *insn;
26581 rtvec p;
26582 int ndx;
26584 p = rtvec_alloc (count);
26586 ndx = 0;
26587 for (i = 0; i < 8; i++)
26588 if (save_reg_p (CR0_REGNO + i))
26590 rtvec r = rtvec_alloc (2);
26591 RTVEC_ELT (r, 0) = reg;
26592 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
26593 RTVEC_ELT (p, ndx) =
26594 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
26595 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
26596 ndx++;
26598 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26599 gcc_assert (ndx == count);
26601 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26602 CR field separately. */
26603 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26605 for (i = 0; i < 8; i++)
26606 if (save_reg_p (CR0_REGNO + i))
26607 add_reg_note (insn, REG_CFA_RESTORE,
26608 gen_rtx_REG (SImode, CR0_REGNO + i));
26610 RTX_FRAME_RELATED_P (insn) = 1;
26613 else
26614 for (i = 0; i < 8; i++)
26615 if (save_reg_p (CR0_REGNO + i))
26617 rtx insn = emit_insn (gen_movsi_to_cr_one
26618 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
26620 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26621 CR field separately, attached to the insn that in fact
26622 restores this particular CR field. */
26623 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26625 add_reg_note (insn, REG_CFA_RESTORE,
26626 gen_rtx_REG (SImode, CR0_REGNO + i));
26628 RTX_FRAME_RELATED_P (insn) = 1;
26632 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
26633 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
26634 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
26636 rtx_insn *insn = get_last_insn ();
26637 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26639 add_reg_note (insn, REG_CFA_RESTORE, cr);
26640 RTX_FRAME_RELATED_P (insn) = 1;
26644 /* Like cr, the move to lr instruction can be scheduled after the
26645 stack deallocation, but unlike cr, its stack frame save is still
26646 valid. So we only need to emit the cfa_restore on the correct
26647 instruction. */
26649 static void
26650 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
26652 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
26653 rtx reg = gen_rtx_REG (Pmode, regno);
26655 emit_move_insn (reg, mem);
26658 static void
26659 restore_saved_lr (int regno, bool exit_func)
26661 rtx reg = gen_rtx_REG (Pmode, regno);
26662 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26663 rtx_insn *insn = emit_move_insn (lr, reg);
26665 if (!exit_func && flag_shrink_wrap)
26667 add_reg_note (insn, REG_CFA_RESTORE, lr);
26668 RTX_FRAME_RELATED_P (insn) = 1;
26672 static rtx
26673 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
26675 if (DEFAULT_ABI == ABI_ELFv2)
26677 int i;
26678 for (i = 0; i < 8; i++)
26679 if (save_reg_p (CR0_REGNO + i))
26681 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
26682 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
26683 cfa_restores);
26686 else if (info->cr_save_p)
26687 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26688 gen_rtx_REG (SImode, CR2_REGNO),
26689 cfa_restores);
26691 if (info->lr_save_p)
26692 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26693 gen_rtx_REG (Pmode, LR_REGNO),
26694 cfa_restores);
26695 return cfa_restores;
26698 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
26699 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
26700 below the stack pointer that are not clobbered by signals. */
26702 static inline bool
26703 offset_below_red_zone_p (HOST_WIDE_INT offset)
26705 return offset < (DEFAULT_ABI == ABI_V4
26706 ? 0
26707 : TARGET_32BIT ? -220 : -288);
26710 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
26712 static void
26713 emit_cfa_restores (rtx cfa_restores)
26715 rtx_insn *insn = get_last_insn ();
26716 rtx *loc = &REG_NOTES (insn);
26718 while (*loc)
26719 loc = &XEXP (*loc, 1);
26720 *loc = cfa_restores;
26721 RTX_FRAME_RELATED_P (insn) = 1;
26724 /* Emit function epilogue as insns. */
26726 void
26727 rs6000_emit_epilogue (int sibcall)
26729 rs6000_stack_t *info;
26730 int restoring_GPRs_inline;
26731 int restoring_FPRs_inline;
26732 int using_load_multiple;
26733 int using_mtcr_multiple;
26734 int use_backchain_to_restore_sp;
26735 int restore_lr;
26736 int strategy;
26737 HOST_WIDE_INT frame_off = 0;
26738 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
26739 rtx frame_reg_rtx = sp_reg_rtx;
26740 rtx cfa_restores = NULL_RTX;
26741 rtx insn;
26742 rtx cr_save_reg = NULL_RTX;
26743 machine_mode reg_mode = Pmode;
26744 int reg_size = TARGET_32BIT ? 4 : 8;
26745 int i;
26746 bool exit_func;
26747 unsigned ptr_regno;
26749 info = rs6000_stack_info ();
26751 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26753 reg_mode = V2SImode;
26754 reg_size = 8;
26757 strategy = info->savres_strategy;
26758 using_load_multiple = strategy & REST_MULTIPLE;
26759 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
26760 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
26761 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
26762 || rs6000_cpu == PROCESSOR_PPC603
26763 || rs6000_cpu == PROCESSOR_PPC750
26764 || optimize_size);
26765 /* Restore via the backchain when we have a large frame, since this
26766 is more efficient than an addis, addi pair. The second condition
26767 here will not trigger at the moment; we don't actually need a
26768 frame pointer for alloca, but the generic parts of the compiler
26769 give us one anyway. */
26770 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
26771 ? info->lr_save_offset
26772 : 0) > 32767
26773 || (cfun->calls_alloca
26774 && !frame_pointer_needed));
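/* I.e. prefer a single "ld 1,0(1)" (lwz for 32-bit) loading the
backchain word over an "addis 1,1,hi; addi 1,1,lo" pair to step over
a frame larger than 32767 bytes (illustrative). */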
26775 restore_lr = (info->lr_save_p
26776 && (restoring_FPRs_inline
26777 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
26778 && (restoring_GPRs_inline
26779 || info->first_fp_reg_save < 64));
26781 if (WORLD_SAVE_P (info))
26783 int i, j;
26784 char rname[30];
26785 const char *alloc_rname;
26786 rtvec p;
26788 /* eh_rest_world_r10 will return to the location saved in the LR
26789 stack slot (which is not likely to be our caller).
26790 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
26791 rest_world is similar, except any R10 parameter is ignored.
26792 The exception-handling stuff that was here in 2.95 is no
26793 longer necessary. */
26795 p = rtvec_alloc (9
26797 + 32 - info->first_gp_reg_save
26798 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
26799 + 63 + 1 - info->first_fp_reg_save);
26801 strcpy (rname, ((crtl->calls_eh_return) ?
26802 "*eh_rest_world_r10" : "*rest_world"));
26803 alloc_rname = ggc_strdup (rname);
26805 j = 0;
26806 RTVEC_ELT (p, j++) = ret_rtx;
26807 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26808 gen_rtx_REG (Pmode,
26809 LR_REGNO));
26810 RTVEC_ELT (p, j++)
26811 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
26812 /* The instruction pattern requires a clobber here;
26813 it is shared with the restVEC helper. */
26814 RTVEC_ELT (p, j++)
26815 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
26818 /* CR register traditionally saved as CR2. */
26819 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
26820 RTVEC_ELT (p, j++)
26821 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
26822 if (flag_shrink_wrap)
26824 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26825 gen_rtx_REG (Pmode, LR_REGNO),
26826 cfa_restores);
26827 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26831 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26833 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26834 RTVEC_ELT (p, j++)
26835 = gen_frame_load (reg,
26836 frame_reg_rtx, info->gp_save_offset + reg_size * i);
26837 if (flag_shrink_wrap)
26838 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26840 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26842 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
26843 RTVEC_ELT (p, j++)
26844 = gen_frame_load (reg,
26845 frame_reg_rtx, info->altivec_save_offset + 16 * i);
26846 if (flag_shrink_wrap)
26847 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26849 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
26851 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26852 ? DFmode : SFmode),
26853 info->first_fp_reg_save + i);
26854 RTVEC_ELT (p, j++)
26855 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
26856 if (flag_shrink_wrap)
26857 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26859 RTVEC_ELT (p, j++)
26860 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
26861 RTVEC_ELT (p, j++)
26862 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
26863 RTVEC_ELT (p, j++)
26864 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
26865 RTVEC_ELT (p, j++)
26866 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
26867 RTVEC_ELT (p, j++)
26868 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
26869 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26871 if (flag_shrink_wrap)
26873 REG_NOTES (insn) = cfa_restores;
26874 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26875 RTX_FRAME_RELATED_P (insn) = 1;
26877 return;
26880 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
26881 if (info->push_p)
26882 frame_off = info->total_size;
26884 /* Restore AltiVec registers if we must do so before adjusting the
26885 stack. */
26886 if (info->altivec_size != 0
26887 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26888 || (DEFAULT_ABI != ABI_V4
26889 && offset_below_red_zone_p (info->altivec_save_offset))))
26891 int i;
26892 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26894 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26895 if (use_backchain_to_restore_sp)
26897 int frame_regno = 11;
26899 if ((strategy & REST_INLINE_VRS) == 0)
26901 /* Of r11 and r12, select the one not clobbered by an
26902 out-of-line restore function for the frame register. */
26903 frame_regno = 11 + 12 - scratch_regno;
26905 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
26906 emit_move_insn (frame_reg_rtx,
26907 gen_rtx_MEM (Pmode, sp_reg_rtx));
26908 frame_off = 0;
26910 else if (frame_pointer_needed)
26911 frame_reg_rtx = hard_frame_pointer_rtx;
26913 if ((strategy & REST_INLINE_VRS) == 0)
26915 int end_save = info->altivec_save_offset + info->altivec_size;
26916 int ptr_off;
26917 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26918 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26920 if (end_save + frame_off != 0)
26922 rtx offset = GEN_INT (end_save + frame_off);
26924 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26926 else
26927 emit_move_insn (ptr_reg, frame_reg_rtx);
26929 ptr_off = -end_save;
26930 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26931 info->altivec_save_offset + ptr_off,
26932 0, V4SImode, SAVRES_VR);
26934 else
26936 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26937 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26939 rtx addr, areg, mem, reg;
26941 areg = gen_rtx_REG (Pmode, 0);
26942 emit_move_insn
26943 (areg, GEN_INT (info->altivec_save_offset
26944 + frame_off
26945 + 16 * (i - info->first_altivec_reg_save)));
26947 /* AltiVec addressing mode is [reg+reg]. */
26948 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26949 mem = gen_frame_mem (V4SImode, addr);
26951 reg = gen_rtx_REG (V4SImode, i);
26952 /* Rather than emitting a generic move, force use of the
26953 lvx instruction, which we always want. In particular
26954 we don't want lxvd2x/xxpermdi for little endian. */
26955 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26959 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26960 if (((strategy & REST_INLINE_VRS) == 0
26961 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26962 && (flag_shrink_wrap
26963 || (offset_below_red_zone_p
26964 (info->altivec_save_offset
26965 + 16 * (i - info->first_altivec_reg_save)))))
26967 rtx reg = gen_rtx_REG (V4SImode, i);
26968 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26972 /* Restore VRSAVE if we must do so before adjusting the stack. */
26973 if (info->vrsave_size != 0
26974 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26975 || (DEFAULT_ABI != ABI_V4
26976 && offset_below_red_zone_p (info->vrsave_save_offset))))
26978 rtx reg;
26980 if (frame_reg_rtx == sp_reg_rtx)
26982 if (use_backchain_to_restore_sp)
26984 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26985 emit_move_insn (frame_reg_rtx,
26986 gen_rtx_MEM (Pmode, sp_reg_rtx));
26987 frame_off = 0;
26989 else if (frame_pointer_needed)
26990 frame_reg_rtx = hard_frame_pointer_rtx;
26993 reg = gen_rtx_REG (SImode, 12);
26994 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26995 info->vrsave_save_offset + frame_off));
26997 emit_insn (generate_set_vrsave (reg, info, 1));
27000 insn = NULL_RTX;
27001 /* If we have a large stack frame, restore the old stack pointer
27002 using the backchain. */
27003 if (use_backchain_to_restore_sp)
27005 if (frame_reg_rtx == sp_reg_rtx)
27007 /* Under V.4, don't reset the stack pointer until after we're done
27008 loading the saved registers. */
27009 if (DEFAULT_ABI == ABI_V4)
27010 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27012 insn = emit_move_insn (frame_reg_rtx,
27013 gen_rtx_MEM (Pmode, sp_reg_rtx));
27014 frame_off = 0;
27016 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27017 && DEFAULT_ABI == ABI_V4)
27018 /* frame_reg_rtx has been set up by the altivec restore. */
27020 else
27022 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27023 frame_reg_rtx = sp_reg_rtx;
27026 /* If we have a frame pointer, we can restore the old stack pointer
27027 from it. */
27028 else if (frame_pointer_needed)
27030 frame_reg_rtx = sp_reg_rtx;
27031 if (DEFAULT_ABI == ABI_V4)
27032 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27033 /* Prevent reordering memory accesses against stack pointer restore. */
27034 else if (cfun->calls_alloca
27035 || offset_below_red_zone_p (-info->total_size))
27036 rs6000_emit_stack_tie (frame_reg_rtx, true);
27038 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27039 GEN_INT (info->total_size)));
27040 frame_off = 0;
27042 else if (info->push_p
27043 && DEFAULT_ABI != ABI_V4
27044 && !crtl->calls_eh_return)
27046 /* Prevent reordering memory accesses against stack pointer restore. */
27047 if (cfun->calls_alloca
27048 || offset_below_red_zone_p (-info->total_size))
27049 rs6000_emit_stack_tie (frame_reg_rtx, false);
27050 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27051 GEN_INT (info->total_size)));
27052 frame_off = 0;
27054 if (insn && frame_reg_rtx == sp_reg_rtx)
27056 if (cfa_restores)
27058 REG_NOTES (insn) = cfa_restores;
27059 cfa_restores = NULL_RTX;
27061 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27062 RTX_FRAME_RELATED_P (insn) = 1;
27065 /* Restore AltiVec registers if we have not done so already. */
27066 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27067 && info->altivec_size != 0
27068 && (DEFAULT_ABI == ABI_V4
27069 || !offset_below_red_zone_p (info->altivec_save_offset)))
27071 int i;
27073 if ((strategy & REST_INLINE_VRS) == 0)
27075 int end_save = info->altivec_save_offset + info->altivec_size;
27076 int ptr_off;
27077 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27078 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27079 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27081 if (end_save + frame_off != 0)
27083 rtx offset = GEN_INT (end_save + frame_off);
27085 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27087 else
27088 emit_move_insn (ptr_reg, frame_reg_rtx);
27090 ptr_off = -end_save;
27091 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27092 info->altivec_save_offset + ptr_off,
27093 0, V4SImode, SAVRES_VR);
27094 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27096 /* Frame reg was clobbered by the out-of-line vector restore.
27097 Recover it from ptr_reg, and if we are calling the out-of-line gpr or
27098 fpr restore set up the correct pointer and offset. */
27099 unsigned newptr_regno = 1;
27100 if (!restoring_GPRs_inline)
27102 bool lr = info->gp_save_offset + info->gp_size == 0;
27103 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27104 newptr_regno = ptr_regno_for_savres (sel);
27105 end_save = info->gp_save_offset + info->gp_size;
27107 else if (!restoring_FPRs_inline)
27109 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
27110 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27111 newptr_regno = ptr_regno_for_savres (sel);
27112 end_save = info->fp_save_offset + info->fp_size;
27115 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
27116 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
27118 if (end_save + ptr_off != 0)
27120 rtx offset = GEN_INT (end_save + ptr_off);
27122 frame_off = -end_save;
27123 if (TARGET_32BIT)
27124 emit_insn (gen_addsi3_carry (frame_reg_rtx,
27125 ptr_reg, offset));
27126 else
27127 emit_insn (gen_adddi3_carry (frame_reg_rtx,
27128 ptr_reg, offset));
27130 else
27132 frame_off = ptr_off;
27133 emit_move_insn (frame_reg_rtx, ptr_reg);
27137 else
27139 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27140 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27142 rtx addr, areg, mem, reg;
27144 areg = gen_rtx_REG (Pmode, 0);
27145 emit_move_insn
27146 (areg, GEN_INT (info->altivec_save_offset
27147 + frame_off
27148 + 16 * (i - info->first_altivec_reg_save)));
27150 /* AltiVec addressing mode is [reg+reg]. */
27151 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27152 mem = gen_frame_mem (V4SImode, addr);
27154 reg = gen_rtx_REG (V4SImode, i);
27155 /* Rather than emitting a generic move, force use of the
27156 lvx instruction, which we always want. In particular
27157 we don't want lxvd2x/xxpermdi for little endian. */
27158 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
27162 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27163 if (((strategy & REST_INLINE_VRS) == 0
27164 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27165 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27167 rtx reg = gen_rtx_REG (V4SImode, i);
27168 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27172 /* Restore VRSAVE if we have not done so already. */
27173 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27174 && info->vrsave_size != 0
27175 && (DEFAULT_ABI == ABI_V4
27176 || !offset_below_red_zone_p (info->vrsave_save_offset)))
27178 rtx reg;
27180 reg = gen_rtx_REG (SImode, 12);
27181 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27182 info->vrsave_save_offset + frame_off));
27184 emit_insn (generate_set_vrsave (reg, info, 1));
27187 /* If we exit by an out-of-line restore function on ABI_V4 then that
27188 function will deallocate the stack, so we don't need to worry
27189 about the unwinder restoring cr from an invalid stack frame
27190 location. */
27191 exit_func = (!restoring_FPRs_inline
27192 || (!restoring_GPRs_inline
27193 && info->first_fp_reg_save == 64));
27195 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
27196 *separate* slots if the routine calls __builtin_eh_return, so
27197 that they can be independently restored by the unwinder. */
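/* CR is one 32-bit register made up of eight 4-bit fields
   (CR0..CR7); the normal path saves and reloads it as a single
   word, which the unwinder cannot take apart, hence the per-field
   slots used here.  */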
27198 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27200 int i, cr_off = info->ehcr_offset;
27202 for (i = 0; i < 8; i++)
27203 if (!call_used_regs[CR0_REGNO + i])
27205 rtx reg = gen_rtx_REG (SImode, 0);
27206 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27207 cr_off + frame_off));
27209 insn = emit_insn (gen_movsi_to_cr_one
27210 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27212 if (!exit_func && flag_shrink_wrap)
27214 add_reg_note (insn, REG_CFA_RESTORE,
27215 gen_rtx_REG (SImode, CR0_REGNO + i));
27217 RTX_FRAME_RELATED_P (insn) = 1;
27220 cr_off += reg_size;
27224 /* Get the old lr if we saved it. If we are restoring registers
27225 out-of-line, then the out-of-line routines can do this for us. */
27226 if (restore_lr && restoring_GPRs_inline)
27227 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27229 /* Get the old cr if we saved it. */
27230 if (info->cr_save_p)
27232 unsigned cr_save_regno = 12;
27234 if (!restoring_GPRs_inline)
27236 /* Ensure we don't use the register used by the out-of-line
27237 gpr register restore below. */
27238 bool lr = info->gp_save_offset + info->gp_size == 0;
27239 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27240 int gpr_ptr_regno = ptr_regno_for_savres (sel);
27242 if (gpr_ptr_regno == 12)
27243 cr_save_regno = 11;
27244 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
27246 else if (REGNO (frame_reg_rtx) == 12)
27247 cr_save_regno = 11;
27249 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
27250 info->cr_save_offset + frame_off,
27251 exit_func);
27254 /* Set LR here to try to overlap restores below. */
27255 if (restore_lr && restoring_GPRs_inline)
27256 restore_saved_lr (0, exit_func);
27258 /* Load exception handler data registers, if needed. */
27259 if (crtl->calls_eh_return)
27261 unsigned int i, regno;
27263 if (TARGET_AIX)
27265 rtx reg = gen_rtx_REG (reg_mode, 2);
27266 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27267 frame_off + RS6000_TOC_SAVE_SLOT));
27270 for (i = 0; ; ++i)
27272 rtx mem;
27274 regno = EH_RETURN_DATA_REGNO (i);
27275 if (regno == INVALID_REGNUM)
27276 break;
27278 /* Note: possible use of r0 here to address SPE regs. */
27279 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
27280 info->ehrd_offset + frame_off
27281 + reg_size * (int) i);
27283 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
27287 /* Restore GPRs. This is done as a PARALLEL if we are using
27288 the load-multiple instructions. */
27289 if (TARGET_SPE_ABI
27290 && info->spe_64bit_regs_used
27291 && info->first_gp_reg_save != 32)
27293 /* Determine whether we can address all of the registers that need
27294 to be saved with an offset from frame_reg_rtx that fits in
27295 the small const field for SPE memory instructions. */
27296 int spe_regs_addressable
27297 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
27298 + reg_size * (32 - info->first_gp_reg_save - 1))
27299 && restoring_GPRs_inline);
27301 if (!spe_regs_addressable)
27303 int ool_adjust = 0;
27304 rtx old_frame_reg_rtx = frame_reg_rtx;
27305 /* Make r11 point to the start of the SPE save area. We worried about
27306 not clobbering it when we were saving registers in the prologue.
27307 There's no need to worry here because the static chain is passed
27308 anew to every function. */
27310 if (!restoring_GPRs_inline)
27311 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
27312 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27313 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
27314 GEN_INT (info->spe_gp_save_offset
27315 + frame_off
27316 - ool_adjust)));
27317 /* Keep the invariant that frame_reg_rtx + frame_off points
27318 at the top of the stack frame. */
27319 frame_off = -info->spe_gp_save_offset + ool_adjust;
27322 if (restoring_GPRs_inline)
27324 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
27326 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27327 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27329 rtx offset, addr, mem, reg;
27331 /* We're doing all this to ensure that the immediate offset
27332 fits into the immediate field of 'evldd'. */
27333 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
27335 offset = GEN_INT (spe_offset + reg_size * i);
27336 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
27337 mem = gen_rtx_MEM (V2SImode, addr);
27338 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27340 emit_move_insn (reg, mem);
27343 else
27344 rs6000_emit_savres_rtx (info, frame_reg_rtx,
27345 info->spe_gp_save_offset + frame_off,
27346 info->lr_save_offset + frame_off,
27347 reg_mode,
27348 SAVRES_GPR | SAVRES_LR);
27350 else if (!restoring_GPRs_inline)
27352 /* We are jumping to an out-of-line function. */
27353 rtx ptr_reg;
27354 int end_save = info->gp_save_offset + info->gp_size;
27355 bool can_use_exit = end_save == 0;
27356 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
27357 int ptr_off;
27359 /* Emit stack reset code if we need it. */
27360 ptr_regno = ptr_regno_for_savres (sel);
27361 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27362 if (can_use_exit)
27363 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
27364 else if (end_save + frame_off != 0)
27365 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
27366 GEN_INT (end_save + frame_off)));
27367 else if (REGNO (frame_reg_rtx) != ptr_regno)
27368 emit_move_insn (ptr_reg, frame_reg_rtx);
27369 if (REGNO (frame_reg_rtx) == ptr_regno)
27370 frame_off = -end_save;
27372 if (can_use_exit && info->cr_save_p)
27373 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
27375 ptr_off = -end_save;
27376 rs6000_emit_savres_rtx (info, ptr_reg,
27377 info->gp_save_offset + ptr_off,
27378 info->lr_save_offset + ptr_off,
27379 reg_mode, sel);
27381 else if (using_load_multiple)
27383 rtvec p;
27384 p = rtvec_alloc (32 - info->first_gp_reg_save);
27385 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27386 RTVEC_ELT (p, i)
27387 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27388 frame_reg_rtx,
27389 info->gp_save_offset + frame_off + reg_size * i);
27390 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27392 else
27394 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27395 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27396 emit_insn (gen_frame_load
27397 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27398 frame_reg_rtx,
27399 info->gp_save_offset + frame_off + reg_size * i));
27402 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27404 /* If the frame pointer was used then we can't delay emitting
27405 a REG_CFA_DEF_CFA note. This must happen on the insn that
27406 restores the frame pointer, r31. We may have already emitted
27407 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
27408 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
27409 be harmless if emitted. */
27410 if (frame_pointer_needed)
27412 insn = get_last_insn ();
27413 add_reg_note (insn, REG_CFA_DEF_CFA,
27414 plus_constant (Pmode, frame_reg_rtx, frame_off));
27415 RTX_FRAME_RELATED_P (insn) = 1;
27418 /* Set up cfa_restores. We always need these when
27419 shrink-wrapping. If not shrink-wrapping then we only need
27420 the cfa_restore when the stack location is no longer valid.
27421 The cfa_restores must be emitted on or before the insn that
27422 invalidates the stack, and of course must not be emitted
27423 before the insn that actually does the restore. The latter
27424 is why it is a bad idea to emit the cfa_restores as a group
27425 on the last instruction here that actually does a restore:
27426 That insn may be reordered with respect to others doing
27427 restores. */
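/* A REG_CFA_RESTORE note tells dwarf2cfi that, after the annotated
   insn, the given register again holds its value from the caller
   and must no longer be fetched from its old save slot.  */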
27428 if (flag_shrink_wrap
27429 && !restoring_GPRs_inline
27430 && info->first_fp_reg_save == 64)
27431 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27433 for (i = info->first_gp_reg_save; i < 32; i++)
27434 if (!restoring_GPRs_inline
27435 || using_load_multiple
27436 || rs6000_reg_live_or_pic_offset_p (i))
27438 rtx reg = gen_rtx_REG (reg_mode, i);
27440 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27444 if (!restoring_GPRs_inline
27445 && info->first_fp_reg_save == 64)
27447 /* We are jumping to an out-of-line function. */
27448 if (cfa_restores)
27449 emit_cfa_restores (cfa_restores);
27450 return;
27453 if (restore_lr && !restoring_GPRs_inline)
27455 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27456 restore_saved_lr (0, exit_func);
27459 /* Restore fpr's if we need to do it without calling a function. */
27460 if (restoring_FPRs_inline)
27461 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27462 if (save_reg_p (info->first_fp_reg_save + i))
27464 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27465 ? DFmode : SFmode),
27466 info->first_fp_reg_save + i);
27467 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27468 info->fp_save_offset + frame_off + 8 * i));
27469 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27470 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27473 /* If we saved cr, restore it here. Just those that were used. */
27474 if (info->cr_save_p)
27475 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
27477 /* If this is V.4, unwind the stack pointer after all of the loads
27478 have been done, or set up r11 if we are restoring fp out of line. */
27479 ptr_regno = 1;
27480 if (!restoring_FPRs_inline)
27482 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27483 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27484 ptr_regno = ptr_regno_for_savres (sel);
27487 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
27488 if (REGNO (frame_reg_rtx) == ptr_regno)
27489 frame_off = 0;
27491 if (insn && restoring_FPRs_inline)
27493 if (cfa_restores)
27495 REG_NOTES (insn) = cfa_restores;
27496 cfa_restores = NULL_RTX;
27498 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27499 RTX_FRAME_RELATED_P (insn) = 1;
27502 if (crtl->calls_eh_return)
27504 rtx sa = EH_RETURN_STACKADJ_RTX;
27505 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
27508 if (!sibcall)
27510 rtvec p;
27511 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27512 if (! restoring_FPRs_inline)
27514 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
27515 RTVEC_ELT (p, 0) = ret_rtx;
27517 else
27519 if (cfa_restores)
27521 /* We can't hang the cfa_restores off a simple return,
27522 since the shrink-wrap code sometimes uses an existing
27523 return. This means there might be a path from
27524 pre-prologue code to this return, and dwarf2cfi code
27525 wants the eh_frame unwinder state to be the same on
27526 all paths to any point. So we need to emit the
27527 cfa_restores before the return. For -m64 we really
27528 don't need epilogue cfa_restores at all, except for
27529 this irritating dwarf2cfi with shrink-wrap
27530 requirement; the stack red-zone means eh_frame info
27531 from the prologue telling the unwinder to restore
27532 from the stack is perfectly good right to the end of
27533 the function. */
27534 emit_insn (gen_blockage ());
27535 emit_cfa_restores (cfa_restores);
27536 cfa_restores = NULL_RTX;
27538 p = rtvec_alloc (2);
27539 RTVEC_ELT (p, 0) = simple_return_rtx;
27542 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
27543 ? gen_rtx_USE (VOIDmode,
27544 gen_rtx_REG (Pmode, LR_REGNO))
27545 : gen_rtx_CLOBBER (VOIDmode,
27546 gen_rtx_REG (Pmode, LR_REGNO)));
27548 /* If we have to restore more than two FP registers, branch to the
27549 restore function. It will return to our caller. */
27550 if (! restoring_FPRs_inline)
27552 int i;
27553 int reg;
27554 rtx sym;
27556 if (flag_shrink_wrap)
27557 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27559 sym = rs6000_savres_routine_sym (info,
27560 SAVRES_FPR | (lr ? SAVRES_LR : 0));
27561 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
27562 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
27563 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
27565 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27567 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
27569 RTVEC_ELT (p, i + 4)
27570 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
27571 if (flag_shrink_wrap)
27572 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
27573 cfa_restores);
27577 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27580 if (cfa_restores)
27582 if (sibcall)
27583 /* Ensure the cfa_restores are hung off an insn that won't
27584 be reordered above other restores. */
27585 emit_insn (gen_blockage ());
27587 emit_cfa_restores (cfa_restores);
27591 /* Write function epilogue. */
27593 static void
27594 rs6000_output_function_epilogue (FILE *file,
27595 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27597 #if TARGET_MACHO
27598 macho_branch_islands ();
27599 /* Mach-O doesn't support labels at the end of objects, so if
27600 it looks like we might want one, insert a NOP. */
27602 rtx_insn *insn = get_last_insn ();
27603 rtx_insn *deleted_debug_label = NULL;
27604 while (insn
27605 && NOTE_P (insn)
27606 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
27608 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
27609 notes; instead set their CODE_LABEL_NUMBER to -1, as
27610 otherwise there would be code generation differences
27611 between -g and -g0. */
27612 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27613 deleted_debug_label = insn;
27614 insn = PREV_INSN (insn);
27616 if (insn
27617 && (LABEL_P (insn)
27618 || (NOTE_P (insn)
27619 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
27620 fputs ("\tnop\n", file);
27621 else if (deleted_debug_label)
27622 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
27623 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27624 CODE_LABEL_NUMBER (insn) = -1;
27626 #endif
27628 /* Output a traceback table here. See /usr/include/sys/debug.h for info
27629 on its format.
27631 We don't output a traceback table if -finhibit-size-directive was
27632 used. The documentation for -finhibit-size-directive reads
27633 ``don't output a @code{.size} assembler directive, or anything
27634 else that would cause trouble if the function is split in the
27635 middle, and the two halves are placed at locations far apart in
27636 memory.'' The traceback table has this property, since it
27637 includes the offset from the start of the function to the
27638 traceback table itself.
27640 System V.4 PowerPC (and the embedded ABI derived from it) uses a
27641 different traceback table. */
27642 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27643 && ! flag_inhibit_size_directive
27644 && rs6000_traceback != traceback_none && !cfun->is_thunk)
27646 const char *fname = NULL;
27647 const char *language_string = lang_hooks.name;
27648 int fixed_parms = 0, float_parms = 0, parm_info = 0;
27649 int i;
27650 int optional_tbtab;
27651 rs6000_stack_t *info = rs6000_stack_info ();
27653 if (rs6000_traceback == traceback_full)
27654 optional_tbtab = 1;
27655 else if (rs6000_traceback == traceback_part)
27656 optional_tbtab = 0;
27657 else
27658 optional_tbtab = !optimize_size && !TARGET_ELF;
27660 if (optional_tbtab)
27662 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27663 while (*fname == '.') /* V.4 encodes . in the name */
27664 fname++;
27666 /* Need label immediately before tbtab, so we can compute
27667 its offset from the function start. */
27668 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27669 ASM_OUTPUT_LABEL (file, fname);
27672 /* The .tbtab pseudo-op can only be used for the first eight
27673 expressions, since it can't handle the possibly variable
27674 length fields that follow. However, if you omit the optional
27675 fields, the assembler outputs zeros for all optional fields
27676 anyway, giving each variable length field its minimum length
27677 (as defined in sys/debug.h). Thus we cannot use the .tbtab
27678 pseudo-op at all. */
27680 /* An all-zero word flags the start of the tbtab, for debuggers
27681 that have to find it by searching forward from the entry
27682 point or from the current pc. */
27683 fputs ("\t.long 0\n", file);
27685 /* Tbtab format type. Use format type 0. */
27686 fputs ("\t.byte 0,", file);
27688 /* Language type. Unfortunately, there does not seem to be any
27689 official way to discover the language being compiled, so we
27690 use language_string.
27691 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
27692 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
27693 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
27694 either, so for now use 0. */
27695 if (lang_GNU_C ()
27696 || ! strcmp (language_string, "GNU GIMPLE")
27697 || ! strcmp (language_string, "GNU Go")
27698 || ! strcmp (language_string, "libgccjit"))
27699 i = 0;
27700 else if (! strcmp (language_string, "GNU F77")
27701 || lang_GNU_Fortran ())
27702 i = 1;
27703 else if (! strcmp (language_string, "GNU Pascal"))
27704 i = 2;
27705 else if (! strcmp (language_string, "GNU Ada"))
27706 i = 3;
27707 else if (lang_GNU_CXX ()
27708 || ! strcmp (language_string, "GNU Objective-C++"))
27709 i = 9;
27710 else if (! strcmp (language_string, "GNU Java"))
27711 i = 13;
27712 else if (! strcmp (language_string, "GNU Objective-C"))
27713 i = 14;
27714 else
27715 gcc_unreachable ();
27716 fprintf (file, "%d,", i);
27718 /* 8 single bit fields: global linkage (not set for C extern linkage,
27719 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
27720 from start of procedure stored in tbtab, internal function, function
27721 has controlled storage, function has no toc, function uses fp,
27722 function logs/aborts fp operations. */
27723 /* Assume that fp operations are used if any fp reg must be saved. */
27724 fprintf (file, "%d,",
27725 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
27727 /* 6 bitfields: function is interrupt handler, name present in
27728 proc table, function calls alloca, on condition directives
27729 (controls stack walks, 3 bits), saves condition reg, saves
27730 link reg. */
27731 /* The `function calls alloca' bit seems to be set whenever reg 31 is
27732 set up as a frame pointer, even when there is no alloca call. */
27733 fprintf (file, "%d,",
27734 ((optional_tbtab << 6)
27735 | ((optional_tbtab & frame_pointer_needed) << 5)
27736 | (info->cr_save_p << 1)
27737 | (info->lr_save_p)));
27739 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
27740 (6 bits). */
27741 fprintf (file, "%d,",
27742 (info->push_p << 7) | (64 - info->first_fp_reg_save));
27744 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
27745 fprintf (file, "%d,", (32 - first_reg_to_save ()));
27747 if (optional_tbtab)
27749 /* Compute the parameter info from the function decl argument
27750 list. */
27751 tree decl;
27752 int next_parm_info_bit = 31;
27754 for (decl = DECL_ARGUMENTS (current_function_decl);
27755 decl; decl = DECL_CHAIN (decl))
27757 rtx parameter = DECL_INCOMING_RTL (decl);
27758 machine_mode mode = GET_MODE (parameter);
27760 if (GET_CODE (parameter) == REG)
27762 if (SCALAR_FLOAT_MODE_P (mode))
27764 int bits;
27766 float_parms++;
27768 switch (mode)
27770 case SFmode:
27771 case SDmode:
27772 bits = 0x2;
27773 break;
27775 case DFmode:
27776 case DDmode:
27777 case TFmode:
27778 case TDmode:
27779 case IFmode:
27780 case KFmode:
27781 bits = 0x3;
27782 break;
27784 default:
27785 gcc_unreachable ();
27788 /* If only one bit will fit, don't or in this entry. */
27789 if (next_parm_info_bit > 0)
27790 parm_info |= (bits << (next_parm_info_bit - 1));
27791 next_parm_info_bit -= 2;
27793 else
27795 fixed_parms += ((GET_MODE_SIZE (mode)
27796 + (UNITS_PER_WORD - 1))
27797 / UNITS_PER_WORD);
27798 next_parm_info_bit -= 1;
27804 /* Number of fixed point parameters. */
27805 /* This is actually the number of words of fixed point parameters; thus
27806 an 8 byte struct counts as 2; and thus the maximum value is 8. */
27807 fprintf (file, "%d,", fixed_parms);
27809 /* 2 bitfields: number of floating point parameters (7 bits), parameters
27810 all on stack. */
27811 /* This is actually the number of fp registers that hold parameters;
27812 and thus the maximum value is 13. */
27813 /* Set parameters on stack bit if parameters are not in their original
27814 registers, regardless of whether they are on the stack? Xlc
27815 seems to set the bit when not optimizing. */
27816 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
27818 if (! optional_tbtab)
27819 return;
27821 /* Optional fields follow. Some are variable length. */
27823 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
27824 11 double float. */
27825 /* There is an entry for each parameter in a register, in the order that
27826 they occur in the parameter list. Any intervening arguments on the
27827 stack are ignored. If the list overflows a long (max possible length
27828 34 bits) then completely leave off all elements that don't fit. */
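/* As a worked example (hypothetical signature, assuming all three
   arguments arrive in registers), f (int, double, float) encodes
   from bit 31 down as `0', `11', `10', giving
   parm_info == 0x70000000.  */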
27829 /* Only emit this long if there was at least one parameter. */
27830 if (fixed_parms || float_parms)
27831 fprintf (file, "\t.long %d\n", parm_info);
27833 /* Offset from start of code to tb table. */
27834 fputs ("\t.long ", file);
27835 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27836 RS6000_OUTPUT_BASENAME (file, fname);
27837 putc ('-', file);
27838 rs6000_output_function_entry (file, fname);
27839 putc ('\n', file);
27841 /* Interrupt handler mask. */
27842 /* Omit this long, since we never set the interrupt handler bit
27843 above. */
27845 /* Number of CTL (controlled storage) anchors. */
27846 /* Omit this long, since the has_ctl bit is never set above. */
27848 /* Displacement into stack of each CTL anchor. */
27849 /* Omit this list of longs, because there are no CTL anchors. */
27851 /* Length of function name. */
27852 if (*fname == '*')
27853 ++fname;
27854 fprintf (file, "\t.short %d\n", (int) strlen (fname));
27856 /* Function name. */
27857 assemble_string (fname, strlen (fname));
27859 /* Register for alloca automatic storage; this is always reg 31.
27860 Only emit this if the alloca bit was set above. */
27861 if (frame_pointer_needed)
27862 fputs ("\t.byte 31\n", file);
27864 fputs ("\t.align 2\n", file);
27867 /* Arrange to define .LCTOC1 label, if not already done. */
27868 if (need_toc_init)
27870 need_toc_init = 0;
27871 if (!toc_initialized)
27873 switch_to_section (toc_section);
27874 switch_to_section (current_function_section ());
27879 /* -fsplit-stack support. */
27881 /* A SYMBOL_REF for __morestack. */
27882 static GTY(()) rtx morestack_ref;
27884 static rtx
27885 gen_add3_const (rtx rt, rtx ra, long c)
27887 if (TARGET_64BIT)
27888 return gen_adddi3 (rt, ra, GEN_INT (c));
27889 else
27890 return gen_addsi3 (rt, ra, GEN_INT (c));
27893 /* Emit -fsplit-stack prologue, which goes before the regular function
27894 prologue (at local entry point in the case of ELFv2). */
27896 void
27897 rs6000_expand_split_stack_prologue (void)
27899 rs6000_stack_t *info = rs6000_stack_info ();
27900 unsigned HOST_WIDE_INT allocate;
27901 long alloc_hi, alloc_lo;
27902 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
27903 rtx_insn *insn;
27905 gcc_assert (flag_split_stack && reload_completed);
27907 if (!info->push_p)
27908 return;
27910 if (global_regs[29])
27912 error ("-fsplit-stack uses register r29");
27913 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
27914 "conflicts with %qD", global_regs_decl[29]);
27917 allocate = info->total_size;
27918 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
27920 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
27921 return;
27923 if (morestack_ref == NULL_RTX)
27925 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
27926 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
27927 | SYMBOL_FLAG_FUNCTION);
27930 r0 = gen_rtx_REG (Pmode, 0);
27931 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27932 r12 = gen_rtx_REG (Pmode, 12);
27933 emit_insn (gen_load_split_stack_limit (r0));
27934 /* Always emit two insns here to calculate the requested stack,
27935 so that the linker can edit them when adjusting size for calling
27936 non-split-stack code. */
27937 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
27938 alloc_lo = -allocate - alloc_hi;
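/* This is the usual addis/addi split: the +0x8000 rounds so that
   alloc_lo always fits a signed 16-bit immediate.  E.g. for
   allocate == 0x12345, alloc_hi == -0x10000 and
   alloc_lo == -0x2345.  */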
27939 if (alloc_hi != 0)
27941 emit_insn (gen_add3_const (r12, r1, alloc_hi));
27942 if (alloc_lo != 0)
27943 emit_insn (gen_add3_const (r12, r12, alloc_lo));
27944 else
27945 emit_insn (gen_nop ());
27947 else
27949 emit_insn (gen_add3_const (r12, r1, alloc_lo));
27950 emit_insn (gen_nop ());
27953 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27954 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
27955 ok_label = gen_label_rtx ();
27956 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27957 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
27958 gen_rtx_LABEL_REF (VOIDmode, ok_label),
27959 pc_rtx);
27960 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27961 JUMP_LABEL (jump) = ok_label;
27962 /* Mark the jump as very likely to be taken. */
27963 add_int_reg_note (jump, REG_BR_PROB,
27964 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
27966 lr = gen_rtx_REG (Pmode, LR_REGNO);
27967 insn = emit_move_insn (r0, lr);
27968 RTX_FRAME_RELATED_P (insn) = 1;
27969 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
27970 RTX_FRAME_RELATED_P (insn) = 1;
27972 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
27973 const0_rtx, const0_rtx));
27974 call_fusage = NULL_RTX;
27975 use_reg (&call_fusage, r12);
27976 add_function_usage_to (insn, call_fusage);
27977 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
27978 insn = emit_move_insn (lr, r0);
27979 add_reg_note (insn, REG_CFA_RESTORE, lr);
27980 RTX_FRAME_RELATED_P (insn) = 1;
27981 emit_insn (gen_split_stack_return ());
27983 emit_label (ok_label);
27984 LABEL_NUSES (ok_label) = 1;
27987 /* Return the internal arg pointer used for function incoming
27988 arguments. When -fsplit-stack, the arg pointer is r12 so we need
27989 to copy it to a pseudo in order for it to be preserved over calls
27990 and suchlike. We'd really like to use a pseudo here for the
27991 internal arg pointer but data-flow analysis is not prepared to
27992 accept pseudos as live at the beginning of a function. */
27994 static rtx
27995 rs6000_internal_arg_pointer (void)
27997 if (flag_split_stack
27998 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
27999 == NULL))
28002 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
28004 rtx pat;
28006 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
28007 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
28009 /* Put the pseudo initialization right after the note at the
28010 beginning of the function. */
28011 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
28012 gen_rtx_REG (Pmode, 12));
28013 push_topmost_sequence ();
28014 emit_insn_after (pat, get_insns ());
28015 pop_topmost_sequence ();
28017 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
28018 FIRST_PARM_OFFSET (current_function_decl));
28020 return virtual_incoming_args_rtx;
28023 /* We may have to tell the dataflow pass that the split stack prologue
28024 is initializing a register. */
28026 static void
28027 rs6000_live_on_entry (bitmap regs)
28029 if (flag_split_stack)
28030 bitmap_set_bit (regs, 12);
28033 /* Emit -fsplit-stack dynamic stack allocation space check. */
28035 void
28036 rs6000_split_stack_space_check (rtx size, rtx label)
28038 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28039 rtx limit = gen_reg_rtx (Pmode);
28040 rtx requested = gen_reg_rtx (Pmode);
28041 rtx cmp = gen_reg_rtx (CCUNSmode);
28042 rtx jump;
28044 emit_insn (gen_load_split_stack_limit (limit));
28045 if (CONST_INT_P (size))
28046 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28047 else
28049 size = force_reg (Pmode, size);
28050 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
28052 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28053 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28054 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28055 gen_rtx_LABEL_REF (VOIDmode, label),
28056 pc_rtx);
28057 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28058 JUMP_LABEL (jump) = label;
28061 /* A C compound statement that outputs the assembler code for a thunk
28062 function, used to implement C++ virtual function calls with
28063 multiple inheritance. The thunk acts as a wrapper around a virtual
28064 function, adjusting the implicit object parameter before handing
28065 control off to the real function.
28067 First, emit code to add the integer DELTA to the location that
28068 contains the incoming first argument. Assume that this argument
28069 contains a pointer, and is the one used to pass the `this' pointer
28070 in C++. This is the incoming argument *before* the function
28071 prologue, e.g. `%o0' on a sparc. The addition must preserve the
28072 values of all other incoming arguments.
28074 After the addition, emit code to jump to FUNCTION, which is a
28075 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
28076 not touch the return address. Hence returning from FUNCTION will
28077 return to whoever called the current `thunk'.
28079 The effect must be as if FUNCTION had been called directly with the
28080 adjusted first argument. This macro is responsible for emitting
28081 all of the code for a thunk function; output_function_prologue()
28082 and output_function_epilogue() are not invoked.
28084 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
28085 been extracted from it.) It might possibly be useful on some
28086 targets, but probably not.
28088 If you do not define this macro, the target-independent code in the
28089 C++ frontend will generate a less efficient heavyweight thunk that
28090 calls FUNCTION instead of jumping to it. The generic approach does
28091 not support varargs. */
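/* For a small DELTA the emitted code is roughly (illustrative; on
   AIX the branch goes via the function descriptor):
       addi r3,r3,DELTA
       b FUNCTION
   with the extra vtable load inserted first when VCALL_OFFSET is
   nonzero.  */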
28093 static void
28094 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
28095 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
28096 tree function)
28098 rtx this_rtx, funexp;
28099 rtx_insn *insn;
28101 reload_completed = 1;
28102 epilogue_completed = 1;
28104 /* Mark the end of the (empty) prologue. */
28105 emit_note (NOTE_INSN_PROLOGUE_END);
28107 /* Find the "this" pointer. If the function returns a structure,
28108 the structure return pointer is in r3. */
28109 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
28110 this_rtx = gen_rtx_REG (Pmode, 4);
28111 else
28112 this_rtx = gen_rtx_REG (Pmode, 3);
28114 /* Apply the constant offset, if required. */
28115 if (delta)
28116 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
28118 /* Apply the offset from the vtable, if required. */
28119 if (vcall_offset)
28121 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
28122 rtx tmp = gen_rtx_REG (Pmode, 12);
28124 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
28125 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
28127 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
28128 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
28130 else
28132 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
28134 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
28136 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
28139 /* Generate a tail call to the target function. */
28140 if (!TREE_USED (function))
28142 assemble_external (function);
28143 TREE_USED (function) = 1;
28145 funexp = XEXP (DECL_RTL (function), 0);
28146 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28148 #if TARGET_MACHO
28149 if (MACHOPIC_INDIRECT)
28150 funexp = machopic_indirect_call_target (funexp);
28151 #endif
28153 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
28154 generate sibcall RTL explicitly. */
28155 insn = emit_call_insn (
28156 gen_rtx_PARALLEL (VOIDmode,
28157 gen_rtvec (4,
28158 gen_rtx_CALL (VOIDmode,
28159 funexp, const0_rtx),
28160 gen_rtx_USE (VOIDmode, const0_rtx),
28161 gen_rtx_USE (VOIDmode,
28162 gen_rtx_REG (SImode,
28163 LR_REGNO)),
28164 simple_return_rtx)));
28165 SIBLING_CALL_P (insn) = 1;
28166 emit_barrier ();
28168 /* Run just enough of rest_of_compilation to get the insns emitted.
28169 There's not really enough bulk here to make other passes such as
28170 instruction scheduling worthwhile. Note that use_thunk calls
28171 assemble_start_function and assemble_end_function. */
28172 insn = get_insns ();
28173 shorten_branches (insn);
28174 final_start_function (insn, file, 1);
28175 final (insn, file, 1);
28176 final_end_function ();
28178 reload_completed = 0;
28179 epilogue_completed = 0;
28182 /* A quick summary of the various types of 'constant-pool tables'
28183 under PowerPC:
28185 Target       Flags            Name             One table per
28186 AIX          (none)           AIX TOC          object file
28187 AIX          -mfull-toc       AIX TOC          object file
28188 AIX          -mminimal-toc    AIX minimal TOC  translation unit
28189 SVR4/EABI    (none)           SVR4 SDATA       object file
28190 SVR4/EABI    -fpic            SVR4 pic         object file
28191 SVR4/EABI    -fPIC            SVR4 PIC         translation unit
28192 SVR4/EABI    -mrelocatable    EABI TOC         function
28193 SVR4/EABI    -maix            AIX TOC          object file
28194 SVR4/EABI    -maix -mminimal-toc
28195                               AIX minimal TOC  translation unit
28197 Name             Reg.  Set by   entries  contains:
28198                                 made by  addrs?   fp?      sum?
28200 AIX TOC          2     crt0     as       Y        option   option
28201 AIX minimal TOC  30    prolog   gcc      Y        Y        option
28202 SVR4 SDATA       13    crt0     gcc      N        Y        N
28203 SVR4 pic         30    prolog   ld       Y        not yet  N
28204 SVR4 PIC         30    prolog   gcc      Y        option   option
28205 EABI TOC         30    prolog   gcc      Y        option   option
28209 /* Hash functions for the hash table. */
28211 static unsigned
28212 rs6000_hash_constant (rtx k)
28214 enum rtx_code code = GET_CODE (k);
28215 machine_mode mode = GET_MODE (k);
28216 unsigned result = (code << 3) ^ mode;
28217 const char *format;
28218 int flen, fidx;
28220 format = GET_RTX_FORMAT (code);
28221 flen = strlen (format);
28222 fidx = 0;
28224 switch (code)
28226 case LABEL_REF:
28227 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
28229 case CONST_WIDE_INT:
28231 int i;
28232 flen = CONST_WIDE_INT_NUNITS (k);
28233 for (i = 0; i < flen; i++)
28234 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
28235 return result;
28238 case CONST_DOUBLE:
28239 if (mode != VOIDmode)
28240 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
28241 flen = 2;
28242 break;
28244 case CODE_LABEL:
28245 fidx = 3;
28246 break;
28248 default:
28249 break;
28252 for (; fidx < flen; fidx++)
28253 switch (format[fidx])
28255 case 's':
28257 unsigned i, len;
28258 const char *str = XSTR (k, fidx);
28259 len = strlen (str);
28260 result = result * 613 + len;
28261 for (i = 0; i < len; i++)
28262 result = result * 613 + (unsigned) str[i];
28263 break;
28265 case 'u':
28266 case 'e':
28267 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
28268 break;
28269 case 'i':
28270 case 'n':
28271 result = result * 613 + (unsigned) XINT (k, fidx);
28272 break;
28273 case 'w':
28274 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
28275 result = result * 613 + (unsigned) XWINT (k, fidx);
28276 else
28278 size_t i;
28279 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
28280 result = result * 613 + (unsigned) (XWINT (k, fidx)
28281 >> CHAR_BIT * i);
28283 break;
28284 case '0':
28285 break;
28286 default:
28287 gcc_unreachable ();
28290 return result;
28293 hashval_t
28294 toc_hasher::hash (toc_hash_struct *thc)
28296 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
28299 /* Compare H1 and H2 for equivalence. */
28301 bool
28302 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
28304 rtx r1 = h1->key;
28305 rtx r2 = h2->key;
28307 if (h1->key_mode != h2->key_mode)
28308 return 0;
28310 return rtx_equal_p (r1, r2);
28313 /* These are the names given by the C++ front-end to vtables and
28314 vtable-like objects. Ideally, this logic should not be here;
28315 instead, there should be some programmatic way of inquiring as
28316 to whether or not an object is a vtable. */
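/* Of these, "_vt." is the old g++ 2.x encoding, while the Itanium
   C++ ABI prefixes are "_ZTV" (vtable), "_ZTT" (VTT), "_ZTI"
   (typeinfo) and "_ZTC" (construction vtable).  */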
28318 #define VTABLE_NAME_P(NAME) \
28319 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
28320 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
28321 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
28322 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
28323 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
28325 #ifdef NO_DOLLAR_IN_LABEL
28326 /* Return a GGC-allocated character string translating dollar signs in
28327 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
28329 const char *
28330 rs6000_xcoff_strip_dollar (const char *name)
28332 char *strip, *p;
28333 const char *q;
28334 size_t len;
28336 q = (const char *) strchr (name, '$');
28338 if (q == 0 || q == name)
28339 return name;
28341 len = strlen (name);
28342 strip = XALLOCAVEC (char, len + 1);
28343 strcpy (strip, name);
28344 p = strip + (q - name);
28345 while (p)
28347 *p = '_';
28348 p = strchr (p + 1, '$');
28351 return ggc_alloc_string (strip, len);
28353 #endif
28355 void
28356 rs6000_output_symbol_ref (FILE *file, rtx x)
28358 /* Currently C++ toc references to vtables can be emitted before it
28359 is decided whether the vtable is public or private. If this is
28360 the case, then the linker will eventually complain that there is
28361 a reference to an unknown section. Thus, for vtables only,
28362 we emit the TOC reference to reference the symbol and not the
28363 section. */
28364 const char *name = XSTR (x, 0);
28366 tree decl = SYMBOL_REF_DECL (x);
28367 if (decl /* sync condition with assemble_external () */
28368 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28369 && (TREE_CODE (decl) == VAR_DECL
28370 || TREE_CODE (decl) == FUNCTION_DECL)
28371 && name[strlen (name) - 1] != ']')
28373 name = concat (name,
28374 (TREE_CODE (decl) == FUNCTION_DECL
28375 ? "[DS]" : "[UA]"),
28376 NULL);
28377 XSTR (x, 0) = name;
28380 if (VTABLE_NAME_P (name))
28382 RS6000_OUTPUT_BASENAME (file, name);
28384 else
28385 assemble_name (file, name);
28388 /* Output a TOC entry. We derive the entry name from what is being
28389 written. */
28391 void
28392 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
28394 char buf[256];
28395 const char *name = buf;
28396 rtx base = x;
28397 HOST_WIDE_INT offset = 0;
28399 gcc_assert (!TARGET_NO_TOC);
28401 /* When the linker won't eliminate them, don't output duplicate
28402 TOC entries (this happens on AIX if there is any kind of TOC,
28403 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
28404 CODE_LABELs. */
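/* A duplicate is resolved below with an assembler alias, e.g.
   (illustrative) ".set LC5,LC2", so later references to the new
   label land on the entry emitted first.  */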
28405 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
28407 struct toc_hash_struct *h;
28409 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
28410 time because GGC is not initialized at that point. */
28411 if (toc_hash_table == NULL)
28412 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
28414 h = ggc_alloc<toc_hash_struct> ();
28415 h->key = x;
28416 h->key_mode = mode;
28417 h->labelno = labelno;
28419 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
28420 if (*found == NULL)
28421 *found = h;
28422 else /* This is indeed a duplicate.
28423 Set this label equal to that label. */
28425 fputs ("\t.set ", file);
28426 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28427 fprintf (file, "%d,", labelno);
28428 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28429 fprintf (file, "%d\n", ((*found)->labelno));
28431 #ifdef HAVE_AS_TLS
28432 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
28433 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
28434 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
28436 fputs ("\t.set ", file);
28437 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28438 fprintf (file, "%d,", labelno);
28439 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28440 fprintf (file, "%d\n", ((*found)->labelno));
28442 #endif
28443 return;
28447 /* If we're going to put a double constant in the TOC, make sure it's
28448 aligned properly when strict alignment is on. */
28449 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
28450 && STRICT_ALIGNMENT
28451 && GET_MODE_BITSIZE (mode) >= 64
28452 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
28453 ASM_OUTPUT_ALIGN (file, 3);
28456 (*targetm.asm_out.internal_label) (file, "LC", labelno);
28458 /* Handle FP constants specially. Note that if we have a minimal
28459 TOC, things we put here aren't actually in the TOC, so we can allow
28460 FP constants. */
28461 if (GET_CODE (x) == CONST_DOUBLE &&
28462 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
28463 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
28465 long k[4];
28467 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28468 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
28469 else
28470 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28472 if (TARGET_64BIT)
28474 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28475 fputs (DOUBLE_INT_ASM_OP, file);
28476 else
28477 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28478 k[0] & 0xffffffff, k[1] & 0xffffffff,
28479 k[2] & 0xffffffff, k[3] & 0xffffffff);
28480 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
28481 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28482 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
28483 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
28484 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
28485 return;
28487 else
28489 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28490 fputs ("\t.long ", file);
28491 else
28492 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28493 k[0] & 0xffffffff, k[1] & 0xffffffff,
28494 k[2] & 0xffffffff, k[3] & 0xffffffff);
28495 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
28496 k[0] & 0xffffffff, k[1] & 0xffffffff,
28497 k[2] & 0xffffffff, k[3] & 0xffffffff);
28498 return;
28501 else if (GET_CODE (x) == CONST_DOUBLE &&
28502 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
28504 long k[2];
28506 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28507 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
28508 else
28509 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28511 if (TARGET_64BIT)
28513 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28514 fputs (DOUBLE_INT_ASM_OP, file);
28515 else
28516 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28517 k[0] & 0xffffffff, k[1] & 0xffffffff);
28518 fprintf (file, "0x%lx%08lx\n",
28519 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28520 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
28521 return;
28523 else
28525 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28526 fputs ("\t.long ", file);
28527 else
28528 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28529 k[0] & 0xffffffff, k[1] & 0xffffffff);
28530 fprintf (file, "0x%lx,0x%lx\n",
28531 k[0] & 0xffffffff, k[1] & 0xffffffff);
28532 return;
28535 else if (GET_CODE (x) == CONST_DOUBLE &&
28536 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
28538 long l;
28540 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28541 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
28542 else
28543 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
28545 if (TARGET_64BIT)
28547 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28548 fputs (DOUBLE_INT_ASM_OP, file);
28549 else
28550 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28551 if (WORDS_BIG_ENDIAN)
28552 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
28553 else
28554 fprintf (file, "0x%lx\n", l & 0xffffffff);
28555 return;
28557 else
28559 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28560 fputs ("\t.long ", file);
28561 else
28562 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28563 fprintf (file, "0x%lx\n", l & 0xffffffff);
28564 return;
28567 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
28569 unsigned HOST_WIDE_INT low;
28570 HOST_WIDE_INT high;
28572 low = INTVAL (x) & 0xffffffff;
28573 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
28575 /* TOC entries are always Pmode-sized, so on big-endian targets
28576 smaller integer constants in the TOC need to be padded.
28577 (This is still a win over putting the constants in
28578 a separate constant pool, because then we'd have
28579 to have both a TOC entry _and_ the actual constant.)
28581 For a 32-bit target, CONST_INT values are loaded and shifted
28582 entirely within `low' and can be stored in one TOC entry. */
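/* Concretely, an SImode constant 1 in a 64-bit big-endian TOC is
   emitted as the doubleword 0x0000000100000000, leaving the value
   in the most-significant word of the pointer-sized entry.  */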
28584 /* It would be easy to make this work, but it doesn't now. */
28585 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
28587 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
28589 low |= high << 32;
28590 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
28591 high = (HOST_WIDE_INT) low >> 32;
28592 low &= 0xffffffff;
28595 if (TARGET_64BIT)
28597 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28598 fputs (DOUBLE_INT_ASM_OP, file);
28599 else
28600 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28601 (long) high & 0xffffffff, (long) low & 0xffffffff);
28602 fprintf (file, "0x%lx%08lx\n",
28603 (long) high & 0xffffffff, (long) low & 0xffffffff);
28604 return;
28606 else
28608 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
28610 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28611 fputs ("\t.long ", file);
28612 else
28613 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28614 (long) high & 0xffffffff, (long) low & 0xffffffff);
28615 fprintf (file, "0x%lx,0x%lx\n",
28616 (long) high & 0xffffffff, (long) low & 0xffffffff);
28618 else
28620 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28621 fputs ("\t.long ", file);
28622 else
28623 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
28624 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
28626 return;
28630 if (GET_CODE (x) == CONST)
28632 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
28633 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
28635 base = XEXP (XEXP (x, 0), 0);
28636 offset = INTVAL (XEXP (XEXP (x, 0), 1));
28639 switch (GET_CODE (base))
28641 case SYMBOL_REF:
28642 name = XSTR (base, 0);
28643 break;
28645 case LABEL_REF:
28646 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
28647 CODE_LABEL_NUMBER (XEXP (base, 0)));
28648 break;
28650 case CODE_LABEL:
28651 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
28652 break;
28654 default:
28655 gcc_unreachable ();
28658 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28659 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
28660 else
28662 fputs ("\t.tc ", file);
28663 RS6000_OUTPUT_BASENAME (file, name);
28665 if (offset < 0)
28666 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
28667 else if (offset)
28668 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
28670 /* Mark large TOC symbols on AIX with [TE] so they are mapped
28671 after other TOC symbols, reducing overflow of small TOC access
28672 to [TC] symbols. */
28673 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
28674 ? "[TE]," : "[TC],", file);
28677 /* Currently C++ toc references to vtables can be emitted before it
28678 is decided whether the vtable is public or private. If this is
28679 the case, then the linker will eventually complain that there is
28680 a TOC reference to an unknown section. Thus, for vtables only,
28681 we emit the TOC reference to reference the symbol and not the
28682 section. */
28683 if (VTABLE_NAME_P (name))
28685 RS6000_OUTPUT_BASENAME (file, name);
28686 if (offset < 0)
28687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
28688 else if (offset > 0)
28689 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
28691 else
28692 output_addr_const (file, x);
28694 #if HAVE_AS_TLS
28695 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
28697 switch (SYMBOL_REF_TLS_MODEL (base))
28699 case 0:
28700 break;
28701 case TLS_MODEL_LOCAL_EXEC:
28702 fputs ("@le", file);
28703 break;
28704 case TLS_MODEL_INITIAL_EXEC:
28705 fputs ("@ie", file);
28706 break;
28707 /* Use global-dynamic for local-dynamic. */
28708 case TLS_MODEL_GLOBAL_DYNAMIC:
28709 case TLS_MODEL_LOCAL_DYNAMIC:
28710 putc ('\n', file);
28711 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
28712 fputs ("\t.tc .", file);
28713 RS6000_OUTPUT_BASENAME (file, name);
28714 fputs ("[TC],", file);
28715 output_addr_const (file, x);
28716 fputs ("@m", file);
28717 break;
28718 default:
28719 gcc_unreachable ();
28722 #endif
28724 putc ('\n', file);
28727 /* Output an assembler pseudo-op to write an ASCII string of N characters
28728 starting at P to FILE.
28730 On the RS/6000, we have to do this using the .byte operation and
28731 write out special characters outside the quoted string.
28732 Also, the assembler is broken; very long strings are truncated,
28733 so we must artificially break them up early. */
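/* For example (illustrative), the three bytes 'A', '"', '\n' are
   emitted as
       .byte "A"""
       .byte 10
   where the doubled quote stands for one '"' and the unprintable
   newline falls back to its decimal value.  */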
28735 void
28736 output_ascii (FILE *file, const char *p, int n)
28738 char c;
28739 int i, count_string;
28740 const char *for_string = "\t.byte \"";
28741 const char *for_decimal = "\t.byte ";
28742 const char *to_close = NULL;
28744 count_string = 0;
28745 for (i = 0; i < n; i++)
28747 c = *p++;
28748 if (c >= ' ' && c < 0177)
28750 if (for_string)
28751 fputs (for_string, file);
28752 putc (c, file);
28754 /* Write two quotes to get one. */
28755 if (c == '"')
28757 putc (c, file);
28758 ++count_string;
28761 for_string = NULL;
28762 for_decimal = "\"\n\t.byte ";
28763 to_close = "\"\n";
28764 ++count_string;
28766 if (count_string >= 512)
28768 fputs (to_close, file);
28770 for_string = "\t.byte \"";
28771 for_decimal = "\t.byte ";
28772 to_close = NULL;
28773 count_string = 0;
28776 else
28778 if (for_decimal)
28779 fputs (for_decimal, file);
28780 fprintf (file, "%d", c);
28782 for_string = "\n\t.byte \"";
28783 for_decimal = ", ";
28784 to_close = "\n";
28785 count_string = 0;
28789 /* Now close the string if we have written one. Then end the line. */
28790 if (to_close)
28791 fputs (to_close, file);
28794 /* Generate a unique section name for FILENAME for a section type
28795 represented by SECTION_DESC. Output goes into BUF.
28797 SECTION_DESC can be any string, as long as it is different for each
28798 possible section type.
28800 We name the section in the same manner as xlc. The name begins with an
28801 underscore followed by the filename (after stripping any leading directory
28802 names) with the last period replaced by the string SECTION_DESC. If
28803 FILENAME does not contain a period, SECTION_DESC is appended to the end of
28804 the name. */
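/* E.g. (hypothetical inputs) FILENAME "src/foo.c" with SECTION_DESC
   "ro_" yields "_fooro_"; a FILENAME without a period, say "bar",
   yields "_barro_".  */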
28806 void
28807 rs6000_gen_section_name (char **buf, const char *filename,
28808 const char *section_desc)
28810 const char *q, *after_last_slash, *last_period = 0;
28811 char *p;
28812 int len;
28814 after_last_slash = filename;
28815 for (q = filename; *q; q++)
28817 if (*q == '/')
28818 after_last_slash = q + 1;
28819 else if (*q == '.')
28820 last_period = q;
28823 len = strlen (after_last_slash) + strlen (section_desc) + 2;
28824 *buf = (char *) xmalloc (len);
28826 p = *buf;
28827 *p++ = '_';
28829 for (q = after_last_slash; *q; q++)
28831 if (q == last_period)
28833 strcpy (p, section_desc);
28834 p += strlen (section_desc);
28835 break;
28838 else if (ISALNUM (*q))
28839 *p++ = *q;
28842 if (last_period == 0)
28843 strcpy (p, section_desc);
28844 else
28845 *p = '\0';
28848 /* Emit profile function. */
28850 void
28851 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
28853 /* Non-standard profiling for kernels, which just saves LR then calls
28854 _mcount without worrying about arg saves. The idea is to change
28855 the function prologue as little as possible as it isn't easy to
28856 account for arg save/restore code added just for _mcount. */
28857 if (TARGET_PROFILE_KERNEL)
28858 return;
28860 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28862 #ifndef NO_PROFILE_COUNTERS
28863 # define NO_PROFILE_COUNTERS 0
28864 #endif
28865 if (NO_PROFILE_COUNTERS)
28866 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28867 LCT_NORMAL, VOIDmode, 0);
28868 else
28870 char buf[30];
28871 const char *label_name;
28872 rtx fun;
28874 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28875 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
28876 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
28878 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28879 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
28882 else if (DEFAULT_ABI == ABI_DARWIN)
28884 const char *mcount_name = RS6000_MCOUNT;
28885 int caller_addr_regno = LR_REGNO;
28887 /* Be conservative and always set this, at least for now. */
28888 crtl->uses_pic_offset_table = 1;
28890 #if TARGET_MACHO
28891 /* For PIC code, set up a stub and collect the caller's address
28892 from r0, which is where the prologue puts it. */
28893 if (MACHOPIC_INDIRECT
28894 && crtl->uses_pic_offset_table)
28895 caller_addr_regno = 0;
28896 #endif
28897 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
28898 LCT_NORMAL, VOIDmode, 1,
28899 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
28903 /* Write function profiler code. */
28905 void
28906 output_function_profiler (FILE *file, int labelno)
28908 char buf[100];
28910 switch (DEFAULT_ABI)
28912 default:
28913 gcc_unreachable ();
28915 case ABI_V4:
28916 if (!TARGET_32BIT)
28918 warning (0, "no profiling of 64-bit code for this ABI");
28919 return;
28921 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28922 fprintf (file, "\tmflr %s\n", reg_names[0]);
28923 if (NO_PROFILE_COUNTERS)
28925 asm_fprintf (file, "\tstw %s,4(%s)\n",
28926 reg_names[0], reg_names[1]);
28928 else if (TARGET_SECURE_PLT && flag_pic)
28930 if (TARGET_LINK_STACK)
28932 char name[32];
28933 get_ppc476_thunk_name (name);
28934 asm_fprintf (file, "\tbl %s\n", name);
28936 else
28937 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
28938 asm_fprintf (file, "\tstw %s,4(%s)\n",
28939 reg_names[0], reg_names[1]);
28940 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28941 asm_fprintf (file, "\taddis %s,%s,",
28942 reg_names[12], reg_names[12]);
28943 assemble_name (file, buf);
28944 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
28945 assemble_name (file, buf);
28946 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
28948 else if (flag_pic == 1)
28950 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
28951 asm_fprintf (file, "\tstw %s,4(%s)\n",
28952 reg_names[0], reg_names[1]);
28953 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28954 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
28955 assemble_name (file, buf);
28956 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
28958 else if (flag_pic > 1)
28960 asm_fprintf (file, "\tstw %s,4(%s)\n",
28961 reg_names[0], reg_names[1]);
28962 /* Now, we need to get the address of the label. */
28963 if (TARGET_LINK_STACK)
28965 char name[32];
28966 get_ppc476_thunk_name (name);
28967 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
28968 assemble_name (file, buf);
28969 fputs ("-.\n1:", file);
28970 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28971 asm_fprintf (file, "\taddi %s,%s,4\n",
28972 reg_names[11], reg_names[11]);
28974 else
28976 fputs ("\tbcl 20,31,1f\n\t.long ", file);
28977 assemble_name (file, buf);
28978 fputs ("-.\n1:", file);
28979 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28981 asm_fprintf (file, "\tlwz %s,0(%s)\n",
28982 reg_names[0], reg_names[11]);
28983 asm_fprintf (file, "\tadd %s,%s,%s\n",
28984 reg_names[0], reg_names[0], reg_names[11]);
28986 else
28988 asm_fprintf (file, "\tlis %s,", reg_names[12]);
28989 assemble_name (file, buf);
28990 fputs ("@ha\n", file);
28991 asm_fprintf (file, "\tstw %s,4(%s)\n",
28992 reg_names[0], reg_names[1]);
28993 asm_fprintf (file, "\tla %s,", reg_names[0]);
28994 assemble_name (file, buf);
28995 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
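/* As an illustration, the non-PIC branch above emits, for the internal
   counter label LPn, roughly:
	lis r12,LPn@ha
	stw r0,4(r1)
	la r0,LPn@l(r12)
   saving the LR copy and leaving the counter address in r0 for the
   _mcount call emitted below.  */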
28998 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
28999 fprintf (file, "\tbl %s%s\n",
29000 RS6000_MCOUNT, flag_pic ? "@plt" : "");
29001 break;
29003 case ABI_AIX:
29004 case ABI_ELFv2:
29005 case ABI_DARWIN:
29006 /* Don't do anything, done in output_profile_hook (). */
29007 break;
29013 /* The following variable value is the last issued insn. */
29015 static rtx last_scheduled_insn;
29017 /* The following variable helps to balance issuing of load and
29018 store instructions. */
29020 static int load_store_pendulum;
29022 /* Power4 load update and store update instructions are cracked into a
29023 load or store and an integer insn which are executed in the same cycle.
29024 Branches have their own dispatch slot which does not count against the
29025 GCC issue rate, but it changes the program flow so there are no other
29026 instructions to issue in this cycle. */
29028 static int
29029 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29031 last_scheduled_insn = insn;
29032 if (GET_CODE (PATTERN (insn)) == USE
29033 || GET_CODE (PATTERN (insn)) == CLOBBER)
29035 cached_can_issue_more = more;
29036 return cached_can_issue_more;
29039 if (insn_terminates_group_p (insn, current_group))
29041 cached_can_issue_more = 0;
29042 return cached_can_issue_more;
29045 /* If the insn is not recognized (has no reservation), leave the issue count unchanged. */
29046 if (recog_memoized (insn) < 0)
29047 return more;
29049 if (rs6000_sched_groups)
29051 if (is_microcoded_insn (insn))
29052 cached_can_issue_more = 0;
29053 else if (is_cracked_insn (insn))
29054 cached_can_issue_more = more > 2 ? more - 2 : 0;
29055 else
29056 cached_can_issue_more = more - 1;
29058 return cached_can_issue_more;
29061 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
29062 return 0;
29064 cached_can_issue_more = more - 1;
29065 return cached_can_issue_more;
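/* For instance, when rs6000_sched_groups is set, a cracked insn consumes
   two issue slots, so with MORE == 4 the code above returns 2, while a
   microcoded insn terminates the group and returns 0.  */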
29068 static int
29069 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
29071 int r = rs6000_variable_issue_1 (insn, more);
29072 if (verbose)
29073 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
29074 return r;
29077 /* Adjust the cost of a scheduling dependency. Return the new cost of
29078 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
29080 static int
29081 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
29083 enum attr_type attr_type;
29085 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
29086 return cost;
29088 switch (REG_NOTE_KIND (link))
29090 case REG_DEP_TRUE:
29092 /* Data dependency; DEP_INSN writes a register that INSN reads
29093 some cycles later. */
29095 /* Separate a load from a narrower, dependent store. */
29096 if (rs6000_sched_groups
29097 && GET_CODE (PATTERN (insn)) == SET
29098 && GET_CODE (PATTERN (dep_insn)) == SET
29099 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
29100 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
29101 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
29102 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
29103 return cost + 14;
29105 attr_type = get_attr_type (insn);
29107 switch (attr_type)
29109 case TYPE_JMPREG:
29110 /* Tell the first scheduling pass about the latency between
29111 a mtctr and bctr (and mtlr and br/blr). The first
29112 scheduling pass will not know about this latency since
29113 the mtctr instruction, which has the latency associated
29114 to it, will be generated by reload. */
29115 return 4;
29116 case TYPE_BRANCH:
29117 /* Leave some extra cycles between a compare and its
29118 dependent branch, to inhibit expensive mispredicts. */
29119 if ((rs6000_cpu_attr == CPU_PPC603
29120 || rs6000_cpu_attr == CPU_PPC604
29121 || rs6000_cpu_attr == CPU_PPC604E
29122 || rs6000_cpu_attr == CPU_PPC620
29123 || rs6000_cpu_attr == CPU_PPC630
29124 || rs6000_cpu_attr == CPU_PPC750
29125 || rs6000_cpu_attr == CPU_PPC7400
29126 || rs6000_cpu_attr == CPU_PPC7450
29127 || rs6000_cpu_attr == CPU_PPCE5500
29128 || rs6000_cpu_attr == CPU_PPCE6500
29129 || rs6000_cpu_attr == CPU_POWER4
29130 || rs6000_cpu_attr == CPU_POWER5
29131 || rs6000_cpu_attr == CPU_POWER7
29132 || rs6000_cpu_attr == CPU_POWER8
29133 || rs6000_cpu_attr == CPU_POWER9
29134 || rs6000_cpu_attr == CPU_CELL)
29135 && recog_memoized (dep_insn)
29136 && (INSN_CODE (dep_insn) >= 0))
29138 switch (get_attr_type (dep_insn))
29140 case TYPE_CMP:
29141 case TYPE_FPCOMPARE:
29142 case TYPE_CR_LOGICAL:
29143 case TYPE_DELAYED_CR:
29144 return cost + 2;
29145 case TYPE_EXTS:
29146 case TYPE_MUL:
29147 if (get_attr_dot (dep_insn) == DOT_YES)
29148 return cost + 2;
29149 else
29150 break;
29151 case TYPE_SHIFT:
29152 if (get_attr_dot (dep_insn) == DOT_YES
29153 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
29154 return cost + 2;
29155 else
29156 break;
29157 default:
29158 break;
29160 break;
29162 case TYPE_STORE:
29163 case TYPE_FPSTORE:
29164 if ((rs6000_cpu == PROCESSOR_POWER6)
29165 && recog_memoized (dep_insn)
29166 && (INSN_CODE (dep_insn) >= 0))
29169 if (GET_CODE (PATTERN (insn)) != SET)
29170 /* If this ever happens, we would need to extend this logic to
29171 schedule optimally; return the default cost for now. */
29172 return cost;
29174 /* Adjust the cost for the case where the value written
29175 by a fixed point operation is used as the address
29176 gen value on a store. */
29177 switch (get_attr_type (dep_insn))
29179 case TYPE_LOAD:
29180 case TYPE_CNTLZ:
29182 if (! store_data_bypass_p (dep_insn, insn))
29183 return get_attr_sign_extend (dep_insn)
29184 == SIGN_EXTEND_YES ? 6 : 4;
29185 break;
29187 case TYPE_SHIFT:
29189 if (! store_data_bypass_p (dep_insn, insn))
29190 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29191 6 : 3;
29192 break;
29194 case TYPE_INTEGER:
29195 case TYPE_ADD:
29196 case TYPE_LOGICAL:
29197 case TYPE_EXTS:
29198 case TYPE_INSERT:
29200 if (! store_data_bypass_p (dep_insn, insn))
29201 return 3;
29202 break;
29204 case TYPE_STORE:
29205 case TYPE_FPLOAD:
29206 case TYPE_FPSTORE:
29208 if (get_attr_update (dep_insn) == UPDATE_YES
29209 && ! store_data_bypass_p (dep_insn, insn))
29210 return 3;
29211 break;
29213 case TYPE_MUL:
29215 if (! store_data_bypass_p (dep_insn, insn))
29216 return 17;
29217 break;
29219 case TYPE_DIV:
29221 if (! store_data_bypass_p (dep_insn, insn))
29222 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29223 break;
29225 default:
29226 break;
29229 break;
29231 case TYPE_LOAD:
29232 if ((rs6000_cpu == PROCESSOR_POWER6)
29233 && recog_memoized (dep_insn)
29234 && (INSN_CODE (dep_insn) >= 0))
29237 /* Adjust the cost for the case where the value written
29238 by a fixed point instruction is used within the address
29239 gen portion of a subsequent load(u)(x) */
29240 switch (get_attr_type (dep_insn))
29242 case TYPE_LOAD:
29243 case TYPE_CNTLZ:
29245 if (set_to_load_agen (dep_insn, insn))
29246 return get_attr_sign_extend (dep_insn)
29247 == SIGN_EXTEND_YES ? 6 : 4;
29248 break;
29250 case TYPE_SHIFT:
29252 if (set_to_load_agen (dep_insn, insn))
29253 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29254 6 : 3;
29255 break;
29257 case TYPE_INTEGER:
29258 case TYPE_ADD:
29259 case TYPE_LOGICAL:
29260 case TYPE_EXTS:
29261 case TYPE_INSERT:
29263 if (set_to_load_agen (dep_insn, insn))
29264 return 3;
29265 break;
29267 case TYPE_STORE:
29268 case TYPE_FPLOAD:
29269 case TYPE_FPSTORE:
29271 if (get_attr_update (dep_insn) == UPDATE_YES
29272 && set_to_load_agen (dep_insn, insn))
29273 return 3;
29274 break;
29276 case TYPE_MUL:
29278 if (set_to_load_agen (dep_insn, insn))
29279 return 17;
29280 break;
29282 case TYPE_DIV:
29284 if (set_to_load_agen (dep_insn, insn))
29285 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29286 break;
29288 default:
29289 break;
29292 break;
29294 case TYPE_FPLOAD:
29295 if ((rs6000_cpu == PROCESSOR_POWER6)
29296 && get_attr_update (insn) == UPDATE_NO
29297 && recog_memoized (dep_insn)
29298 && (INSN_CODE (dep_insn) >= 0)
29299 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
29300 return 2;
29302 default:
29303 break;
29306 /* Fall out to return default cost. */
29308 break;
29310 case REG_DEP_OUTPUT:
29311 /* Output dependency; DEP_INSN writes a register that INSN writes some
29312 cycles later. */
29313 if ((rs6000_cpu == PROCESSOR_POWER6)
29314 && recog_memoized (dep_insn)
29315 && (INSN_CODE (dep_insn) >= 0))
29317 attr_type = get_attr_type (insn);
29319 switch (attr_type)
29321 case TYPE_FP:
29322 if (get_attr_type (dep_insn) == TYPE_FP)
29323 return 1;
29324 break;
29325 case TYPE_FPLOAD:
29326 if (get_attr_update (insn) == UPDATE_NO
29327 && get_attr_type (dep_insn) == TYPE_MFFGPR)
29328 return 2;
29329 break;
29330 default:
29331 break;
29334 case REG_DEP_ANTI:
29335 /* Anti dependency; DEP_INSN reads a register that INSN writes some
29336 cycles later. */
29337 return 0;
29339 default:
29340 gcc_unreachable ();
29343 return cost;
29346 /* Debug version of rs6000_adjust_cost. */
29348 static int
29349 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
29350 int cost)
29352 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
29354 if (ret != cost)
29356 const char *dep;
29358 switch (REG_NOTE_KIND (link))
29360 default: dep = "unknown dependency"; break;
29361 case REG_DEP_TRUE: dep = "data dependency"; break;
29362 case REG_DEP_OUTPUT: dep = "output dependency"; break;
29363 case REG_DEP_ANTI: dep = "anti dependency"; break;
29366 fprintf (stderr,
29367 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
29368 "%s, insn:\n", ret, cost, dep);
29370 debug_rtx (insn);
29373 return ret;
29376 /* Return true if INSN is microcoded, false otherwise. */
29379 static bool
29380 is_microcoded_insn (rtx_insn *insn)
29382 if (!insn || !NONDEBUG_INSN_P (insn)
29383 || GET_CODE (PATTERN (insn)) == USE
29384 || GET_CODE (PATTERN (insn)) == CLOBBER)
29385 return false;
29387 if (rs6000_cpu_attr == CPU_CELL)
29388 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
29390 if (rs6000_sched_groups
29391 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
29393 enum attr_type type = get_attr_type (insn);
29394 if ((type == TYPE_LOAD
29395 && get_attr_update (insn) == UPDATE_YES
29396 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
29397 || ((type == TYPE_LOAD || type == TYPE_STORE)
29398 && get_attr_update (insn) == UPDATE_YES
29399 && get_attr_indexed (insn) == INDEXED_YES)
29400 || type == TYPE_MFCR)
29401 return true;
29404 return false;
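/* As a concrete illustration of the test above: on POWER4/5, a load with
   update and indexed form (e.g. lwzux) or a sign-extending load with
   update (e.g. lwaux) is treated as microcoded, as are the mfcr forms.  */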
29407 /* The function returns true if INSN is cracked into 2 instructions
29408 by the processor (and therefore occupies 2 issue slots). */
29410 static bool
29411 is_cracked_insn (rtx_insn *insn)
29413 if (!insn || !NONDEBUG_INSN_P (insn)
29414 || GET_CODE (PATTERN (insn)) == USE
29415 || GET_CODE (PATTERN (insn)) == CLOBBER)
29416 return false;
29418 if (rs6000_sched_groups
29419 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
29421 enum attr_type type = get_attr_type (insn);
29422 if ((type == TYPE_LOAD
29423 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29424 && get_attr_update (insn) == UPDATE_NO)
29425 || (type == TYPE_LOAD
29426 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
29427 && get_attr_update (insn) == UPDATE_YES
29428 && get_attr_indexed (insn) == INDEXED_NO)
29429 || (type == TYPE_STORE
29430 && get_attr_update (insn) == UPDATE_YES
29431 && get_attr_indexed (insn) == INDEXED_NO)
29432 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
29433 && get_attr_update (insn) == UPDATE_YES)
29434 || type == TYPE_DELAYED_CR
29435 || (type == TYPE_EXTS
29436 && get_attr_dot (insn) == DOT_YES)
29437 || (type == TYPE_SHIFT
29438 && get_attr_dot (insn) == DOT_YES
29439 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
29440 || (type == TYPE_MUL
29441 && get_attr_dot (insn) == DOT_YES)
29442 || type == TYPE_DIV
29443 || (type == TYPE_INSERT
29444 && get_attr_size (insn) == SIZE_32))
29445 return true;
29448 return false;
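/* For example, on POWER4/5 a plain sign-extending load such as lha
   (no update) is cracked by the first clause above, and a record-form
   shift with an immediate shift count satisfies the TYPE_SHIFT clause.  */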
29451 /* The function returns true if INSN can be issued only from
29452 the branch slot. */
29454 static bool
29455 is_branch_slot_insn (rtx_insn *insn)
29457 if (!insn || !NONDEBUG_INSN_P (insn)
29458 || GET_CODE (PATTERN (insn)) == USE
29459 || GET_CODE (PATTERN (insn)) == CLOBBER)
29460 return false;
29462 if (rs6000_sched_groups)
29464 enum attr_type type = get_attr_type (insn);
29465 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
29466 return true;
29467 return false;
29470 return false;
29473 /* Return true if OUT_INSN sets a value that is used in the address
29474 generation computation of IN_INSN. */
29475 static bool
29476 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
29478 rtx out_set, in_set;
29480 /* For performance reasons, only handle the simple case where
29481 both loads are a single_set. */
29482 out_set = single_set (out_insn);
29483 if (out_set)
29485 in_set = single_set (in_insn);
29486 if (in_set)
29487 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
29490 return false;
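/* E.g. for the pair
	addi r9,r9,8	<- OUT_INSN sets r9
	lwz r3,0(r9)	<- IN_INSN uses r9 in its address
   the destination of OUT_INSN is mentioned in the source (the MEM) of
   IN_INSN, so the check above returns true.  */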
29493 /* Try to determine base/offset/size parts of the given MEM.
29494 Return true if successful, false if all the values couldn't
29495 be determined.
29497 This function only looks for REG or REG+CONST address forms.
29498 REG+REG address form will return false. */
29500 static bool
29501 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
29502 HOST_WIDE_INT *size)
29504 rtx addr_rtx;
29505 if (MEM_SIZE_KNOWN_P (mem))
29506 *size = MEM_SIZE (mem);
29507 else
29508 return false;
29510 addr_rtx = XEXP (mem, 0);
29511 if (GET_CODE (addr_rtx) == PRE_MODIFY)
29512 addr_rtx = XEXP (addr_rtx, 1);
29514 *offset = 0;
29515 while (GET_CODE (addr_rtx) == PLUS
29516 && CONST_INT_P (XEXP (addr_rtx, 1)))
29518 *offset += INTVAL (XEXP (addr_rtx, 1));
29519 addr_rtx = XEXP (addr_rtx, 0);
29521 if (!REG_P (addr_rtx))
29522 return false;
29524 *base = addr_rtx;
29525 return true;
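/* Sketch of the decomposition above: for a MEM whose address is
   (plus (reg 9) (const_int 16)) with a known 8-byte size, *BASE becomes
   r9, *OFFSET 16 and *SIZE 8.  A REG+REG (indexed) address leaves a PLUS
   behind, fails the final REG_P test, and returns false.  */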
29528 /* Return true if the target storage location of MEM1 is adjacent
29529 to the target storage location of MEM2. */
29532 static bool
29533 adjacent_mem_locations (rtx mem1, rtx mem2)
29535 rtx reg1, reg2;
29536 HOST_WIDE_INT off1, size1, off2, size2;
29538 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29539 && get_memref_parts (mem2, &reg2, &off2, &size2))
29540 return ((REGNO (reg1) == REGNO (reg2))
29541 && ((off1 + size1 == off2)
29542 || (off2 + size2 == off1)));
29544 return false;
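/* For instance, two 8-byte stores off the same base register at offsets
   0 and 8 are adjacent (0 + 8 == 8); the test is symmetric, so the order
   of MEM1 and MEM2 does not matter.  */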
29547 /* This function returns true if it can be determined that the two MEM
29548 locations overlap by at least 1 byte based on base reg/offset/size. */
29550 static bool
29551 mem_locations_overlap (rtx mem1, rtx mem2)
29553 rtx reg1, reg2;
29554 HOST_WIDE_INT off1, size1, off2, size2;
29556 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29557 && get_memref_parts (mem2, &reg2, &off2, &size2))
29558 return ((REGNO (reg1) == REGNO (reg2))
29559 && (((off1 <= off2) && (off1 + size1 > off2))
29560 || ((off2 <= off1) && (off2 + size2 > off1))));
29562 return false;
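/* E.g. a 4-byte access at offset 0 and an 8-byte access at offset 0
   overlap, while 4-byte accesses at offsets 0 and 4 are merely adjacent
   and do not.  */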
29565 /* A C statement (sans semicolon) to update the integer scheduling
29566 priority INSN_PRIORITY (INSN). Increase the priority to execute the
29567 INSN earlier, reduce the priority to execute INSN later. Do not
29568 define this macro if you do not need to adjust the scheduling
29569 priorities of insns. */
29571 static int
29572 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
29574 rtx load_mem, str_mem;
29575 /* On machines (like the 750) which have asymmetric integer units,
29576 where one integer unit can do multiply and divides and the other
29577 can't, reduce the priority of multiply/divide so it is scheduled
29578 before other integer operations. */
29580 #if 0
29581 if (! INSN_P (insn))
29582 return priority;
29584 if (GET_CODE (PATTERN (insn)) == USE)
29585 return priority;
29587 switch (rs6000_cpu_attr) {
29588 case CPU_PPC750:
29589 switch (get_attr_type (insn))
29591 default:
29592 break;
29594 case TYPE_MUL:
29595 case TYPE_DIV:
29596 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
29597 priority, priority);
29598 if (priority >= 0 && priority < 0x01000000)
29599 priority >>= 3;
29600 break;
29603 #endif
29605 if (insn_must_be_first_in_group (insn)
29606 && reload_completed
29607 && current_sched_info->sched_max_insns_priority
29608 && rs6000_sched_restricted_insns_priority)
29611 /* Prioritize insns that can be dispatched only in the first
29612 dispatch slot. */
29613 if (rs6000_sched_restricted_insns_priority == 1)
29614 /* Attach highest priority to insn. This means that in
29615 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
29616 precede 'priority' (critical path) considerations. */
29617 return current_sched_info->sched_max_insns_priority;
29618 else if (rs6000_sched_restricted_insns_priority == 2)
29619 /* Increase priority of insn by a minimal amount. This means that in
29620 haifa-sched.c:ready_sort(), only 'priority' (critical path)
29621 considerations precede dispatch-slot restriction considerations. */
29622 return (priority + 1);
29625 if (rs6000_cpu == PROCESSOR_POWER6
29626 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
29627 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
29628 /* Attach highest priority to insn if the scheduler has just issued two
29629 stores and this instruction is a load, or two loads and this instruction
29630 is a store. Power6 wants loads and stores scheduled alternately
29631 when possible. */
29632 return current_sched_info->sched_max_insns_priority;
29634 return priority;
29637 /* Return true if the instruction is nonpipelined on the Cell. */
29638 static bool
29639 is_nonpipeline_insn (rtx_insn *insn)
29641 enum attr_type type;
29642 if (!insn || !NONDEBUG_INSN_P (insn)
29643 || GET_CODE (PATTERN (insn)) == USE
29644 || GET_CODE (PATTERN (insn)) == CLOBBER)
29645 return false;
29647 type = get_attr_type (insn);
29648 if (type == TYPE_MUL
29649 || type == TYPE_DIV
29650 || type == TYPE_SDIV
29651 || type == TYPE_DDIV
29652 || type == TYPE_SSQRT
29653 || type == TYPE_DSQRT
29654 || type == TYPE_MFCR
29655 || type == TYPE_MFCRF
29656 || type == TYPE_MFJMPR)
29658 return true;
29660 return false;
29664 /* Return how many instructions the machine can issue per cycle. */
29666 static int
29667 rs6000_issue_rate (void)
29669 /* Unless scheduling for register pressure, use issue rate of 1 for
29670 first scheduling pass to decrease degradation. */
29671 if (!reload_completed && !flag_sched_pressure)
29672 return 1;
29674 switch (rs6000_cpu_attr) {
29675 case CPU_RS64A:
29676 case CPU_PPC601: /* ? */
29677 case CPU_PPC7450:
29678 return 3;
29679 case CPU_PPC440:
29680 case CPU_PPC603:
29681 case CPU_PPC750:
29682 case CPU_PPC7400:
29683 case CPU_PPC8540:
29684 case CPU_PPC8548:
29685 case CPU_CELL:
29686 case CPU_PPCE300C2:
29687 case CPU_PPCE300C3:
29688 case CPU_PPCE500MC:
29689 case CPU_PPCE500MC64:
29690 case CPU_PPCE5500:
29691 case CPU_PPCE6500:
29692 case CPU_TITAN:
29693 return 2;
29694 case CPU_PPC476:
29695 case CPU_PPC604:
29696 case CPU_PPC604E:
29697 case CPU_PPC620:
29698 case CPU_PPC630:
29699 return 4;
29700 case CPU_POWER4:
29701 case CPU_POWER5:
29702 case CPU_POWER6:
29703 case CPU_POWER7:
29704 return 5;
29705 case CPU_POWER8:
29706 case CPU_POWER9:
29707 return 7;
29708 default:
29709 return 1;
29713 /* Return how many instructions to look ahead for better insn
29714 scheduling. */
29716 static int
29717 rs6000_use_sched_lookahead (void)
29719 switch (rs6000_cpu_attr)
29721 case CPU_PPC8540:
29722 case CPU_PPC8548:
29723 return 4;
29725 case CPU_CELL:
29726 return (reload_completed ? 8 : 0);
29728 default:
29729 return 0;
29733 /* We are choosing an insn from the ready queue. Return zero if INSN can be
29734 chosen. */
29735 static int
29736 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
29738 if (ready_index == 0)
29739 return 0;
29741 if (rs6000_cpu_attr != CPU_CELL)
29742 return 0;
29744 gcc_assert (insn != NULL_RTX && INSN_P (insn));
29746 if (!reload_completed
29747 || is_nonpipeline_insn (insn)
29748 || is_microcoded_insn (insn))
29749 return 1;
29751 return 0;
29754 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
29755 and return true. */
29757 static bool
29758 find_mem_ref (rtx pat, rtx *mem_ref)
29760 const char * fmt;
29761 int i, j;
29763 /* stack_tie does not produce any real memory traffic. */
29764 if (tie_operand (pat, VOIDmode))
29765 return false;
29767 if (GET_CODE (pat) == MEM)
29769 *mem_ref = pat;
29770 return true;
29773 /* Recursively process the pattern. */
29774 fmt = GET_RTX_FORMAT (GET_CODE (pat));
29776 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
29778 if (fmt[i] == 'e')
29780 if (find_mem_ref (XEXP (pat, i), mem_ref))
29781 return true;
29783 else if (fmt[i] == 'E')
29784 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
29786 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
29787 return true;
29791 return false;
29794 /* Determine if PAT is a PATTERN of a load insn. */
29796 static bool
29797 is_load_insn1 (rtx pat, rtx *load_mem)
29799 if (!pat)
29800 return false;
29802 if (GET_CODE (pat) == SET)
29803 return find_mem_ref (SET_SRC (pat), load_mem);
29805 if (GET_CODE (pat) == PARALLEL)
29807 int i;
29809 for (i = 0; i < XVECLEN (pat, 0); i++)
29810 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
29811 return true;
29814 return false;
29817 /* Determine if INSN loads from memory. */
29819 static bool
29820 is_load_insn (rtx insn, rtx *load_mem)
29822 if (!insn || !INSN_P (insn))
29823 return false;
29825 if (CALL_P (insn))
29826 return false;
29828 return is_load_insn1 (PATTERN (insn), load_mem);
29831 /* Determine if PAT is a PATTERN of a store insn. */
29833 static bool
29834 is_store_insn1 (rtx pat, rtx *str_mem)
29836 if (!pat)
29837 return false;
29839 if (GET_CODE (pat) == SET)
29840 return find_mem_ref (SET_DEST (pat), str_mem);
29842 if (GET_CODE (pat) == PARALLEL)
29844 int i;
29846 for (i = 0; i < XVECLEN (pat, 0); i++)
29847 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
29848 return true;
29851 return false;
29854 /* Determine if INSN stores to memory. */
29856 static bool
29857 is_store_insn (rtx insn, rtx *str_mem)
29859 if (!insn || !INSN_P (insn))
29860 return false;
29862 return is_store_insn1 (PATTERN (insn), str_mem);
29865 /* Returns whether the dependence between INSN and NEXT is considered
29866 costly by the given target. */
29868 static bool
29869 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
29871 rtx insn;
29872 rtx next;
29873 rtx load_mem, str_mem;
29875 /* If the flag is not enabled - no dependence is considered costly;
29876 allow all dependent insns in the same group.
29877 This is the most aggressive option. */
29878 if (rs6000_sched_costly_dep == no_dep_costly)
29879 return false;
29881 /* If the flag is set to 1 - a dependence is always considered costly;
29882 do not allow dependent instructions in the same group.
29883 This is the most conservative option. */
29884 if (rs6000_sched_costly_dep == all_deps_costly)
29885 return true;
29887 insn = DEP_PRO (dep);
29888 next = DEP_CON (dep);
29890 if (rs6000_sched_costly_dep == store_to_load_dep_costly
29891 && is_load_insn (next, &load_mem)
29892 && is_store_insn (insn, &str_mem))
29893 /* Prevent load after store in the same group. */
29894 return true;
29896 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
29897 && is_load_insn (next, &load_mem)
29898 && is_store_insn (insn, &str_mem)
29899 && DEP_TYPE (dep) == REG_DEP_TRUE
29900 && mem_locations_overlap(str_mem, load_mem))
29901 /* Prevent load after store in the same group if it is a true
29902 dependence. */
29903 return true;
29905 /* The flag is set to X; dependences with latency >= X are considered costly,
29906 and will not be scheduled in the same group. */
29907 if (rs6000_sched_costly_dep <= max_dep_latency
29908 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
29909 return true;
29911 return false;
29914 /* Return the next insn after INSN that is found before TAIL is reached,
29915 skipping any "non-active" insns - insns that will not actually occupy
29916 an issue slot. Return NULL_RTX if such an insn is not found. */
29918 static rtx_insn *
29919 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
29921 if (insn == NULL_RTX || insn == tail)
29922 return NULL;
29924 while (1)
29926 insn = NEXT_INSN (insn);
29927 if (insn == NULL_RTX || insn == tail)
29928 return NULL;
29930 if (CALL_P (insn)
29931 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
29932 || (NONJUMP_INSN_P (insn)
29933 && GET_CODE (PATTERN (insn)) != USE
29934 && GET_CODE (PATTERN (insn)) != CLOBBER
29935 && INSN_CODE (insn) != CODE_FOR_stack_tie))
29936 break;
29938 return insn;
29941 /* We are about to begin issuing insns for this clock cycle. */
29943 static int
29944 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
29945 rtx_insn **ready ATTRIBUTE_UNUSED,
29946 int *pn_ready ATTRIBUTE_UNUSED,
29947 int clock_var ATTRIBUTE_UNUSED)
29949 int n_ready = *pn_ready;
29951 if (sched_verbose)
29952 fprintf (dump, "// rs6000_sched_reorder :\n");
29954 /* Reorder the ready list, if the second to last ready insn
29955 is a nonpipeline insn. */
29956 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
29958 if (is_nonpipeline_insn (ready[n_ready - 1])
29959 && (recog_memoized (ready[n_ready - 2]) > 0))
29960 /* Simply swap first two insns. */
29961 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
29964 if (rs6000_cpu == PROCESSOR_POWER6)
29965 load_store_pendulum = 0;
29967 return rs6000_issue_rate ();
29970 /* Like rs6000_sched_reorder, but called after issuing each insn. */
29972 static int
29973 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
29974 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
29976 if (sched_verbose)
29977 fprintf (dump, "// rs6000_sched_reorder2 :\n");
29979 /* For Power6, we need to handle some special cases to try and keep the
29980 store queue from overflowing and triggering expensive flushes.
29982 This code monitors how load and store instructions are being issued
29983 and skews the ready list one way or the other to increase the likelihood
29984 that a desired instruction is issued at the proper time.
29986 A couple of things are done. First, we maintain a "load_store_pendulum"
29987 to track the current state of load/store issue.
29989 - If the pendulum is at zero, then no loads or stores have been
29990 issued in the current cycle so we do nothing.
29992 - If the pendulum is 1, then a single load has been issued in this
29993 cycle and we attempt to locate another load in the ready list to
29994 issue with it.
29996 - If the pendulum is -2, then two stores have already been
29997 issued in this cycle, so we increase the priority of the first load
29998 in the ready list to increase its likelihood of being chosen first
29999 in the next cycle.
30001 - If the pendulum is -1, then a single store has been issued in this
30002 cycle and we attempt to locate another store in the ready list to
30003 issue with it, preferring a store to an adjacent memory location to
30004 facilitate store pairing in the store queue.
30006 - If the pendulum is 2, then two loads have already been
30007 issued in this cycle, so we increase the priority of the first store
30008 in the ready list to increase its likelihood of being chosen first
30009 in the next cycle.
30011 - If the pendulum < -2 or > 2, then do nothing.
30013 Note: This code covers the most common scenarios. There exist non-
30014 load/store instructions which make use of the LSU and which
30015 would need to be accounted for to strictly model the behavior
30016 of the machine. Those instructions are currently unaccounted
30017 for to help minimize compile time overhead of this code. */
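/* A worked example of the pendulum: if the last two insns issued in this
   cycle were stores, the pendulum sits at -2, so the first load found on
   the ready list below gets its priority bumped, making a load more
   likely to lead the next cycle.  */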
30019 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
30021 int pos;
30022 int i;
30023 rtx_insn *tmp;
30024 rtx load_mem, str_mem;
30026 if (is_store_insn (last_scheduled_insn, &str_mem))
30027 /* Issuing a store, swing the load_store_pendulum to the left */
30028 load_store_pendulum--;
30029 else if (is_load_insn (last_scheduled_insn, &load_mem))
30030 /* Issuing a load, swing the load_store_pendulum to the right */
30031 load_store_pendulum++;
30032 else
30033 return cached_can_issue_more;
30035 /* If the pendulum is balanced, or there is only one instruction on
30036 the ready list, then all is well, so return. */
30037 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
30038 return cached_can_issue_more;
30040 if (load_store_pendulum == 1)
30042 /* A load has been issued in this cycle. Scan the ready list
30043 for another load to issue with it */
30044 pos = *pn_ready-1;
30046 while (pos >= 0)
30048 if (is_load_insn (ready[pos], &load_mem))
30050 /* Found a load. Move it to the head of the ready list,
30051 and adjust its priority so that it is more likely to
30052 stay there. */
30053 tmp = ready[pos];
30054 for (i=pos; i<*pn_ready-1; i++)
30055 ready[i] = ready[i + 1];
30056 ready[*pn_ready-1] = tmp;
30058 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30059 INSN_PRIORITY (tmp)++;
30060 break;
30062 pos--;
30065 else if (load_store_pendulum == -2)
30067 /* Two stores have been issued in this cycle. Increase the
30068 priority of the first load in the ready list to favor it for
30069 issuing in the next cycle. */
30070 pos = *pn_ready-1;
30072 while (pos >= 0)
30074 if (is_load_insn (ready[pos], &load_mem)
30075 && !sel_sched_p ()
30076 && INSN_PRIORITY_KNOWN (ready[pos]))
30078 INSN_PRIORITY (ready[pos])++;
30080 /* Adjust the pendulum to account for the fact that a load
30081 was found and increased in priority. This is to prevent
30082 increasing the priority of multiple loads. */
30083 load_store_pendulum--;
30085 break;
30087 pos--;
30090 else if (load_store_pendulum == -1)
30092 /* A store has been issued in this cycle. Scan the ready list for
30093 another store to issue with it, preferring a store to an adjacent
30094 memory location. */
30095 int first_store_pos = -1;
30097 pos = *pn_ready-1;
30099 while (pos >= 0)
30101 if (is_store_insn (ready[pos], &str_mem))
30103 rtx str_mem2;
30104 /* Maintain the index of the first store found on the
30105 list */
30106 if (first_store_pos == -1)
30107 first_store_pos = pos;
30109 if (is_store_insn (last_scheduled_insn, &str_mem2)
30110 && adjacent_mem_locations (str_mem, str_mem2))
30112 /* Found an adjacent store. Move it to the head of the
30113 ready list, and adjust its priority so that it is
30114 more likely to stay there. */
30115 tmp = ready[pos];
30116 for (i=pos; i<*pn_ready-1; i++)
30117 ready[i] = ready[i + 1];
30118 ready[*pn_ready-1] = tmp;
30120 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30121 INSN_PRIORITY (tmp)++;
30123 first_store_pos = -1;
30125 break;
30128 pos--;
30131 if (first_store_pos >= 0)
30133 /* An adjacent store wasn't found, but a non-adjacent store was,
30134 so move the non-adjacent store to the front of the ready
30135 list, and adjust its priority so that it is more likely to
30136 stay there. */
30137 tmp = ready[first_store_pos];
30138 for (i=first_store_pos; i<*pn_ready-1; i++)
30139 ready[i] = ready[i + 1];
30140 ready[*pn_ready-1] = tmp;
30141 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30142 INSN_PRIORITY (tmp)++;
30145 else if (load_store_pendulum == 2)
30147 /* Two loads have been issued in this cycle. Increase the priority
30148 of the first store in the ready list to favor it for issuing in
30149 the next cycle. */
30150 pos = *pn_ready-1;
30152 while (pos >= 0)
30154 if (is_store_insn (ready[pos], &str_mem)
30155 && !sel_sched_p ()
30156 && INSN_PRIORITY_KNOWN (ready[pos]))
30158 INSN_PRIORITY (ready[pos])++;
30160 /* Adjust the pendulum to account for the fact that a store
30161 was found and increased in priority. This is to prevent
30162 increasing the priority of multiple stores. */
30163 load_store_pendulum++;
30165 break;
30167 pos--;
30172 return cached_can_issue_more;
30175 /* Return whether the presence of INSN causes a dispatch group termination
30176 of group WHICH_GROUP.
30178 If WHICH_GROUP == current_group, this function will return true if INSN
30179 causes the termination of the current group (i.e, the dispatch group to
30180 which INSN belongs). This means that INSN will be the last insn in the
30181 group it belongs to.
30183 If WHICH_GROUP == previous_group, this function will return true if INSN
30184 causes the termination of the previous group (i.e, the dispatch group that
30185 precedes the group to which INSN belongs). This means that INSN will be
30186 the first insn in the group it belongs to). */
30188 static bool
30189 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
30191 bool first, last;
30193 if (! insn)
30194 return false;
30196 first = insn_must_be_first_in_group (insn);
30197 last = insn_must_be_last_in_group (insn);
30199 if (first && last)
30200 return true;
30202 if (which_group == current_group)
30203 return last;
30204 else if (which_group == previous_group)
30205 return first;
30207 return false;
30211 static bool
30212 insn_must_be_first_in_group (rtx_insn *insn)
30214 enum attr_type type;
30216 if (!insn
30217 || NOTE_P (insn)
30218 || DEBUG_INSN_P (insn)
30219 || GET_CODE (PATTERN (insn)) == USE
30220 || GET_CODE (PATTERN (insn)) == CLOBBER)
30221 return false;
30223 switch (rs6000_cpu)
30225 case PROCESSOR_POWER5:
30226 if (is_cracked_insn (insn))
30227 return true;
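/* Fall through: on POWER5 the POWER4 checks below also apply.  */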
30228 case PROCESSOR_POWER4:
30229 if (is_microcoded_insn (insn))
30230 return true;
30232 if (!rs6000_sched_groups)
30233 return false;
30235 type = get_attr_type (insn);
30237 switch (type)
30239 case TYPE_MFCR:
30240 case TYPE_MFCRF:
30241 case TYPE_MTCR:
30242 case TYPE_DELAYED_CR:
30243 case TYPE_CR_LOGICAL:
30244 case TYPE_MTJMPR:
30245 case TYPE_MFJMPR:
30246 case TYPE_DIV:
30247 case TYPE_LOAD_L:
30248 case TYPE_STORE_C:
30249 case TYPE_ISYNC:
30250 case TYPE_SYNC:
30251 return true;
30252 default:
30253 break;
30255 break;
30256 case PROCESSOR_POWER6:
30257 type = get_attr_type (insn);
30259 switch (type)
30261 case TYPE_EXTS:
30262 case TYPE_CNTLZ:
30263 case TYPE_TRAP:
30264 case TYPE_MUL:
30265 case TYPE_INSERT:
30266 case TYPE_FPCOMPARE:
30267 case TYPE_MFCR:
30268 case TYPE_MTCR:
30269 case TYPE_MFJMPR:
30270 case TYPE_MTJMPR:
30271 case TYPE_ISYNC:
30272 case TYPE_SYNC:
30273 case TYPE_LOAD_L:
30274 case TYPE_STORE_C:
30275 return true;
30276 case TYPE_SHIFT:
30277 if (get_attr_dot (insn) == DOT_NO
30278 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
30279 return true;
30280 else
30281 break;
30282 case TYPE_DIV:
30283 if (get_attr_size (insn) == SIZE_32)
30284 return true;
30285 else
30286 break;
30287 case TYPE_LOAD:
30288 case TYPE_STORE:
30289 case TYPE_FPLOAD:
30290 case TYPE_FPSTORE:
30291 if (get_attr_update (insn) == UPDATE_YES)
30292 return true;
30293 else
30294 break;
30295 default:
30296 break;
30298 break;
30299 case PROCESSOR_POWER7:
30300 type = get_attr_type (insn);
30302 switch (type)
30304 case TYPE_CR_LOGICAL:
30305 case TYPE_MFCR:
30306 case TYPE_MFCRF:
30307 case TYPE_MTCR:
30308 case TYPE_DIV:
30309 case TYPE_ISYNC:
30310 case TYPE_LOAD_L:
30311 case TYPE_STORE_C:
30312 case TYPE_MFJMPR:
30313 case TYPE_MTJMPR:
30314 return true;
30315 case TYPE_MUL:
30316 case TYPE_SHIFT:
30317 case TYPE_EXTS:
30318 if (get_attr_dot (insn) == DOT_YES)
30319 return true;
30320 else
30321 break;
30322 case TYPE_LOAD:
30323 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30324 || get_attr_update (insn) == UPDATE_YES)
30325 return true;
30326 else
30327 break;
30328 case TYPE_STORE:
30329 case TYPE_FPLOAD:
30330 case TYPE_FPSTORE:
30331 if (get_attr_update (insn) == UPDATE_YES)
30332 return true;
30333 else
30334 break;
30335 default:
30336 break;
30338 break;
30339 case PROCESSOR_POWER8:
30340 case PROCESSOR_POWER9:
30341 type = get_attr_type (insn);
30343 switch (type)
30345 case TYPE_CR_LOGICAL:
30346 case TYPE_DELAYED_CR:
30347 case TYPE_MFCR:
30348 case TYPE_MFCRF:
30349 case TYPE_MTCR:
30350 case TYPE_SYNC:
30351 case TYPE_ISYNC:
30352 case TYPE_LOAD_L:
30353 case TYPE_STORE_C:
30354 case TYPE_VECSTORE:
30355 case TYPE_MFJMPR:
30356 case TYPE_MTJMPR:
30357 return true;
30358 case TYPE_SHIFT:
30359 case TYPE_EXTS:
30360 case TYPE_MUL:
30361 if (get_attr_dot (insn) == DOT_YES)
30362 return true;
30363 else
30364 break;
30365 case TYPE_LOAD:
30366 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30367 || get_attr_update (insn) == UPDATE_YES)
30368 return true;
30369 else
30370 break;
30371 case TYPE_STORE:
30372 if (get_attr_update (insn) == UPDATE_YES
30373 && get_attr_indexed (insn) == INDEXED_YES)
30374 return true;
30375 else
30376 break;
30377 default:
30378 break;
30380 break;
30381 default:
30382 break;
30385 return false;
30388 static bool
30389 insn_must_be_last_in_group (rtx_insn *insn)
30391 enum attr_type type;
30393 if (!insn
30394 || NOTE_P (insn)
30395 || DEBUG_INSN_P (insn)
30396 || GET_CODE (PATTERN (insn)) == USE
30397 || GET_CODE (PATTERN (insn)) == CLOBBER)
30398 return false;
30400 switch (rs6000_cpu) {
30401 case PROCESSOR_POWER4:
30402 case PROCESSOR_POWER5:
30403 if (is_microcoded_insn (insn))
30404 return true;
30406 if (is_branch_slot_insn (insn))
30407 return true;
30409 break;
30410 case PROCESSOR_POWER6:
30411 type = get_attr_type (insn);
30413 switch (type)
30415 case TYPE_EXTS:
30416 case TYPE_CNTLZ:
30417 case TYPE_TRAP:
30418 case TYPE_MUL:
30419 case TYPE_FPCOMPARE:
30420 case TYPE_MFCR:
30421 case TYPE_MTCR:
30422 case TYPE_MFJMPR:
30423 case TYPE_MTJMPR:
30424 case TYPE_ISYNC:
30425 case TYPE_SYNC:
30426 case TYPE_LOAD_L:
30427 case TYPE_STORE_C:
30428 return true;
30429 case TYPE_SHIFT:
30430 if (get_attr_dot (insn) == DOT_NO
30431 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
30432 return true;
30433 else
30434 break;
30435 case TYPE_DIV:
30436 if (get_attr_size (insn) == SIZE_32)
30437 return true;
30438 else
30439 break;
30440 default:
30441 break;
30443 break;
30444 case PROCESSOR_POWER7:
30445 type = get_attr_type (insn);
30447 switch (type)
30449 case TYPE_ISYNC:
30450 case TYPE_SYNC:
30451 case TYPE_LOAD_L:
30452 case TYPE_STORE_C:
30453 return true;
30454 case TYPE_LOAD:
30455 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30456 && get_attr_update (insn) == UPDATE_YES)
30457 return true;
30458 else
30459 break;
30460 case TYPE_STORE:
30461 if (get_attr_update (insn) == UPDATE_YES
30462 && get_attr_indexed (insn) == INDEXED_YES)
30463 return true;
30464 else
30465 break;
30466 default:
30467 break;
30469 break;
30470 case PROCESSOR_POWER8:
30471 case PROCESSOR_POWER9:
30472 type = get_attr_type (insn);
30474 switch (type)
30476 case TYPE_MFCR:
30477 case TYPE_MTCR:
30478 case TYPE_ISYNC:
30479 case TYPE_SYNC:
30480 case TYPE_LOAD_L:
30481 case TYPE_STORE_C:
30482 return true;
30483 case TYPE_LOAD:
30484 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30485 && get_attr_update (insn) == UPDATE_YES)
30486 return true;
30487 else
30488 break;
30489 case TYPE_STORE:
30490 if (get_attr_update (insn) == UPDATE_YES
30491 && get_attr_indexed (insn) == INDEXED_YES)
30492 return true;
30493 else
30494 break;
30495 default:
30496 break;
30498 break;
30499 default:
30500 break;
30503 return false;
30506 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
30507 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
30509 static bool
30510 is_costly_group (rtx *group_insns, rtx next_insn)
30512 int i;
30513 int issue_rate = rs6000_issue_rate ();
30515 for (i = 0; i < issue_rate; i++)
30517 sd_iterator_def sd_it;
30518 dep_t dep;
30519 rtx insn = group_insns[i];
30521 if (!insn)
30522 continue;
30524 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
30526 rtx next = DEP_CON (dep);
30528 if (next == next_insn
30529 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
30530 return true;
30534 return false;
30537 /* Utility of the function redefine_groups.
30538 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
30539 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
30540 to keep it "far" (in a separate group) from GROUP_INSNS, following
30541 one of the following schemes, depending on the value of the flag
30542 -minsert_sched_nops = X:
30543 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
30544 in order to force NEXT_INSN into a separate group.
30545 (2) X < sched_finish_regroup_exact: insert exactly X nops.
30546 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
30547 insertion (has a group just ended, how many vacant issue slots remain in the
30548 last group, and how many dispatch groups were encountered so far). */
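/* For instance (illustrative): with -minsert-sched-nops=2 exactly two
   nops are emitted before NEXT_INSN, whereas
   -minsert-sched-nops=regroup_exact emits however many nops are needed
   to exhaust the remaining slots of the current group.  */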
30550 static int
30551 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
30552 rtx_insn *next_insn, bool *group_end, int can_issue_more,
30553 int *group_count)
30555 rtx nop;
30556 bool force;
30557 int issue_rate = rs6000_issue_rate ();
30558 bool end = *group_end;
30559 int i;
30561 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
30562 return can_issue_more;
30564 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
30565 return can_issue_more;
30567 force = is_costly_group (group_insns, next_insn);
30568 if (!force)
30569 return can_issue_more;
30571 if (sched_verbose > 6)
30572 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
30573 *group_count ,can_issue_more);
30575 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
30577 if (*group_end)
30578 can_issue_more = 0;
30580 /* Since only a branch can be issued in the last issue_slot, it is
30581 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
30582 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
30583 in this case the last nop will start a new group and the branch
30584 will be forced to the new group. */
30585 if (can_issue_more && !is_branch_slot_insn (next_insn))
30586 can_issue_more--;
30588 /* Do we have a special group ending nop? */
30589 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
30590 || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
30592 nop = gen_group_ending_nop ();
30593 emit_insn_before (nop, next_insn);
30594 can_issue_more = 0;
30596 else
30597 while (can_issue_more > 0)
30599 nop = gen_nop ();
30600 emit_insn_before (nop, next_insn);
30601 can_issue_more--;
30604 *group_end = true;
30605 return 0;
30608 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
30610 int n_nops = rs6000_sched_insert_nops;
30612 /* Nops can't be issued from the branch slot, so the effective
30613 issue_rate for nops is 'issue_rate - 1'. */
30614 if (can_issue_more == 0)
30615 can_issue_more = issue_rate;
30616 can_issue_more--;
30617 if (can_issue_more == 0)
30619 can_issue_more = issue_rate - 1;
30620 (*group_count)++;
30621 end = true;
30622 for (i = 0; i < issue_rate; i++)
30624 group_insns[i] = 0;
30628 while (n_nops > 0)
30630 nop = gen_nop ();
30631 emit_insn_before (nop, next_insn);
30632 if (can_issue_more == issue_rate - 1) /* new group begins */
30633 end = false;
30634 can_issue_more--;
30635 if (can_issue_more == 0)
30637 can_issue_more = issue_rate - 1;
30638 (*group_count)++;
30639 end = true;
30640 for (i = 0; i < issue_rate; i++)
30642 group_insns[i] = 0;
30645 n_nops--;
30648 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
30649 can_issue_more++;
30651 /* Is next_insn going to start a new group? */
30652 *group_end
30653 = (end
30654 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30655 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30656 || (can_issue_more < issue_rate &&
30657 insn_terminates_group_p (next_insn, previous_group)));
30658 if (*group_end && end)
30659 (*group_count)--;
30661 if (sched_verbose > 6)
30662 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
30663 *group_count, can_issue_more);
30664 return can_issue_more;
30667 return can_issue_more;
30670 /* This function tries to synch the dispatch groups that the compiler "sees"
30671 with the dispatch groups that the processor dispatcher is expected to
30672 form in practice. It tries to achieve this synchronization by forcing the
30673 estimated processor grouping on the compiler (as opposed to the function
30674 'pad_groups' which tries to force the scheduler's grouping on the processor).
30676 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
30677 examines the (estimated) dispatch groups that will be formed by the processor
30678 dispatcher. It marks these group boundaries to reflect the estimated
30679 processor grouping, overriding the grouping that the scheduler had marked.
30680 Depending on the value of the flag '-minsert-sched-nops' this function can
30681 force certain insns into separate groups or force a certain distance between
30682 them by inserting nops, for example, if there exists a "costly dependence"
30683 between the insns.
30685 The function estimates the group boundaries that the processor will form as
30686 follows: It keeps track of how many vacant issue slots are available after
30687 each insn. A subsequent insn will start a new group if one of the following
30688 4 cases applies:
30689 - no more vacant issue slots remain in the current dispatch group.
30690 - only the last issue slot, which is the branch slot, is vacant, but the next
30691 insn is not a branch.
30692 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
30693 which means that a cracked insn (which occupies two issue slots) can't be
30694 issued in this group.
30695 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
30696 start a new group. */
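/* Note on the bookkeeping below: the scheduler marks the first insn of
   each group by giving it TImode; redefine_groups rewrites those marks
   (the PUT_MODE calls with TImode/VOIDmode) so that they match the
   estimated processor grouping instead.  */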
30698 static int
30699 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30700 rtx_insn *tail)
30702 rtx_insn *insn, *next_insn;
30703 int issue_rate;
30704 int can_issue_more;
30705 int slot, i;
30706 bool group_end;
30707 int group_count = 0;
30708 rtx *group_insns;
30710 /* Initialize. */
30711 issue_rate = rs6000_issue_rate ();
30712 group_insns = XALLOCAVEC (rtx, issue_rate);
30713 for (i = 0; i < issue_rate; i++)
30715 group_insns[i] = 0;
30717 can_issue_more = issue_rate;
30718 slot = 0;
30719 insn = get_next_active_insn (prev_head_insn, tail);
30720 group_end = false;
30722 while (insn != NULL_RTX)
30724 slot = (issue_rate - can_issue_more);
30725 group_insns[slot] = insn;
30726 can_issue_more =
30727 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30728 if (insn_terminates_group_p (insn, current_group))
30729 can_issue_more = 0;
30731 next_insn = get_next_active_insn (insn, tail);
30732 if (next_insn == NULL_RTX)
30733 return group_count + 1;
30735 /* Is next_insn going to start a new group? */
30736 group_end
30737 = (can_issue_more == 0
30738 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30739 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30740 || (can_issue_more < issue_rate &&
30741 insn_terminates_group_p (next_insn, previous_group)));
30743 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
30744 next_insn, &group_end, can_issue_more,
30745 &group_count);
30747 if (group_end)
30749 group_count++;
30750 can_issue_more = 0;
30751 for (i = 0; i < issue_rate; i++)
30753 group_insns[i] = 0;
30757 if (GET_MODE (next_insn) == TImode && can_issue_more)
30758 PUT_MODE (next_insn, VOIDmode);
30759 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
30760 PUT_MODE (next_insn, TImode);
30762 insn = next_insn;
30763 if (can_issue_more == 0)
30764 can_issue_more = issue_rate;
30765 } /* while */
30767 return group_count;
30770 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
30771 dispatch group boundaries that the scheduler had marked. Pad with nops
30772 any dispatch groups which have vacant issue slots, in order to force the
30773 scheduler's grouping on the processor dispatcher. The function
30774 returns the number of dispatch groups found. */
30776 static int
30777 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30778 rtx_insn *tail)
30780 rtx_insn *insn, *next_insn;
30781 rtx nop;
30782 int issue_rate;
30783 int can_issue_more;
30784 int group_end;
30785 int group_count = 0;
30787 /* Initialize issue_rate. */
30788 issue_rate = rs6000_issue_rate ();
30789 can_issue_more = issue_rate;
30791 insn = get_next_active_insn (prev_head_insn, tail);
30792 next_insn = get_next_active_insn (insn, tail);
30794 while (insn != NULL_RTX)
30796 can_issue_more =
30797 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30799 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
30801 if (next_insn == NULL_RTX)
30802 break;
30804 if (group_end)
30806 /* If the scheduler had marked group termination at this location
30807 (between insn and next_insn), and neither insn nor next_insn will
30808 force group termination, pad the group with nops to force group
30809 termination. */
30810 if (can_issue_more
30811 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
30812 && !insn_terminates_group_p (insn, current_group)
30813 && !insn_terminates_group_p (next_insn, previous_group))
30815 if (!is_branch_slot_insn (next_insn))
30816 can_issue_more--;
30818 while (can_issue_more)
30820 nop = gen_nop ();
30821 emit_insn_before (nop, next_insn);
30822 can_issue_more--;
30826 can_issue_more = issue_rate;
30827 group_count++;
30830 insn = next_insn;
30831 next_insn = get_next_active_insn (insn, tail);
30834 return group_count;
30837 /* We're beginning a new block. Initialize data structures as necessary. */
30839 static void
30840 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
30841 int sched_verbose ATTRIBUTE_UNUSED,
30842 int max_ready ATTRIBUTE_UNUSED)
30844 last_scheduled_insn = NULL_RTX;
30845 load_store_pendulum = 0;
30848 /* The following function is called at the end of scheduling BB.
30849 After reload, it inserts nops to enforce insn group bundling. */
30851 static void
30852 rs6000_sched_finish (FILE *dump, int sched_verbose)
30854 int n_groups;
30856 if (sched_verbose)
30857 fprintf (dump, "=== Finishing schedule.\n");
30859 if (reload_completed && rs6000_sched_groups)
30861 /* Do not run the sched_finish hook when selective scheduling is enabled. */
30862 if (sel_sched_p ())
30863 return;
30865 if (rs6000_sched_insert_nops == sched_finish_none)
30866 return;
30868 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
30869 n_groups = pad_groups (dump, sched_verbose,
30870 current_sched_info->prev_head,
30871 current_sched_info->next_tail);
30872 else
30873 n_groups = redefine_groups (dump, sched_verbose,
30874 current_sched_info->prev_head,
30875 current_sched_info->next_tail);
30877 if (sched_verbose >= 6)
30879 fprintf (dump, "ngroups = %d\n", n_groups);
30880 print_rtl (dump, current_sched_info->prev_head);
30881 fprintf (dump, "Done finish_sched\n");
30886 struct _rs6000_sched_context
30888 short cached_can_issue_more;
30889 rtx last_scheduled_insn;
30890 int load_store_pendulum;
30893 typedef struct _rs6000_sched_context rs6000_sched_context_def;
30894 typedef rs6000_sched_context_def *rs6000_sched_context_t;
30896 /* Allocate store for new scheduling context. */
30897 static void *
30898 rs6000_alloc_sched_context (void)
30900 return xmalloc (sizeof (rs6000_sched_context_def));
30903 /* If CLEAN_P is true, initialize _SC with clean data;
30904 otherwise initialize it from the global context. */
30905 static void
30906 rs6000_init_sched_context (void *_sc, bool clean_p)
30908 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30910 if (clean_p)
30912 sc->cached_can_issue_more = 0;
30913 sc->last_scheduled_insn = NULL_RTX;
30914 sc->load_store_pendulum = 0;
30916 else
30918 sc->cached_can_issue_more = cached_can_issue_more;
30919 sc->last_scheduled_insn = last_scheduled_insn;
30920 sc->load_store_pendulum = load_store_pendulum;
30924 /* Sets the global scheduling context to the one pointed to by _SC. */
30925 static void
30926 rs6000_set_sched_context (void *_sc)
30928 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30930 gcc_assert (sc != NULL);
30932 cached_can_issue_more = sc->cached_can_issue_more;
30933 last_scheduled_insn = sc->last_scheduled_insn;
30934 load_store_pendulum = sc->load_store_pendulum;
30937 /* Free _SC. */
30938 static void
30939 rs6000_free_sched_context (void *_sc)
30941 gcc_assert (_sc != NULL);
30943 free (_sc);
30947 /* Length in units of the trampoline for entering a nested function. */
30949 int
30950 rs6000_trampoline_size (void)
30952 int ret = 0;
30954 switch (DEFAULT_ABI)
30956 default:
30957 gcc_unreachable ();
30959 case ABI_AIX:
30960 ret = (TARGET_32BIT) ? 12 : 24;
30961 break;
30963 case ABI_ELFv2:
30964 gcc_assert (!TARGET_32BIT);
30965 ret = 32;
30966 break;
30968 case ABI_DARWIN:
30969 case ABI_V4:
30970 ret = (TARGET_32BIT) ? 40 : 48;
30971 break;
30974 return ret;
30977 /* Emit RTL insns to initialize the variable parts of a trampoline.
30978 FNADDR is an RTX for the address of the function's pure code.
30979 CXT is an RTX for the static chain value for the function. */
30981 static void
30982 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
30984 int regsize = (TARGET_32BIT) ? 4 : 8;
30985 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
30986 rtx ctx_reg = force_reg (Pmode, cxt);
30987 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
30989 switch (DEFAULT_ABI)
30991 default:
30992 gcc_unreachable ();
30994 /* Under AIX, just build the 3-word function descriptor. */
30995 case ABI_AIX:
30997 rtx fnmem, fn_reg, toc_reg;
30999 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
31000 error ("You cannot take the address of a nested function if you use "
31001 "the -mno-pointers-to-nested-functions option.");
31003 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
31004 fn_reg = gen_reg_rtx (Pmode);
31005 toc_reg = gen_reg_rtx (Pmode);
31007 /* Macro to shorten the code expansions below. */
31008 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
31010 m_tramp = replace_equiv_address (m_tramp, addr);
31012 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
31013 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
31014 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
31015 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
31016 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
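/* At this point the trampoline holds a three-word function descriptor,
   each word REGSIZE bytes: word 0 the entry address, word 1 the TOC
   pointer, word 2 the static chain CXT.  */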
31018 # undef MEM_PLUS
31020 break;
31022 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
31023 case ABI_ELFv2:
31024 case ABI_DARWIN:
31025 case ABI_V4:
31026 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
31027 LCT_NORMAL, VOIDmode, 4,
31028 addr, Pmode,
31029 GEN_INT (rs6000_trampoline_size ()), SImode,
31030 fnaddr, Pmode,
31031 ctx_reg, Pmode);
31032 break;
31037 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
31038 identifier as an argument, so the front end shouldn't look it up. */
31040 static bool
31041 rs6000_attribute_takes_identifier_p (const_tree attr_id)
31043 return is_attribute_p ("altivec", attr_id);
31046 /* Handle the "altivec" attribute. The attribute may have
31047 arguments as follows:
31049 __attribute__((altivec(vector__)))
31050 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
31051 __attribute__((altivec(bool__))) (always followed by 'unsigned')
31053 and may appear more than once (e.g., 'vector bool char') in a
31054 given declaration. */
31056 static tree
31057 rs6000_handle_altivec_attribute (tree *node,
31058 tree name ATTRIBUTE_UNUSED,
31059 tree args,
31060 int flags ATTRIBUTE_UNUSED,
31061 bool *no_add_attrs)
31063 tree type = *node, result = NULL_TREE;
31064 machine_mode mode;
31065 int unsigned_p;
31066 char altivec_type
31067 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
31068 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
31069 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
31070 : '?');
31072 while (POINTER_TYPE_P (type)
31073 || TREE_CODE (type) == FUNCTION_TYPE
31074 || TREE_CODE (type) == METHOD_TYPE
31075 || TREE_CODE (type) == ARRAY_TYPE)
31076 type = TREE_TYPE (type);
31078 mode = TYPE_MODE (type);
31080 /* Check for invalid AltiVec type qualifiers. */
31081 if (type == long_double_type_node)
31082 error ("use of %<long double%> in AltiVec types is invalid");
31083 else if (type == boolean_type_node)
31084 error ("use of boolean types in AltiVec types is invalid");
31085 else if (TREE_CODE (type) == COMPLEX_TYPE)
31086 error ("use of %<complex%> in AltiVec types is invalid");
31087 else if (DECIMAL_FLOAT_MODE_P (mode))
31088 error ("use of decimal floating point types in AltiVec types is invalid");
31089 else if (!TARGET_VSX)
31091 if (type == long_unsigned_type_node || type == long_integer_type_node)
31093 if (TARGET_64BIT)
31094 error ("use of %<long%> in AltiVec types is invalid for "
31095 "64-bit code without -mvsx");
31096 else if (rs6000_warn_altivec_long)
31097 warning (0, "use of %<long%> in AltiVec types is deprecated; "
31098 "use %<int%>");
31100 else if (type == long_long_unsigned_type_node
31101 || type == long_long_integer_type_node)
31102 error ("use of %<long long%> in AltiVec types is invalid without "
31103 "-mvsx");
31104 else if (type == double_type_node)
31105 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
31108 switch (altivec_type)
31110 case 'v':
31111 unsigned_p = TYPE_UNSIGNED (type);
31112 switch (mode)
31114 case TImode:
31115 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
31116 break;
31117 case DImode:
31118 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
31119 break;
31120 case SImode:
31121 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
31122 break;
31123 case HImode:
31124 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
31125 break;
31126 case QImode:
31127 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
31128 break;
31129 case SFmode: result = V4SF_type_node; break;
31130 case DFmode: result = V2DF_type_node; break;
31131 /* If the user says 'vector int bool', we may be handed the 'bool'
31132 attribute _before_ the 'vector' attribute, and so select the
31133 proper type in the 'b' case below. */
31134 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
31135 case V2DImode: case V2DFmode:
31136 result = type;
31137 default: break;
31139 break;
31140 case 'b':
31141 switch (mode)
31143 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
31144 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
31145 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
31146 case QImode: case V16QImode: result = bool_V16QI_type_node;
31147 default: break;
31149 break;
31150 case 'p':
31151 switch (mode)
31153 case V8HImode: result = pixel_V8HI_type_node;
31154 default: break;
31156 default: break;
31159 /* Propagate qualifiers attached to the element type
31160 onto the vector type. */
31161 if (result && result != type && TYPE_QUALS (type))
31162 result = build_qualified_type (result, TYPE_QUALS (type));
31164 *no_add_attrs = true; /* No need to hang on to the attribute. */
31166 if (result)
31167 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
31169 return NULL_TREE;
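/* Illustrative expansion (editorial): the AltiVec context keywords are
   lowered by the front end into this attribute, so e.g.

     vector unsigned int v;    is roughly
     __attribute__((altivec(vector__))) unsigned int v;

     vector bool short b;      is roughly
     __attribute__((altivec(bool__))) unsigned short b;

   which the handler above maps to unsigned_V4SI_type_node (case 'v',
   SImode) and bool_V8HI_type_node (case 'b', HImode) respectively.  */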
31172 /* AltiVec defines four built-in scalar types that serve as vector
31173 elements; we must teach the compiler how to mangle them. */
31175 static const char *
31176 rs6000_mangle_type (const_tree type)
31178 type = TYPE_MAIN_VARIANT (type);
31180 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31181 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31182 return NULL;
31184 if (type == bool_char_type_node) return "U6__boolc";
31185 if (type == bool_short_type_node) return "U6__bools";
31186 if (type == pixel_type_node) return "u7__pixel";
31187 if (type == bool_int_type_node) return "U6__booli";
31188 if (type == bool_long_type_node) return "U6__booll";
31190 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
31191 "g" for IBM extended double, no matter whether it is long double (using
31192 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
31193 if (TARGET_FLOAT128)
31195 if (type == ieee128_float_type_node)
31196 return "U10__float128";
31198 if (type == ibm128_float_type_node)
31199 return "g";
31201 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
31202 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
31205 /* Mangle IBM extended float long double as `g' (__float128) on
31206 powerpc*-linux where long-double-64 previously was the default. */
31207 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
31208 && TARGET_ELF
31209 && TARGET_LONG_DOUBLE_128
31210 && !TARGET_IEEEQUAD)
31211 return "g";
31213 /* For all other types, use normal C++ mangling. */
31214 return NULL;
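/* Illustrative mangling (editorial sketch): per the table above, the
   element type of "vector bool int" contributes "U6__booli" to a C++
   mangled name, "vector pixel" elements contribute "u7__pixel", and
   with 128-bit IBM extended long double the type mangles as "g".  */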
31217 /* Handle a "longcall" or "shortcall" attribute; arguments as in
31218 struct attribute_spec.handler. */
31220 static tree
31221 rs6000_handle_longcall_attribute (tree *node, tree name,
31222 tree args ATTRIBUTE_UNUSED,
31223 int flags ATTRIBUTE_UNUSED,
31224 bool *no_add_attrs)
31226 if (TREE_CODE (*node) != FUNCTION_TYPE
31227 && TREE_CODE (*node) != FIELD_DECL
31228 && TREE_CODE (*node) != TYPE_DECL)
31230 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31231 name);
31232 *no_add_attrs = true;
31235 return NULL_TREE;
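/* Illustrative usage (editorial): the attribute attaches to function
   types, e.g.

     extern void far_target (void) __attribute__ ((longcall));

   so that calls to far_target are forced through rs6000_longcall_ref
   below (a register-indirect call) even without -mlongcall.  */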
31238 /* Set longcall attributes on all functions declared when
31239 rs6000_default_long_calls is true. */
31240 static void
31241 rs6000_set_default_type_attributes (tree type)
31243 if (rs6000_default_long_calls
31244 && (TREE_CODE (type) == FUNCTION_TYPE
31245 || TREE_CODE (type) == METHOD_TYPE))
31246 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
31247 NULL_TREE,
31248 TYPE_ATTRIBUTES (type));
31250 #if TARGET_MACHO
31251 darwin_set_default_type_attributes (type);
31252 #endif
31255 /* Return a reference suitable for calling a function with the
31256 longcall attribute. */
31259 rs6000_longcall_ref (rtx call_ref)
31261 const char *call_name;
31262 tree node;
31264 if (GET_CODE (call_ref) != SYMBOL_REF)
31265 return call_ref;
31267 /* System V adds '.' to the internal name, so skip all leading dots. */
31268 call_name = XSTR (call_ref, 0);
31269 if (*call_name == '.')
31271 while (*call_name == '.')
31272 call_name++;
31274 node = get_identifier (call_name);
31275 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
31278 return force_reg (Pmode, call_ref);
31281 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
31282 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
31283 #endif
31285 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31286 struct attribute_spec.handler. */
31287 static tree
31288 rs6000_handle_struct_attribute (tree *node, tree name,
31289 tree args ATTRIBUTE_UNUSED,
31290 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31292 tree *type = NULL;
31293 if (DECL_P (*node))
31295 if (TREE_CODE (*node) == TYPE_DECL)
31296 type = &TREE_TYPE (*node);
31298 else
31299 type = node;
31301 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
31302 || TREE_CODE (*type) == UNION_TYPE)))
31304 warning (OPT_Wattributes, "%qE attribute ignored", name);
31305 *no_add_attrs = true;
31308 else if ((is_attribute_p ("ms_struct", name)
31309 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
31310 || ((is_attribute_p ("gcc_struct", name)
31311 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
31313 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
31314 name);
31315 *no_add_attrs = true;
31318 return NULL_TREE;
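/* Illustrative usage (editorial):

     struct __attribute__ ((ms_struct)) S { int a : 3; int b : 14; };

   opts a single type into the MS bitfield layout tested by
   rs6000_ms_bitfield_layout_p below; naming both ms_struct and
   gcc_struct on one type draws the "incompatible attribute" warning
   above.  */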
31321 static bool
31322 rs6000_ms_bitfield_layout_p (const_tree record_type)
31324 return ((TARGET_USE_MS_BITFIELD_LAYOUT
31325 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
31326 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
31329 #ifdef USING_ELFOS_H
31331 /* A get_unnamed_section callback, used for switching to toc_section. */
31333 static void
31334 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
31336 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31337 && TARGET_MINIMAL_TOC
31338 && !TARGET_RELOCATABLE)
31340 if (!toc_initialized)
31342 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
31343 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31344 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
31345 fprintf (asm_out_file, "\t.tc ");
31346 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
31347 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31348 fprintf (asm_out_file, "\n");
31350 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31351 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31352 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31353 fprintf (asm_out_file, " = .+32768\n");
31354 toc_initialized = 1;
31356 else
31357 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31359 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31360 && !TARGET_RELOCATABLE)
31362 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
31363 if (!toc_initialized)
31365 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31366 toc_initialized = 1;
31369 else
31371 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31372 if (!toc_initialized)
31374 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31375 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31376 fprintf (asm_out_file, " = .+32768\n");
31377 toc_initialized = 1;
31382 /* Implement TARGET_ASM_INIT_SECTIONS. */
31384 static void
31385 rs6000_elf_asm_init_sections (void)
31387 toc_section
31388 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
31390 sdata2_section
31391 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
31392 SDATA2_SECTION_ASM_OP);
31395 /* Implement TARGET_SELECT_RTX_SECTION. */
31397 static section *
31398 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
31399 unsigned HOST_WIDE_INT align)
31401 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
31402 return toc_section;
31403 else
31404 return default_elf_select_rtx_section (mode, x, align);
31407 /* For a SYMBOL_REF, set generic flags and then perform some
31408 target-specific processing.
31410 When the AIX ABI is requested on a non-AIX system, replace the
31411 function name with the real name (with a leading .) rather than the
31412 function descriptor name. This avoids a lot of overriding code
31413 that would otherwise be needed to read past the prefixes. */
31415 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
31416 static void
31417 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
31419 default_encode_section_info (decl, rtl, first);
31421 if (first
31422 && TREE_CODE (decl) == FUNCTION_DECL
31423 && !TARGET_AIX
31424 && DEFAULT_ABI == ABI_AIX)
31426 rtx sym_ref = XEXP (rtl, 0);
31427 size_t len = strlen (XSTR (sym_ref, 0));
31428 char *str = XALLOCAVEC (char, len + 2);
31429 str[0] = '.';
31430 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
31431 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
31435 static inline bool
31436 compare_section_name (const char *section, const char *templ)
31438 int len;
31440 len = strlen (templ);
31441 return (strncmp (section, templ, len) == 0
31442 && (section[len] == 0 || section[len] == '.'));
31445 bool
31446 rs6000_elf_in_small_data_p (const_tree decl)
31448 if (rs6000_sdata == SDATA_NONE)
31449 return false;
31451 /* We want to merge strings, so we never consider them small data. */
31452 if (TREE_CODE (decl) == STRING_CST)
31453 return false;
31455 /* Functions are never in the small data area. */
31456 if (TREE_CODE (decl) == FUNCTION_DECL)
31457 return false;
31459 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
31461 const char *section = DECL_SECTION_NAME (decl);
31462 if (compare_section_name (section, ".sdata")
31463 || compare_section_name (section, ".sdata2")
31464 || compare_section_name (section, ".gnu.linkonce.s")
31465 || compare_section_name (section, ".sbss")
31466 || compare_section_name (section, ".sbss2")
31467 || compare_section_name (section, ".gnu.linkonce.sb")
31468 || strcmp (section, ".PPC.EMB.sdata0") == 0
31469 || strcmp (section, ".PPC.EMB.sbss0") == 0)
31470 return true;
31472 else
31474 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
31476 if (size > 0
31477 && size <= g_switch_value
31478 /* If it's not public, and we're not going to reference it there,
31479 there's no need to put it in the small data section. */
31480 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
31481 return true;
31484 return false;
31487 #endif /* USING_ELFOS_H */
31489 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
31491 static bool
31492 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
31494 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
31497 /* Do not place thread-local symbols refs in the object blocks. */
31499 static bool
31500 rs6000_use_blocks_for_decl_p (const_tree decl)
31502 return !DECL_THREAD_LOCAL_P (decl);
31505 /* Return a REG that occurs in ADDR with coefficient 1.
31506 ADDR can be effectively incremented by incrementing REG.
31508 r0 is special and we must not select it as an address
31509 register by this routine since our caller will try to
31510 increment the returned register via an "la" instruction. */
31513 find_addr_reg (rtx addr)
31515 while (GET_CODE (addr) == PLUS)
31517 if (GET_CODE (XEXP (addr, 0)) == REG
31518 && REGNO (XEXP (addr, 0)) != 0)
31519 addr = XEXP (addr, 0);
31520 else if (GET_CODE (XEXP (addr, 1)) == REG
31521 && REGNO (XEXP (addr, 1)) != 0)
31522 addr = XEXP (addr, 1);
31523 else if (CONSTANT_P (XEXP (addr, 0)))
31524 addr = XEXP (addr, 1);
31525 else if (CONSTANT_P (XEXP (addr, 1)))
31526 addr = XEXP (addr, 0);
31527 else
31528 gcc_unreachable ();
31530 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
31531 return addr;
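/* E.g. (editorial): for ADDR == (plus (reg 3) (const_int 8)) the loop
   above returns (reg 3); for (plus (plus (reg 0) (reg 4)) (const_int 8))
   it first descends into the inner PLUS and then returns (reg 4),
   since r0 is excluded as explained above.  */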
31534 void
31535 rs6000_fatal_bad_address (rtx op)
31537 fatal_insn ("bad address", op);
31540 #if TARGET_MACHO
31542 typedef struct branch_island_d {
31543 tree function_name;
31544 tree label_name;
31545 int line_number;
31546 } branch_island;
31549 static vec<branch_island, va_gc> *branch_islands;
31551 /* Remember to generate a branch island for far calls to the given
31552 function. */
31554 static void
31555 add_compiler_branch_island (tree label_name, tree function_name,
31556 int line_number)
31558 branch_island bi = {function_name, label_name, line_number};
31559 vec_safe_push (branch_islands, bi);
31562 /* Generate far-jump branch islands for everything recorded in
31563 branch_islands. Invoked immediately after the last instruction of
31564 the epilogue has been emitted; the branch islands must be appended
31565 to, and contiguous with, the function body. Mach-O stubs are
31566 generated in machopic_output_stub(). */
31568 static void
31569 macho_branch_islands (void)
31571 char tmp_buf[512];
31573 while (!vec_safe_is_empty (branch_islands))
31575 branch_island *bi = &branch_islands->last ();
31576 const char *label = IDENTIFIER_POINTER (bi->label_name);
31577 const char *name = IDENTIFIER_POINTER (bi->function_name);
31578 char name_buf[512];
31579 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
31580 if (name[0] == '*' || name[0] == '&')
31581 strcpy (name_buf, name+1);
31582 else
31584 name_buf[0] = '_';
31585 strcpy (name_buf+1, name);
31587 strcpy (tmp_buf, "\n");
31588 strcat (tmp_buf, label);
31589 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31590 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31591 dbxout_stabd (N_SLINE, bi->line_number);
31592 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31593 if (flag_pic)
31595 if (TARGET_LINK_STACK)
31597 char name[32];
31598 get_ppc476_thunk_name (name);
31599 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
31600 strcat (tmp_buf, name);
31601 strcat (tmp_buf, "\n");
31602 strcat (tmp_buf, label);
31603 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31605 else
31607 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
31608 strcat (tmp_buf, label);
31609 strcat (tmp_buf, "_pic\n");
31610 strcat (tmp_buf, label);
31611 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31614 strcat (tmp_buf, "\taddis r11,r11,ha16(");
31615 strcat (tmp_buf, name_buf);
31616 strcat (tmp_buf, " - ");
31617 strcat (tmp_buf, label);
31618 strcat (tmp_buf, "_pic)\n");
31620 strcat (tmp_buf, "\tmtlr r0\n");
31622 strcat (tmp_buf, "\taddi r12,r11,lo16(");
31623 strcat (tmp_buf, name_buf);
31624 strcat (tmp_buf, " - ");
31625 strcat (tmp_buf, label);
31626 strcat (tmp_buf, "_pic)\n");
31628 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
31630 else
31632 strcat (tmp_buf, ":\nlis r12,hi16(");
31633 strcat (tmp_buf, name_buf);
31634 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
31635 strcat (tmp_buf, name_buf);
31636 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
31638 output_asm_insn (tmp_buf, 0);
31639 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31640 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31641 dbxout_stabd (N_SLINE, bi->line_number);
31642 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31643 branch_islands->pop ();
31647 /* NO_PREVIOUS_DEF checks whether FUNCTION_NAME has already been
31648 recorded in the branch_islands vector. */
31650 static int
31651 no_previous_def (tree function_name)
31653 branch_island *bi;
31654 unsigned ix;
31656 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31657 if (function_name == bi->function_name)
31658 return 0;
31659 return 1;
31662 /* GET_PREV_LABEL gets the label name from the previous definition of
31663 the function. */
31665 static tree
31666 get_prev_label (tree function_name)
31668 branch_island *bi;
31669 unsigned ix;
31671 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31672 if (function_name == bi->function_name)
31673 return bi->label_name;
31674 return NULL_TREE;
31677 /* INSN is either a function call or a millicode call. It may have an
31678 unconditional jump in its delay slot.
31680 OPERANDS[DEST_OPERAND_NUMBER] is the routine we are calling. */
31682 char *
31683 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
31684 int cookie_operand_number)
31686 static char buf[256];
31687 if (darwin_emit_branch_islands
31688 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
31689 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
31691 tree labelname;
31692 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
31694 if (no_previous_def (funname))
31696 rtx label_rtx = gen_label_rtx ();
31697 char *label_buf, temp_buf[256];
31698 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
31699 CODE_LABEL_NUMBER (label_rtx));
31700 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
31701 labelname = get_identifier (label_buf);
31702 add_compiler_branch_island (labelname, funname, insn_line (insn));
31704 else
31705 labelname = get_prev_label (funname);
31707 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
31708 instruction will reach 'foo', otherwise link as 'bl L42'".
31709 "L42" should be a 'branch island', that will do a far jump to
31710 'foo'. Branch islands are generated in
31711 macho_branch_islands(). */
31712 sprintf (buf, "jbsr %%z%d,%.246s",
31713 dest_operand_number, IDENTIFIER_POINTER (labelname));
31715 else
31716 sprintf (buf, "bl %%z%d", dest_operand_number);
31717 return buf;
31720 /* Generate PIC and indirect symbol stubs. */
31722 void
31723 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31725 unsigned int length;
31726 char *symbol_name, *lazy_ptr_name;
31727 char *local_label_0;
31728 static int label = 0;
31730 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31731 symb = (*targetm.strip_name_encoding) (symb);
31734 length = strlen (symb);
31735 symbol_name = XALLOCAVEC (char, length + 32);
31736 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31738 lazy_ptr_name = XALLOCAVEC (char, length + 32);
31739 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
31741 if (flag_pic == 2)
31742 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
31743 else
31744 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
31746 if (flag_pic == 2)
31748 fprintf (file, "\t.align 5\n");
31750 fprintf (file, "%s:\n", stub);
31751 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31753 label++;
31754 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
31755 sprintf (local_label_0, "\"L%011d$spb\"", label);
31757 fprintf (file, "\tmflr r0\n");
31758 if (TARGET_LINK_STACK)
31760 char name[32];
31761 get_ppc476_thunk_name (name);
31762 fprintf (file, "\tbl %s\n", name);
31763 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31765 else
31767 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
31768 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31770 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
31771 lazy_ptr_name, local_label_0);
31772 fprintf (file, "\tmtlr r0\n");
31773 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
31774 (TARGET_64BIT ? "ldu" : "lwzu"),
31775 lazy_ptr_name, local_label_0);
31776 fprintf (file, "\tmtctr r12\n");
31777 fprintf (file, "\tbctr\n");
31779 else
31781 fprintf (file, "\t.align 4\n");
31783 fprintf (file, "%s:\n", stub);
31784 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31786 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
31787 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
31788 (TARGET_64BIT ? "ldu" : "lwzu"),
31789 lazy_ptr_name);
31790 fprintf (file, "\tmtctr r12\n");
31791 fprintf (file, "\tbctr\n");
31794 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
31795 fprintf (file, "%s:\n", lazy_ptr_name);
31796 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31797 fprintf (file, "%sdyld_stub_binding_helper\n",
31798 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
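/* Illustrative output (editorial sketch) of the non-PIC branch above
   for a symbol "bar"; the exact label spellings come from
   GEN_SYMBOL_NAME_FOR_SYMBOL / GEN_LAZY_PTR_NAME_FOR_SYMBOL and are
   approximated here:

       L_bar$stub:
               .indirect_symbol _bar
               lis r11,ha16(L_bar$lazy_ptr)
               lwzu r12,lo16(L_bar$lazy_ptr)(r11)
               mtctr r12
               bctr
*/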
31801 /* Legitimize PIC addresses. If the address is already
31802 position-independent, we return ORIG. Newly generated
31803 position-independent addresses go into a reg. This is REG if
31804 nonzero, otherwise we allocate register(s) as necessary. */
31806 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
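/* Editorial note: SMALL_INT tests for a signed 16-bit immediate;
   adding 0x8000 maps the legal range [-0x8000, 0x7fff] onto
   [0, 0xffff], so a single unsigned compare suffices.  */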
31809 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
31810 rtx reg)
31812 rtx base, offset;
31814 if (reg == NULL && ! reload_in_progress && ! reload_completed)
31815 reg = gen_reg_rtx (Pmode);
31817 if (GET_CODE (orig) == CONST)
31819 rtx reg_temp;
31821 if (GET_CODE (XEXP (orig, 0)) == PLUS
31822 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
31823 return orig;
31825 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
31827 /* Use a different reg for the intermediate value, as
31828 it will be marked UNCHANGING. */
31829 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
31830 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
31831 Pmode, reg_temp);
31832 offset =
31833 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
31834 Pmode, reg);
31836 if (GET_CODE (offset) == CONST_INT)
31838 if (SMALL_INT (offset))
31839 return plus_constant (Pmode, base, INTVAL (offset));
31840 else if (! reload_in_progress && ! reload_completed)
31841 offset = force_reg (Pmode, offset);
31842 else
31844 rtx mem = force_const_mem (Pmode, orig);
31845 return machopic_legitimize_pic_address (mem, Pmode, reg);
31848 return gen_rtx_PLUS (Pmode, base, offset);
31851 /* Fall back on generic machopic code. */
31852 return machopic_legitimize_pic_address (orig, mode, reg);
31855 /* Output a .machine directive for the Darwin assembler, and call
31856 the generic start_file routine. */
31858 static void
31859 rs6000_darwin_file_start (void)
31861 static const struct
31863 const char *arg;
31864 const char *name;
31865 HOST_WIDE_INT if_set;
31866 } mapping[] = {
31867 { "ppc64", "ppc64", MASK_64BIT },
31868 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
31869 { "power4", "ppc970", 0 },
31870 { "G5", "ppc970", 0 },
31871 { "7450", "ppc7450", 0 },
31872 { "7400", "ppc7400", MASK_ALTIVEC },
31873 { "G4", "ppc7400", 0 },
31874 { "750", "ppc750", 0 },
31875 { "740", "ppc750", 0 },
31876 { "G3", "ppc750", 0 },
31877 { "604e", "ppc604e", 0 },
31878 { "604", "ppc604", 0 },
31879 { "603e", "ppc603", 0 },
31880 { "603", "ppc603", 0 },
31881 { "601", "ppc601", 0 },
31882 { NULL, "ppc", 0 } };
31883 const char *cpu_id = "";
31884 size_t i;
31886 rs6000_file_start ();
31887 darwin_file_start ();
31889 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
31891 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
31892 cpu_id = rs6000_default_cpu;
31894 if (global_options_set.x_rs6000_cpu_index)
31895 cpu_id = processor_target_table[rs6000_cpu_index].name;
31897 /* Look through the mapping array. Pick the first name that either
31898 matches the argument, has a bit set in IF_SET that is also set
31899 in the target flags, or has a NULL name. */
31901 i = 0;
31902 while (mapping[i].arg != NULL
31903 && strcmp (mapping[i].arg, cpu_id) != 0
31904 && (mapping[i].if_set & rs6000_isa_flags) == 0)
31905 i++;
31907 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
31910 #endif /* TARGET_MACHO */
31912 #if TARGET_ELF
31913 static int
31914 rs6000_elf_reloc_rw_mask (void)
31916 if (flag_pic)
31917 return 3;
31918 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31919 return 2;
31920 else
31921 return 0;
31924 /* Record an element in the table of global constructors. SYMBOL is
31925 a SYMBOL_REF of the function to be called; PRIORITY is a number
31926 between 0 and MAX_INIT_PRIORITY.
31928 This differs from default_named_section_asm_out_constructor in
31929 that we have special handling for -mrelocatable. */
31931 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
31932 static void
31933 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
31935 const char *section = ".ctors";
31936 char buf[16];
31938 if (priority != DEFAULT_INIT_PRIORITY)
31940 sprintf (buf, ".ctors.%.5u",
31941 /* Invert the numbering so the linker puts us in the proper
31942 order; constructors are run from right to left, and the
31943 linker sorts in increasing order. */
31944 MAX_INIT_PRIORITY - priority);
31945 section = buf;
31948 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31949 assemble_align (POINTER_SIZE);
31951 if (TARGET_RELOCATABLE)
31953 fputs ("\t.long (", asm_out_file);
31954 output_addr_const (asm_out_file, symbol);
31955 fputs (")@fixup\n", asm_out_file);
31957 else
31958 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
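/* E.g. (editorial): with MAX_INIT_PRIORITY == 65535, a constructor of
   priority 100 is emitted into section ".ctors.65435" (65535 - 100),
   so the linker's increasing sort realizes the inverted order
   described above.  */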
31961 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
31962 static void
31963 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
31965 const char *section = ".dtors";
31966 char buf[16];
31968 if (priority != DEFAULT_INIT_PRIORITY)
31970 sprintf (buf, ".dtors.%.5u",
31971 /* Invert the numbering so the linker puts us in the proper
31972 order; destructors are handled like the constructors above, and
31973 the linker sorts in increasing order. */
31974 MAX_INIT_PRIORITY - priority);
31975 section = buf;
31978 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31979 assemble_align (POINTER_SIZE);
31981 if (TARGET_RELOCATABLE)
31983 fputs ("\t.long (", asm_out_file);
31984 output_addr_const (asm_out_file, symbol);
31985 fputs (")@fixup\n", asm_out_file);
31987 else
31988 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31991 void
31992 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
31994 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
31996 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
31997 ASM_OUTPUT_LABEL (file, name);
31998 fputs (DOUBLE_INT_ASM_OP, file);
31999 rs6000_output_function_entry (file, name);
32000 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
32001 if (DOT_SYMBOLS)
32003 fputs ("\t.size\t", file);
32004 assemble_name (file, name);
32005 fputs (",24\n\t.type\t.", file);
32006 assemble_name (file, name);
32007 fputs (",@function\n", file);
32008 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
32010 fputs ("\t.globl\t.", file);
32011 assemble_name (file, name);
32012 putc ('\n', file);
32015 else
32016 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32017 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32018 rs6000_output_function_entry (file, name);
32019 fputs (":\n", file);
32020 return;
32023 if (TARGET_RELOCATABLE
32024 && !TARGET_SECURE_PLT
32025 && (get_pool_size () != 0 || crtl->profile)
32026 && uses_TOC ())
32028 char buf[256];
32030 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32032 fprintf (file, "\t.long ");
32033 assemble_name (file, toc_label_name);
32034 need_toc_init = 1;
32035 putc ('-', file);
32036 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32037 assemble_name (file, buf);
32038 putc ('\n', file);
32041 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32042 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32044 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
32046 char buf[256];
32048 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32050 fprintf (file, "\t.quad .TOC.-");
32051 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32052 assemble_name (file, buf);
32053 putc ('\n', file);
32056 if (DEFAULT_ABI == ABI_AIX)
32058 const char *desc_name, *orig_name;
32060 orig_name = (*targetm.strip_name_encoding) (name);
32061 desc_name = orig_name;
32062 while (*desc_name == '.')
32063 desc_name++;
32065 if (TREE_PUBLIC (decl))
32066 fprintf (file, "\t.globl %s\n", desc_name);
32068 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32069 fprintf (file, "%s:\n", desc_name);
32070 fprintf (file, "\t.long %s\n", orig_name);
32071 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
32072 fputs ("\t.long 0\n", file);
32073 fprintf (file, "\t.previous\n");
32075 ASM_OUTPUT_LABEL (file, name);
32078 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
32079 static void
32080 rs6000_elf_file_end (void)
32082 #ifdef HAVE_AS_GNU_ATTRIBUTE
32083 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
32085 if (rs6000_passes_float)
32086 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
32087 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
32088 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
32089 : 2));
32090 if (rs6000_passes_vector)
32091 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
32092 (TARGET_ALTIVEC_ABI ? 2
32093 : TARGET_SPE_ABI ? 3
32094 : 1));
32095 if (rs6000_returns_struct)
32096 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
32097 aix_struct_return ? 2 : 1);
32099 #endif
32100 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
32101 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
32102 file_end_indicate_exec_stack ();
32103 #endif
32105 if (flag_split_stack)
32106 file_end_indicate_split_stack ();
32108 if (cpu_builtin_p)
32110 /* We have expanded a CPU builtin, so we need to emit a reference to
32111 the special symbol that LIBC uses to declare it supports the
32112 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
32113 switch_to_section (data_section);
32114 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
32115 fprintf (asm_out_file, "\t%s %s\n",
32116 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
32119 #endif
32121 #if TARGET_XCOFF
32123 #ifndef HAVE_XCOFF_DWARF_EXTRAS
32124 #define HAVE_XCOFF_DWARF_EXTRAS 0
32125 #endif
32127 static enum unwind_info_type
32128 rs6000_xcoff_debug_unwind_info (void)
32130 return UI_NONE;
32133 static void
32134 rs6000_xcoff_asm_output_anchor (rtx symbol)
32136 char buffer[100];
32138 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
32139 SYMBOL_REF_BLOCK_OFFSET (symbol));
32140 fprintf (asm_out_file, "%s", SET_ASM_OP);
32141 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
32142 fprintf (asm_out_file, ",");
32143 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
32144 fprintf (asm_out_file, "\n");
32147 static void
32148 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
32150 fputs (GLOBAL_ASM_OP, stream);
32151 RS6000_OUTPUT_BASENAME (stream, name);
32152 putc ('\n', stream);
32155 /* A get_unnamed_decl callback, used for read-only sections. PTR
32156 points to the section string variable. */
32158 static void
32159 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
32161 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
32162 *(const char *const *) directive,
32163 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32166 /* Likewise for read-write sections. */
32168 static void
32169 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
32171 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
32172 *(const char *const *) directive,
32173 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32176 static void
32177 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
32179 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
32180 *(const char *const *) directive,
32181 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32184 /* A get_unnamed_section callback, used for switching to toc_section. */
32186 static void
32187 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32189 if (TARGET_MINIMAL_TOC)
32191 /* toc_section is always selected at least once from
32192 rs6000_xcoff_file_start, so this is guaranteed to
32193 be defined exactly once in each file. */
32194 if (!toc_initialized)
32196 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
32197 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
32198 toc_initialized = 1;
32200 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
32201 (TARGET_32BIT ? "" : ",3"));
32203 else
32204 fputs ("\t.toc\n", asm_out_file);
32207 /* Implement TARGET_ASM_INIT_SECTIONS. */
32209 static void
32210 rs6000_xcoff_asm_init_sections (void)
32212 read_only_data_section
32213 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32214 &xcoff_read_only_section_name);
32216 private_data_section
32217 = get_unnamed_section (SECTION_WRITE,
32218 rs6000_xcoff_output_readwrite_section_asm_op,
32219 &xcoff_private_data_section_name);
32221 tls_data_section
32222 = get_unnamed_section (SECTION_TLS,
32223 rs6000_xcoff_output_tls_section_asm_op,
32224 &xcoff_tls_data_section_name);
32226 tls_private_data_section
32227 = get_unnamed_section (SECTION_TLS,
32228 rs6000_xcoff_output_tls_section_asm_op,
32229 &xcoff_private_data_section_name);
32231 read_only_private_data_section
32232 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32233 &xcoff_private_data_section_name);
32235 toc_section
32236 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
32238 readonly_data_section = read_only_data_section;
32241 static int
32242 rs6000_xcoff_reloc_rw_mask (void)
32244 return 3;
32247 static void
32248 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
32249 tree decl ATTRIBUTE_UNUSED)
32251 int smclass;
32252 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
32254 if (flags & SECTION_EXCLUDE)
32255 smclass = 4;
32256 else if (flags & SECTION_DEBUG)
32258 fprintf (asm_out_file, "\t.dwsect %s\n", name);
32259 return;
32261 else if (flags & SECTION_CODE)
32262 smclass = 0;
32263 else if (flags & SECTION_TLS)
32264 smclass = 3;
32265 else if (flags & SECTION_WRITE)
32266 smclass = 2;
32267 else
32268 smclass = 1;
32270 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
32271 (flags & SECTION_CODE) ? "." : "",
32272 name, suffix[smclass], flags & SECTION_ENTSIZE);
32275 #define IN_NAMED_SECTION(DECL) \
32276 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
32277 && DECL_SECTION_NAME (DECL) != NULL)
32279 static section *
32280 rs6000_xcoff_select_section (tree decl, int reloc,
32281 unsigned HOST_WIDE_INT align)
32283 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
32284 named section. */
32285 if (align > BIGGEST_ALIGNMENT)
32287 resolve_unique_section (decl, reloc, true);
32288 if (IN_NAMED_SECTION (decl))
32289 return get_named_section (decl, NULL, reloc);
32292 if (decl_readonly_section (decl, reloc))
32294 if (TREE_PUBLIC (decl))
32295 return read_only_data_section;
32296 else
32297 return read_only_private_data_section;
32299 else
32301 #if HAVE_AS_TLS
32302 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32304 if (TREE_PUBLIC (decl))
32305 return tls_data_section;
32306 else if (bss_initializer_p (decl))
32308 /* Convert to COMMON to emit in BSS. */
32309 DECL_COMMON (decl) = 1;
32310 return tls_comm_section;
32312 else
32313 return tls_private_data_section;
32315 else
32316 #endif
32317 if (TREE_PUBLIC (decl))
32318 return data_section;
32319 else
32320 return private_data_section;
32324 static void
32325 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
32327 const char *name;
32329 /* Use select_section for private data and uninitialized data with
32330 alignment <= BIGGEST_ALIGNMENT. */
32331 if (!TREE_PUBLIC (decl)
32332 || DECL_COMMON (decl)
32333 || (DECL_INITIAL (decl) == NULL_TREE
32334 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
32335 || DECL_INITIAL (decl) == error_mark_node
32336 || (flag_zero_initialized_in_bss
32337 && initializer_zerop (DECL_INITIAL (decl))))
32338 return;
32340 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32341 name = (*targetm.strip_name_encoding) (name);
32342 set_decl_section_name (decl, name);
32345 /* Select section for constant in constant pool.
32347 On RS/6000, all constants are in the private read-only data area.
32348 However, if this is being placed in the TOC it must be output as a
32349 toc entry. */
32351 static section *
32352 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
32353 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
32355 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32356 return toc_section;
32357 else
32358 return read_only_private_data_section;
32361 /* Remove any trailing [DS] or the like from the symbol name. */
32363 static const char *
32364 rs6000_xcoff_strip_name_encoding (const char *name)
32366 size_t len;
32367 if (*name == '*')
32368 name++;
32369 len = strlen (name);
32370 if (name[len - 1] == ']')
32371 return ggc_alloc_string (name, len - 4);
32372 else
32373 return name;
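/* E.g. (editorial): "foo[DS]" is returned as "foo" (len - 4 drops the
   four characters of the "[DS]" suffix), and a leading '*' is skipped
   first.  */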
32376 /* Section attributes. AIX is always PIC. */
32378 static unsigned int
32379 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
32381 unsigned int align;
32382 unsigned int flags = default_section_type_flags (decl, name, reloc);
32384 /* Align to at least UNIT size. */
32385 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
32386 align = MIN_UNITS_PER_WORD;
32387 else
32388 /* Increase alignment of large objects if not already stricter. */
32389 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
32390 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
32391 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
32393 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
32396 /* Output at beginning of assembler file.
32398 Initialize the section names for the RS/6000 at this point.
32400 Specify filename, including full path, to assembler.
32402 We want to go into the TOC section so at least one .toc will be emitted.
32403 Also, in order to output proper .bs/.es pairs, we need at least one static
32404 [RW] section emitted.
32406 Finally, declare mcount when profiling to make the assembler happy. */
32408 static void
32409 rs6000_xcoff_file_start (void)
32411 rs6000_gen_section_name (&xcoff_bss_section_name,
32412 main_input_filename, ".bss_");
32413 rs6000_gen_section_name (&xcoff_private_data_section_name,
32414 main_input_filename, ".rw_");
32415 rs6000_gen_section_name (&xcoff_read_only_section_name,
32416 main_input_filename, ".ro_");
32417 rs6000_gen_section_name (&xcoff_tls_data_section_name,
32418 main_input_filename, ".tls_");
32419 rs6000_gen_section_name (&xcoff_tbss_section_name,
32420 main_input_filename, ".tbss_[UL]");
32422 fputs ("\t.file\t", asm_out_file);
32423 output_quoted_string (asm_out_file, main_input_filename);
32424 fputc ('\n', asm_out_file);
32425 if (write_symbols != NO_DEBUG)
32426 switch_to_section (private_data_section);
32427 switch_to_section (toc_section);
32428 switch_to_section (text_section);
32429 if (profile_flag)
32430 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
32431 rs6000_file_start ();
32434 /* Output at end of assembler file.
32435 On the RS/6000, referencing data should automatically pull in text. */
32437 static void
32438 rs6000_xcoff_file_end (void)
32440 switch_to_section (text_section);
32441 fputs ("_section_.text:\n", asm_out_file);
32442 switch_to_section (data_section);
32443 fputs (TARGET_32BIT
32444 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
32445 asm_out_file);
32448 struct declare_alias_data
32450 FILE *file;
32451 bool function_descriptor;
32454 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
32456 static bool
32457 rs6000_declare_alias (struct symtab_node *n, void *d)
32459 struct declare_alias_data *data = (struct declare_alias_data *)d;
32460 /* The main symbol is output specially, because the varasm machinery does
32461 part of the job for us; we do not need to declare .globl/.lglobl and such. */
32462 if (!n->alias || n->weakref)
32463 return false;
32465 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
32466 return false;
32468 /* Prevent assemble_alias from trying to use .set pseudo operation
32469 that does not behave as expected by the middle-end. */
32470 TREE_ASM_WRITTEN (n->decl) = true;
32472 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
32473 char *buffer = (char *) alloca (strlen (name) + 2);
32474 char *p;
32475 int dollar_inside = 0;
32477 strcpy (buffer, name);
32478 p = strchr (buffer, '$');
32479 while (p) {
32480 *p = '_';
32481 dollar_inside++;
32482 p = strchr (p + 1, '$');
32484 if (TREE_PUBLIC (n->decl))
32486 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
32488 if (dollar_inside) {
32489 if (data->function_descriptor)
32490 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
32491 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
32493 if (data->function_descriptor)
32495 fputs ("\t.globl .", data->file);
32496 RS6000_OUTPUT_BASENAME (data->file, buffer);
32497 putc ('\n', data->file);
32499 fputs ("\t.globl ", data->file);
32500 RS6000_OUTPUT_BASENAME (data->file, buffer);
32501 putc ('\n', data->file);
32503 #ifdef ASM_WEAKEN_DECL
32504 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
32505 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
32506 #endif
32508 else
32510 if (dollar_inside)
32512 if (data->function_descriptor)
32513 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
32514 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
32516 if (data->function_descriptor)
32518 fputs ("\t.lglobl .", data->file);
32519 RS6000_OUTPUT_BASENAME (data->file, buffer);
32520 putc ('\n', data->file);
32522 fputs ("\t.lglobl ", data->file);
32523 RS6000_OUTPUT_BASENAME (data->file, buffer);
32524 putc ('\n', data->file);
32526 if (data->function_descriptor)
32527 fputs (".", data->file);
32528 RS6000_OUTPUT_BASENAME (data->file, buffer);
32529 fputs (":\n", data->file);
32530 return false;
32533 /* This macro produces the initial definition of a function name.
32534 On the RS/6000, we need to place an extra '.' in the function name and
32535 output the function descriptor.
32536 Dollar signs are converted to underscores.
32538 The csect for the function will have already been created when
32539 text_section was selected. We do have to go back to that csect, however.
32541 The third and fourth parameters to the .function pseudo-op (16 and 044)
32542 are placeholders which no longer have any use.
32544 Because AIX assembler's .set command has unexpected semantics, we output
32545 all aliases as alternative labels in front of the definition. */
32547 void
32548 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
32550 char *buffer = (char *) alloca (strlen (name) + 1);
32551 char *p;
32552 int dollar_inside = 0;
32553 struct declare_alias_data data = {file, false};
32555 strcpy (buffer, name);
32556 p = strchr (buffer, '$');
32557 while (p) {
32558 *p = '_';
32559 dollar_inside++;
32560 p = strchr (p + 1, '$');
32562 if (TREE_PUBLIC (decl))
32564 if (!RS6000_WEAK || !DECL_WEAK (decl))
32566 if (dollar_inside) {
32567 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32568 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32570 fputs ("\t.globl .", file);
32571 RS6000_OUTPUT_BASENAME (file, buffer);
32572 putc ('\n', file);
32575 else
32577 if (dollar_inside) {
32578 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32579 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32581 fputs ("\t.lglobl .", file);
32582 RS6000_OUTPUT_BASENAME (file, buffer);
32583 putc ('\n', file);
32585 fputs ("\t.csect ", file);
32586 RS6000_OUTPUT_BASENAME (file, buffer);
32587 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
32588 RS6000_OUTPUT_BASENAME (file, buffer);
32589 fputs (":\n", file);
32590 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32591 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
32592 RS6000_OUTPUT_BASENAME (file, buffer);
32593 fputs (", TOC[tc0], 0\n", file);
32594 in_section = NULL;
32595 switch_to_section (function_section (decl));
32596 putc ('.', file);
32597 RS6000_OUTPUT_BASENAME (file, buffer);
32598 fputs (":\n", file);
32599 data.function_descriptor = true;
32600 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32601 if (!DECL_IGNORED_P (decl))
32603 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32604 xcoffout_declare_function (file, decl, buffer);
32605 else if (write_symbols == DWARF2_DEBUG)
32607 name = (*targetm.strip_name_encoding) (name);
32608 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
32611 return;
32614 /* This macro produces the initial definition of an object (variable) name.
32615 Because AIX assembler's .set command has unexpected semantics, we output
32616 all aliases as alternative labels in front of the definition. */
32618 void
32619 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
32621 struct declare_alias_data data = {file, false};
32622 RS6000_OUTPUT_BASENAME (file, name);
32623 fputs (":\n", file);
32624 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32627 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
32629 void
32630 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
32632 fputs (integer_asm_op (size, FALSE), file);
32633 assemble_name (file, label);
32634 fputs ("-$", file);
32637 /* Output a symbol offset relative to the dbase for the current object.
32638 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
32639 signed offsets.
32641 __gcc_unwind_dbase is embedded in all executables/libraries through
32642 libgcc/config/rs6000/crtdbase.S. */
32644 void
32645 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
32647 fputs (integer_asm_op (size, FALSE), file);
32648 assemble_name (file, label);
32649 fputs ("-__gcc_unwind_dbase", file);
32652 #ifdef HAVE_AS_TLS
32653 static void
32654 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
32656 rtx symbol;
32657 int flags;
32659 default_encode_section_info (decl, rtl, first);
32661 /* Careful not to prod global register variables. */
32662 if (!MEM_P (rtl))
32663 return;
32664 symbol = XEXP (rtl, 0);
32665 if (GET_CODE (symbol) != SYMBOL_REF)
32666 return;
32668 flags = SYMBOL_REF_FLAGS (symbol);
32670 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32671 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
32673 SYMBOL_REF_FLAGS (symbol) = flags;
32675 #endif /* HAVE_AS_TLS */
32676 #endif /* TARGET_XCOFF */
32678 /* Return true if INSN should not be copied. */
32680 static bool
32681 rs6000_cannot_copy_insn_p (rtx_insn *insn)
32683 return recog_memoized (insn) >= 0
32684 && get_attr_cannot_copy (insn);
32687 /* Compute a (partial) cost for rtx X. Return true if the complete
32688 cost has been computed, and false if subexpressions should be
32689 scanned. In either case, *TOTAL contains the cost result. */
32691 static bool
32692 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
32693 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
32695 int code = GET_CODE (x);
32697 switch (code)
32699 /* On the RS/6000, if it is valid in the insn, it is free. */
32700 case CONST_INT:
32701 if (((outer_code == SET
32702 || outer_code == PLUS
32703 || outer_code == MINUS)
32704 && (satisfies_constraint_I (x)
32705 || satisfies_constraint_L (x)))
32706 || (outer_code == AND
32707 && (satisfies_constraint_K (x)
32708 || (mode == SImode
32709 ? satisfies_constraint_L (x)
32710 : satisfies_constraint_J (x))))
32711 || ((outer_code == IOR || outer_code == XOR)
32712 && (satisfies_constraint_K (x)
32713 || (mode == SImode
32714 ? satisfies_constraint_L (x)
32715 : satisfies_constraint_J (x))))
32716 || outer_code == ASHIFT
32717 || outer_code == ASHIFTRT
32718 || outer_code == LSHIFTRT
32719 || outer_code == ROTATE
32720 || outer_code == ROTATERT
32721 || outer_code == ZERO_EXTRACT
32722 || (outer_code == MULT
32723 && satisfies_constraint_I (x))
32724 || ((outer_code == DIV || outer_code == UDIV
32725 || outer_code == MOD || outer_code == UMOD)
32726 && exact_log2 (INTVAL (x)) >= 0)
32727 || (outer_code == COMPARE
32728 && (satisfies_constraint_I (x)
32729 || satisfies_constraint_K (x)))
32730 || ((outer_code == EQ || outer_code == NE)
32731 && (satisfies_constraint_I (x)
32732 || satisfies_constraint_K (x)
32733 || (mode == SImode
32734 ? satisfies_constraint_L (x)
32735 : satisfies_constraint_J (x))))
32736 || (outer_code == GTU
32737 && satisfies_constraint_I (x))
32738 || (outer_code == LTU
32739 && satisfies_constraint_P (x)))
32741 *total = 0;
32742 return true;
32744 else if ((outer_code == PLUS
32745 && reg_or_add_cint_operand (x, VOIDmode))
32746 || (outer_code == MINUS
32747 && reg_or_sub_cint_operand (x, VOIDmode))
32748 || ((outer_code == SET
32749 || outer_code == IOR
32750 || outer_code == XOR)
32751 && (INTVAL (x)
32752 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
32754 *total = COSTS_N_INSNS (1);
32755 return true;
32757 /* FALLTHRU */
32759 case CONST_DOUBLE:
32760 case CONST_WIDE_INT:
32761 case CONST:
32762 case HIGH:
32763 case SYMBOL_REF:
32764 case MEM:
32765 /* When optimizing for size, MEM should be slightly more expensive
32766 than generating the address, e.g., (plus (reg) (const)).
32767 L1 cache latency is about two instructions. */
32768 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
32769 return true;
32771 case LABEL_REF:
32772 *total = 0;
32773 return true;
32775 case PLUS:
32776 case MINUS:
32777 if (FLOAT_MODE_P (mode))
32778 *total = rs6000_cost->fp;
32779 else
32780 *total = COSTS_N_INSNS (1);
32781 return false;
32783 case MULT:
32784 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32785 && satisfies_constraint_I (XEXP (x, 1)))
32787 if (INTVAL (XEXP (x, 1)) >= -256
32788 && INTVAL (XEXP (x, 1)) <= 255)
32789 *total = rs6000_cost->mulsi_const9;
32790 else
32791 *total = rs6000_cost->mulsi_const;
32793 else if (mode == SFmode)
32794 *total = rs6000_cost->fp;
32795 else if (FLOAT_MODE_P (mode))
32796 *total = rs6000_cost->dmul;
32797 else if (mode == DImode)
32798 *total = rs6000_cost->muldi;
32799 else
32800 *total = rs6000_cost->mulsi;
32801 return false;
32803 case FMA:
32804 if (mode == SFmode)
32805 *total = rs6000_cost->fp;
32806 else
32807 *total = rs6000_cost->dmul;
32808 break;
32810 case DIV:
32811 case MOD:
32812 if (FLOAT_MODE_P (mode))
32814 *total = mode == DFmode ? rs6000_cost->ddiv
32815 : rs6000_cost->sdiv;
32816 return false;
32818 /* FALLTHRU */
32820 case UDIV:
32821 case UMOD:
32822 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32823 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
32825 if (code == DIV || code == MOD)
32826 /* Shift, addze */
32827 *total = COSTS_N_INSNS (2);
32828 else
32829 /* Shift */
32830 *total = COSTS_N_INSNS (1);
32832 else
32834 if (GET_MODE (XEXP (x, 1)) == DImode)
32835 *total = rs6000_cost->divdi;
32836 else
32837 *total = rs6000_cost->divsi;
32839 /* Add in shift and subtract for MOD unless we have a mod instruction. */
32840 if (!TARGET_MODULO && (code == MOD || code == UMOD))
32841 *total += COSTS_N_INSNS (2);
32842 return false;
32844 case CTZ:
32845 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
32846 return false;
32848 case FFS:
32849 *total = COSTS_N_INSNS (4);
32850 return false;
32852 case POPCOUNT:
32853 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
32854 return false;
32856 case PARITY:
32857 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
32858 return false;
32860 case NOT:
32861 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
32862 *total = 0;
32863 else
32864 *total = COSTS_N_INSNS (1);
32865 return false;
32867 case AND:
32868 if (CONST_INT_P (XEXP (x, 1)))
32870 rtx left = XEXP (x, 0);
32871 rtx_code left_code = GET_CODE (left);
32873 /* rotate-and-mask: 1 insn. */
32874 if ((left_code == ROTATE
32875 || left_code == ASHIFT
32876 || left_code == LSHIFTRT)
32877 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
32879 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
32880 if (!CONST_INT_P (XEXP (left, 1)))
32881 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
32882 *total += COSTS_N_INSNS (1);
32883 return true;
32886 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
32887 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
32888 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
32889 || (val & 0xffff) == val
32890 || (val & 0xffff0000) == val
32891 || ((val & 0xffff) == 0 && mode == SImode))
32893 *total = rtx_cost (left, mode, AND, 0, speed);
32894 *total += COSTS_N_INSNS (1);
32895 return true;
32898 /* 2 insns. */
32899 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
32901 *total = rtx_cost (left, mode, AND, 0, speed);
32902 *total += COSTS_N_INSNS (2);
32903 return true;
32907 *total = COSTS_N_INSNS (1);
32908 return false;
32910 case IOR:
32911 /* FIXME */
32912 *total = COSTS_N_INSNS (1);
32913 return true;
32915 case CLZ:
32916 case XOR:
32917 case ZERO_EXTRACT:
32918 *total = COSTS_N_INSNS (1);
32919 return false;
32921 case ASHIFT:
32922 /* The EXTSWSLI instruction combines a sign extend and a shift. Don't
32923 count the sign extend and the shift separately within the insn. */
32924 if (TARGET_EXTSWSLI && mode == DImode
32925 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
32926 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
32928 *total = 0;
32929 return false;
32931 /* fall through */
32933 case ASHIFTRT:
32934 case LSHIFTRT:
32935 case ROTATE:
32936 case ROTATERT:
32937 /* Handle mul_highpart. */
32938 if (outer_code == TRUNCATE
32939 && GET_CODE (XEXP (x, 0)) == MULT)
32941 if (mode == DImode)
32942 *total = rs6000_cost->muldi;
32943 else
32944 *total = rs6000_cost->mulsi;
32945 return true;
32947 else if (outer_code == AND)
32948 *total = 0;
32949 else
32950 *total = COSTS_N_INSNS (1);
32951 return false;
32953 case SIGN_EXTEND:
32954 case ZERO_EXTEND:
32955 if (GET_CODE (XEXP (x, 0)) == MEM)
32956 *total = 0;
32957 else
32958 *total = COSTS_N_INSNS (1);
32959 return false;
32961 case COMPARE:
32962 case NEG:
32963 case ABS:
32964 if (!FLOAT_MODE_P (mode))
32966 *total = COSTS_N_INSNS (1);
32967 return false;
32969 /* FALLTHRU */
32971 case FLOAT:
32972 case UNSIGNED_FLOAT:
32973 case FIX:
32974 case UNSIGNED_FIX:
32975 case FLOAT_TRUNCATE:
32976 *total = rs6000_cost->fp;
32977 return false;
32979 case FLOAT_EXTEND:
32980 if (mode == DFmode)
32981 *total = rs6000_cost->sfdf_convert;
32982 else
32983 *total = rs6000_cost->fp;
32984 return false;
32986 case UNSPEC:
32987 switch (XINT (x, 1))
32989 case UNSPEC_FRSP:
32990 *total = rs6000_cost->fp;
32991 return true;
32993 default:
32994 break;
32996 break;
32998 case CALL:
32999 case IF_THEN_ELSE:
33000 if (!speed)
33002 *total = COSTS_N_INSNS (1);
33003 return true;
33005 else if (FLOAT_MODE_P (mode)
33006 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
33008 *total = rs6000_cost->fp;
33009 return false;
33011 break;
33013 case NE:
33014 case EQ:
33015 case GTU:
33016 case LTU:
33017 /* Carry bit requires mode == Pmode.
33018 NEG or PLUS already counted, so only add one. */
33019 if (mode == Pmode
33020 && (outer_code == NEG || outer_code == PLUS))
33022 *total = COSTS_N_INSNS (1);
33023 return true;
33025 if (outer_code == SET)
33027 if (XEXP (x, 1) == const0_rtx)
33029 if (TARGET_ISEL && !TARGET_MFCRF)
33030 *total = COSTS_N_INSNS (8);
33031 else
33032 *total = COSTS_N_INSNS (2);
33033 return true;
33035 else
33037 *total = COSTS_N_INSNS (3);
33038 return false;
33041 /* FALLTHRU */
33043 case GT:
33044 case LT:
33045 case UNORDERED:
33046 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
33048 if (TARGET_ISEL && !TARGET_MFCRF)
33049 *total = COSTS_N_INSNS (8);
33050 else
33051 *total = COSTS_N_INSNS (2);
33052 return true;
33054 /* CC COMPARE. */
33055 if (outer_code == COMPARE)
33057 *total = 0;
33058 return true;
33060 break;
33062 default:
33063 break;
33066 return false;
33069 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
33071 static bool
33072 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
33073 int opno, int *total, bool speed)
33075 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
33077 fprintf (stderr,
33078 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
33079 "opno = %d, total = %d, speed = %s, x:\n",
33080 ret ? "complete" : "scan inner",
33081 GET_MODE_NAME (mode),
33082 GET_RTX_NAME (outer_code),
33083 opno,
33084 *total,
33085 speed ? "true" : "false");
33087 debug_rtx (x);
33089 return ret;
33092 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
33094 static int
33095 rs6000_debug_address_cost (rtx x, machine_mode mode,
33096 addr_space_t as, bool speed)
33098 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
33100 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
33101 ret, speed ? "true" : "false");
33102 debug_rtx (x);
33104 return ret;
33108 /* A C expression returning the cost of moving data from a register of class
33109 CLASS1 to one of CLASS2. */
33111 static int
33112 rs6000_register_move_cost (machine_mode mode,
33113 reg_class_t from, reg_class_t to)
33115 int ret;
33117 if (TARGET_DEBUG_COST)
33118 dbg_cost_ctrl++;
33120 /* Moves from/to GENERAL_REGS. */
33121 if (reg_classes_intersect_p (to, GENERAL_REGS)
33122 || reg_classes_intersect_p (from, GENERAL_REGS))
33124 reg_class_t rclass = from;
33126 if (! reg_classes_intersect_p (to, GENERAL_REGS))
33127 rclass = to;
33129 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
33130 ret = (rs6000_memory_move_cost (mode, rclass, false)
33131 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
33133 /* It's more expensive to move CR_REGS than CR0_REGS because of the
33134 shift. */
33135 else if (rclass == CR_REGS)
33136 ret = 4;
33138 /* For those processors that have slow LR/CTR moves, make them more
33139 expensive than memory in order to bias spills to memory. */
33140 else if ((rs6000_cpu == PROCESSOR_POWER6
33141 || rs6000_cpu == PROCESSOR_POWER7
33142 || rs6000_cpu == PROCESSOR_POWER8
33143 || rs6000_cpu == PROCESSOR_POWER9)
33144 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
33145 ret = 6 * hard_regno_nregs[0][mode];
33147 else
33148 /* A move will cost one instruction per GPR moved. */
33149 ret = 2 * hard_regno_nregs[0][mode];
33152 /* If we have VSX, we can easily move between FPR or Altivec registers. */
33153 else if (VECTOR_MEM_VSX_P (mode)
33154 && reg_classes_intersect_p (to, VSX_REGS)
33155 && reg_classes_intersect_p (from, VSX_REGS))
33156 ret = 2 * hard_regno_nregs[32][mode];
33158 /* Moving between two similar registers is just one instruction. */
33159 else if (reg_classes_intersect_p (to, from))
33160 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
33162 /* Everything else has to go through GENERAL_REGS. */
33163 else
33164 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
33165 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
33167 if (TARGET_DEBUG_COST)
33169 if (dbg_cost_ctrl == 1)
33170 fprintf (stderr,
33171 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
33172 ret, GET_MODE_NAME (mode), reg_class_names[from],
33173 reg_class_names[to]);
33174 dbg_cost_ctrl--;
33177 return ret;
33180 /* A C expression returning the cost of moving data of MODE from a register
33181 to or from memory. */
33183 static int
33184 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
33185 bool in ATTRIBUTE_UNUSED)
33187 int ret;
33189 if (TARGET_DEBUG_COST)
33190 dbg_cost_ctrl++;
33192 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
33193 ret = 4 * hard_regno_nregs[0][mode];
33194 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
33195 || reg_classes_intersect_p (rclass, VSX_REGS)))
33196 ret = 4 * hard_regno_nregs[32][mode];
33197 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
33198 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
33199 else
33200 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
33202 if (TARGET_DEBUG_COST)
33204 if (dbg_cost_ctrl == 1)
33205 fprintf (stderr,
33206 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
33207 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
33208 dbg_cost_ctrl--;
33211 return ret;
33214 /* Returns a code for a target-specific builtin that implements the
33215 reciprocal of the function, or NULL_TREE if not available. */
33217 static tree
33218 rs6000_builtin_reciprocal (tree fndecl)
33220 switch (DECL_FUNCTION_CODE (fndecl))
33222 case VSX_BUILTIN_XVSQRTDP:
33223 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
33224 return NULL_TREE;
33226 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
33228 case VSX_BUILTIN_XVSQRTSP:
33229 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
33230 return NULL_TREE;
33232 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
33234 default:
33235 return NULL_TREE;
33239 /* Load up a constant. If the mode is a vector mode, splat the value across
33240 all of the vector elements. */
33242 static rtx
33243 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
33245 rtx reg;
33247 if (mode == SFmode || mode == DFmode)
33249 rtx d = const_double_from_real_value (dconst, mode);
33250 reg = force_reg (mode, d);
33252 else if (mode == V4SFmode)
33254 rtx d = const_double_from_real_value (dconst, SFmode);
33255 rtvec v = gen_rtvec (4, d, d, d, d);
33256 reg = gen_reg_rtx (mode);
33257 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
33259 else if (mode == V2DFmode)
33261 rtx d = const_double_from_real_value (dconst, DFmode);
33262 rtvec v = gen_rtvec (2, d, d);
33263 reg = gen_reg_rtx (mode);
33264 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
33266 else
33267 gcc_unreachable ();
33269 return reg;
33272 /* Generate an FMA instruction. */
33274 static void
33275 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
33277 machine_mode mode = GET_MODE (target);
33278 rtx dst;
33280 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
33281 gcc_assert (dst != NULL);
33283 if (dst != target)
33284 emit_move_insn (target, dst);
33287 /* Generate an FNMSUB instruction: dst = -fma(m1, m2, -a). */
33289 static void
33290 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
33292 machine_mode mode = GET_MODE (dst);
33293 rtx r;
33295 /* This is a tad more complicated, since the fnma_optab is for
33296 a different expression: fma(-m1, m2, a), which is the same
33297 thing except in the case of signed zeros.
33299 Fortunately we know that if FMA is supported, FNMSUB is
33300 also supported in the ISA. Just expand it directly. */
33302 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
33304 r = gen_rtx_NEG (mode, a);
33305 r = gen_rtx_FMA (mode, m1, m2, r);
33306 r = gen_rtx_NEG (mode, r);
33307 emit_insn (gen_rtx_SET (dst, r));
33310 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
33311 add a reg_note saying that this was a division. Support both scalar and
33312 vector divide. Assumes no trapping math and finite arguments. */
33314 void
33315 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
33317 machine_mode mode = GET_MODE (dst);
33318 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
33319 int i;
33321 /* Low precision estimates guarantee 5 bits of accuracy. High
33322 precision estimates guarantee 14 bits of accuracy. SFmode
33323 requires 23 bits of accuracy. DFmode requires 52 bits of
33324 accuracy. Each pass at least doubles the accuracy, leading
33325 to the following. */
33326 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
33327 if (mode == DFmode || mode == V2DFmode)
33328 passes++;
33330 enum insn_code code = optab_handler (smul_optab, mode);
33331 insn_gen_fn gen_mul = GEN_FCN (code);
33333 gcc_assert (code != CODE_FOR_nothing);
33335 one = rs6000_load_constant_and_splat (mode, dconst1);
33337 /* x0 = 1./d estimate */
33338 x0 = gen_reg_rtx (mode);
33339 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
33340 UNSPEC_FRES)));
33342 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
33343 if (passes > 1) {
33345 /* e0 = 1. - d * x0 */
33346 e0 = gen_reg_rtx (mode);
33347 rs6000_emit_nmsub (e0, d, x0, one);
33349 /* x1 = x0 + e0 * x0 */
33350 x1 = gen_reg_rtx (mode);
33351 rs6000_emit_madd (x1, e0, x0, x0);
33353 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
33354 ++i, xprev = xnext, eprev = enext) {
33356 /* enext = eprev * eprev */
33357 enext = gen_reg_rtx (mode);
33358 emit_insn (gen_mul (enext, eprev, eprev));
33360 /* xnext = xprev + enext * xprev */
33361 xnext = gen_reg_rtx (mode);
33362 rs6000_emit_madd (xnext, enext, xprev, xprev);
33365 } else
33366 xprev = x0;
33368 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
33370 /* u = n * xprev */
33371 u = gen_reg_rtx (mode);
33372 emit_insn (gen_mul (u, n, xprev));
33374 /* v = n - (d * u) */
33375 v = gen_reg_rtx (mode);
33376 rs6000_emit_nmsub (v, d, u, n);
33378 /* dst = (v * xprev) + u */
33379 rs6000_emit_madd (dst, v, xprev, u);
33381 if (note_p)
33382 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
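/* A minimal scalar sketch of the refinement rs6000_emit_swdiv emits,
   assuming exact arithmetic and using 1.0/d in place of the fre/fres
   hardware estimate (the names below are illustrative only):  */
#if 0
static double
swdiv_sketch (double n, double d, int passes)
{
  double x = 1.0 / d;           /* stands in for the 5- or 14-bit estimate */
  if (passes > 1)
    {
      double e = 1.0 - d * x;   /* e0 = 1 - d*x0 (fnmsub) */
      x = x + e * x;            /* x1 = x0 + e0*x0 (fmadd) */
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;            /* the error term squares each pass */
          x = x + e * x;
        }
    }
  double u = n * x;             /* u = n * x */
  double v = n - d * u;         /* v = n - d*u (fnmsub) */
  return v * x + u;             /* dst = v*x + u (fmadd) */
}
#endif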
33385 /* Goldschmidt's Algorithm for single/double-precision floating point
33386 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
33388 void
33389 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
33391 machine_mode mode = GET_MODE (src);
33392 rtx e = gen_reg_rtx (mode);
33393 rtx g = gen_reg_rtx (mode);
33394 rtx h = gen_reg_rtx (mode);
33396 /* Low precision estimates guarantee 5 bits of accuracy. High
33397 precision estimates guarantee 14 bits of accuracy. SFmode
33398 requires 23 bits of accuracy. DFmode requires 52 bits of
33399 accuracy. Each pass at least doubles the accuracy, leading
33400 to the following. */
33401 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
33402 if (mode == DFmode || mode == V2DFmode)
33403 passes++;
33405 int i;
33406 rtx mhalf;
33407 enum insn_code code = optab_handler (smul_optab, mode);
33408 insn_gen_fn gen_mul = GEN_FCN (code);
33410 gcc_assert (code != CODE_FOR_nothing);
33412 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
33414 /* e = rsqrt estimate */
33415 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
33416 UNSPEC_RSQRT)));
33418 /* If src == 0.0, zero the infinite rsqrt estimate so that g = e * src below does not compute Inf * 0 = NaN for sqrt(0.0). */
33419 if (!recip)
33421 rtx zero = force_reg (mode, CONST0_RTX (mode));
33423 if (mode == SFmode)
33425 rtx target = emit_conditional_move (e, GT, src, zero, mode,
33426 e, zero, mode, 0);
33427 if (target != e)
33428 emit_move_insn (e, target);
33430 else
33432 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
33433 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
33437 /* g = sqrt estimate. */
33438 emit_insn (gen_mul (g, e, src));
33439 /* h = 1/(2*sqrt) estimate. */
33440 emit_insn (gen_mul (h, e, mhalf));
33442 if (recip)
33444 if (passes == 1)
33446 rtx t = gen_reg_rtx (mode);
33447 rs6000_emit_nmsub (t, g, h, mhalf);
33448 /* Apply correction directly to 1/rsqrt estimate. */
33449 rs6000_emit_madd (dst, e, t, e);
33451 else
33453 for (i = 0; i < passes; i++)
33455 rtx t1 = gen_reg_rtx (mode);
33456 rtx g1 = gen_reg_rtx (mode);
33457 rtx h1 = gen_reg_rtx (mode);
33459 rs6000_emit_nmsub (t1, g, h, mhalf);
33460 rs6000_emit_madd (g1, g, t1, g);
33461 rs6000_emit_madd (h1, h, t1, h);
33463 g = g1;
33464 h = h1;
33466 /* Multiply by 2 for 1/rsqrt. */
33467 emit_insn (gen_add3_insn (dst, h, h));
33470 else
33472 rtx t = gen_reg_rtx (mode);
33473 rs6000_emit_nmsub (t, g, h, mhalf);
33474 rs6000_emit_madd (dst, g, t, g);
33477 return;
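/* A scalar sketch of the Goldschmidt iteration above (the rsqrt case with
   passes > 1), assuming exact arithmetic; the estimate argument stands in
   for frsqrte.  At convergence g*h == 1/2, so g == sqrt(x) and
   2*h == 1/sqrt(x).  */
#if 0
static double
swrsqrt_sketch (double x, double estimate, int passes)
{
  double g = estimate * x;      /* g ~ sqrt(x) */
  double h = estimate * 0.5;    /* h ~ 1/(2*sqrt(x)) */
  for (int i = 0; i < passes; i++)
    {
      double r = 0.5 - g * h;   /* residual (fnmsub) */
      g = g + g * r;            /* refine sqrt estimate (fmadd) */
      h = h + h * r;            /* refine 1/(2*sqrt(x)) estimate (fmadd) */
    }
  return h + h;                 /* 2*h -> 1/sqrt(x) */
}
#endif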
33480 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
33481 (Power7) targets. DST is the target, and SRC is the argument operand. */
33483 void
33484 rs6000_emit_popcount (rtx dst, rtx src)
33486 machine_mode mode = GET_MODE (dst);
33487 rtx tmp1, tmp2;
33489 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
33490 if (TARGET_POPCNTD)
33492 if (mode == SImode)
33493 emit_insn (gen_popcntdsi2 (dst, src));
33494 else
33495 emit_insn (gen_popcntddi2 (dst, src));
33496 return;
33499 tmp1 = gen_reg_rtx (mode);
33501 if (mode == SImode)
33503 emit_insn (gen_popcntbsi2 (tmp1, src));
33504 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
33505 NULL_RTX, 0);
33506 tmp2 = force_reg (SImode, tmp2);
33507 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
33509 else
33511 emit_insn (gen_popcntbdi2 (tmp1, src));
33512 tmp2 = expand_mult (DImode, tmp1,
33513 GEN_INT ((HOST_WIDE_INT)
33514 0x01010101 << 32 | 0x01010101),
33515 NULL_RTX, 0);
33516 tmp2 = force_reg (DImode, tmp2);
33517 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
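/* The multiply trick used above, shown as a self-contained scalar sketch:
   the first three steps compute the per-byte bit counts that popcntb
   produces in hardware, and the multiply by 0x01010101 sums all four byte
   counts into the top byte.  */
#if 0
static unsigned int
popcount32_sketch (unsigned int x)
{
  x = x - ((x >> 1) & 0x55555555u);                 /* 2-bit counts */
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); /* 4-bit counts */
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                 /* per-byte counts */
  return (x * 0x01010101u) >> 24;                   /* sum bytes, take top */
}
#endif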
33522 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
33523 target, and SRC is the argument operand. */
33525 void
33526 rs6000_emit_parity (rtx dst, rtx src)
33528 machine_mode mode = GET_MODE (dst);
33529 rtx tmp;
33531 tmp = gen_reg_rtx (mode);
33533 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
33534 if (TARGET_CMPB)
33536 if (mode == SImode)
33538 emit_insn (gen_popcntbsi2 (tmp, src));
33539 emit_insn (gen_paritysi2_cmpb (dst, tmp));
33541 else
33543 emit_insn (gen_popcntbdi2 (tmp, src));
33544 emit_insn (gen_paritydi2_cmpb (dst, tmp));
33546 return;
33549 if (mode == SImode)
33551 /* Is mult+shift >= shift+xor+shift+xor? */
33552 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
33554 rtx tmp1, tmp2, tmp3, tmp4;
33556 tmp1 = gen_reg_rtx (SImode);
33557 emit_insn (gen_popcntbsi2 (tmp1, src));
33559 tmp2 = gen_reg_rtx (SImode);
33560 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
33561 tmp3 = gen_reg_rtx (SImode);
33562 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
33564 tmp4 = gen_reg_rtx (SImode);
33565 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
33566 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
33568 else
33569 rs6000_emit_popcount (tmp, src);
33570 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
33572 else
33574 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
33575 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
33577 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
33579 tmp1 = gen_reg_rtx (DImode);
33580 emit_insn (gen_popcntbdi2 (tmp1, src));
33582 tmp2 = gen_reg_rtx (DImode);
33583 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
33584 tmp3 = gen_reg_rtx (DImode);
33585 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
33587 tmp4 = gen_reg_rtx (DImode);
33588 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
33589 tmp5 = gen_reg_rtx (DImode);
33590 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
33592 tmp6 = gen_reg_rtx (DImode);
33593 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
33594 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
33596 else
33597 rs6000_emit_popcount (tmp, src);
33598 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
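/* The shift+xor fallback above, as a scalar sketch: the low bit of each
   popcntb byte count is that byte's parity, and xor-folding the counts
   combines them, since the parity of a sum is the xor of the parities.  */
#if 0
static unsigned int
parity32_sketch (unsigned int x)
{
  x = x - ((x >> 1) & 0x55555555u);                 /* per-byte counts, as */
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); /* in the popcount     */
  x = (x + (x >> 4)) & 0x0f0f0f0fu;                 /* sketch (popcntb)    */
  x ^= x >> 16;                                     /* fold byte counts */
  x ^= x >> 8;
  return x & 1;
}
#endif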
33602 /* Expand an Altivec constant permutation for little endian mode.
33603 There are two issues: First, the two input operands must be
33604 swapped so that together they form a double-wide array in LE
33605 order. Second, the vperm instruction has surprising behavior
33606 in LE mode: it interprets the elements of the source vectors
33607 in BE mode ("left to right") and interprets the elements of
33608 the destination vector in LE mode ("right to left"). To
33609 correct for this, we must subtract each element of the permute
33610 control vector from 31.
33612 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
33613 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
33614 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
33615 serve as the permute control vector. Then, in BE mode,
33617 vperm 9,10,11,12
33619 places the desired result in vr9. However, in LE mode the
33620 vector contents will be
33622 vr10 = 00000003 00000002 00000001 00000000
33623 vr11 = 00000007 00000006 00000005 00000004
33625 The result of the vperm using the same permute control vector is
33627 vr9 = 05000000 07000000 01000000 03000000
33629 That is, the leftmost 4 bytes of vr10 are interpreted as the
33630 source for the rightmost 4 bytes of vr9, and so on.
33632 If we change the permute control vector to
33634 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
33636 and issue
33638 vperm 9,11,10,12
33640 we get the desired
33642 vr9 = 00000006 00000004 00000002 00000000. */
33644 void
33645 altivec_expand_vec_perm_const_le (rtx operands[4])
33647 unsigned int i;
33648 rtx perm[16];
33649 rtx constv, unspec;
33650 rtx target = operands[0];
33651 rtx op0 = operands[1];
33652 rtx op1 = operands[2];
33653 rtx sel = operands[3];
33655 /* Unpack and adjust the constant selector. */
33656 for (i = 0; i < 16; ++i)
33658 rtx e = XVECEXP (sel, 0, i);
33659 unsigned int elt = 31 - (INTVAL (e) & 31);
33660 perm[i] = GEN_INT (elt);
33663 /* Expand to a permute, swapping the inputs and using the
33664 adjusted selector. */
33665 if (!REG_P (op0))
33666 op0 = force_reg (V16QImode, op0);
33667 if (!REG_P (op1))
33668 op1 = force_reg (V16QImode, op1);
33670 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
33671 constv = force_reg (V16QImode, constv);
33672 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
33673 UNSPEC_VPERM);
33674 if (!REG_P (target))
33676 rtx tmp = gen_reg_rtx (V16QImode);
33677 emit_move_insn (tmp, unspec);
33678 unspec = tmp;
33681 emit_move_insn (target, unspec);
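/* The whole adjustment, reduced to its essentials (illustrative sketch):
   mirror each selector element within the 32-byte concatenation, then swap
   the two input operands when emitting the vperm.  */
#if 0
static void
adjust_pcv_for_le (const unsigned char be_sel[16], unsigned char le_sel[16])
{
  for (int i = 0; i < 16; i++)
    le_sel[i] = 31 - (be_sel[i] & 31);
}
#endif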
33684 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
33685 permute control vector. But here it's not a constant, so we must
33686 generate a vector NAND or NOR to do the adjustment. */
33688 void
33689 altivec_expand_vec_perm_le (rtx operands[4])
33691 rtx notx, iorx, unspec;
33692 rtx target = operands[0];
33693 rtx op0 = operands[1];
33694 rtx op1 = operands[2];
33695 rtx sel = operands[3];
33696 rtx tmp = target;
33697 rtx norreg = gen_reg_rtx (V16QImode);
33698 machine_mode mode = GET_MODE (target);
33700 /* Get everything in regs so the pattern matches. */
33701 if (!REG_P (op0))
33702 op0 = force_reg (mode, op0);
33703 if (!REG_P (op1))
33704 op1 = force_reg (mode, op1);
33705 if (!REG_P (sel))
33706 sel = force_reg (V16QImode, sel);
33707 if (!REG_P (target))
33708 tmp = gen_reg_rtx (mode);
33710 /* Invert the selector with a VNAND if available, else a VNOR.
33711 The VNAND is preferred for future fusion opportunities. */
33712 notx = gen_rtx_NOT (V16QImode, sel);
33713 iorx = (TARGET_P8_VECTOR
33714 ? gen_rtx_IOR (V16QImode, notx, notx)
33715 : gen_rtx_AND (V16QImode, notx, notx));
33716 emit_insn (gen_rtx_SET (norreg, iorx));
33718 /* Permute with operands reversed and adjusted selector. */
33719 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
33720 UNSPEC_VPERM);
33722 /* Copy into target, possibly by way of a register. */
33723 if (!REG_P (target))
33725 emit_move_insn (tmp, unspec);
33726 unspec = tmp;
33729 emit_move_insn (target, unspec);
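/* Why a plain bitwise NOT suffices here: vperm looks only at the low five
   bits of each selector byte, and for 0 <= e <= 31 we have
   31 - e == 31 ^ e == (~e) & 31.  For example, e = 5 gives 31 - 5 = 26,
   and ~5 = ...11111010, whose low five bits are 11010 = 26.  */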
33732 /* Expand an Altivec constant permutation. Return true if we match
33733 an efficient implementation; false to fall back to VPERM. */
33735 bool
33736 altivec_expand_vec_perm_const (rtx operands[4])
33738 struct altivec_perm_insn {
33739 HOST_WIDE_INT mask;
33740 enum insn_code impl;
33741 unsigned char perm[16];
33743 static const struct altivec_perm_insn patterns[] = {
33744 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
33745 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
33746 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
33747 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
33748 { OPTION_MASK_ALTIVEC,
33749 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
33750 : CODE_FOR_altivec_vmrglb_direct),
33751 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
33752 { OPTION_MASK_ALTIVEC,
33753 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
33754 : CODE_FOR_altivec_vmrglh_direct),
33755 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
33756 { OPTION_MASK_ALTIVEC,
33757 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
33758 : CODE_FOR_altivec_vmrglw_direct),
33759 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
33760 { OPTION_MASK_ALTIVEC,
33761 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
33762 : CODE_FOR_altivec_vmrghb_direct),
33763 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
33764 { OPTION_MASK_ALTIVEC,
33765 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
33766 : CODE_FOR_altivec_vmrghh_direct),
33767 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
33768 { OPTION_MASK_ALTIVEC,
33769 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
33770 : CODE_FOR_altivec_vmrghw_direct),
33771 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
33772 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
33773 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
33774 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
33775 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
33778 unsigned int i, j, elt, which;
33779 unsigned char perm[16];
33780 rtx target, op0, op1, sel, x;
33781 bool one_vec;
33783 target = operands[0];
33784 op0 = operands[1];
33785 op1 = operands[2];
33786 sel = operands[3];
33788 /* Unpack the constant selector. */
33789 for (i = which = 0; i < 16; ++i)
33791 rtx e = XVECEXP (sel, 0, i);
33792 elt = INTVAL (e) & 31;
33793 which |= (elt < 16 ? 1 : 2);
33794 perm[i] = elt;
33797 /* Simplify the constant selector based on operands. */
33798 switch (which)
33800 default:
33801 gcc_unreachable ();
33803 case 3:
33804 one_vec = false;
33805 if (!rtx_equal_p (op0, op1))
33806 break;
33807 /* FALLTHRU */
33809 case 2:
33810 for (i = 0; i < 16; ++i)
33811 perm[i] &= 15;
33812 op0 = op1;
33813 one_vec = true;
33814 break;
33816 case 1:
33817 op1 = op0;
33818 one_vec = true;
33819 break;
33822 /* Look for splat patterns. */
33823 if (one_vec)
33825 elt = perm[0];
33827 for (i = 0; i < 16; ++i)
33828 if (perm[i] != elt)
33829 break;
33830 if (i == 16)
33832 if (!BYTES_BIG_ENDIAN)
33833 elt = 15 - elt;
33834 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
33835 return true;
33838 if (elt % 2 == 0)
33840 for (i = 0; i < 16; i += 2)
33841 if (perm[i] != elt || perm[i + 1] != elt + 1)
33842 break;
33843 if (i == 16)
33845 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
33846 x = gen_reg_rtx (V8HImode);
33847 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
33848 GEN_INT (field)));
33849 emit_move_insn (target, gen_lowpart (V16QImode, x));
33850 return true;
33854 if (elt % 4 == 0)
33856 for (i = 0; i < 16; i += 4)
33857 if (perm[i] != elt
33858 || perm[i + 1] != elt + 1
33859 || perm[i + 2] != elt + 2
33860 || perm[i + 3] != elt + 3)
33861 break;
33862 if (i == 16)
33864 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
33865 x = gen_reg_rtx (V4SImode);
33866 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
33867 GEN_INT (field)));
33868 emit_move_insn (target, gen_lowpart (V16QImode, x));
33869 return true;
33874 /* Look for merge and pack patterns. */
33875 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
33877 bool swapped;
33879 if ((patterns[j].mask & rs6000_isa_flags) == 0)
33880 continue;
33882 elt = patterns[j].perm[0];
33883 if (perm[0] == elt)
33884 swapped = false;
33885 else if (perm[0] == elt + 16)
33886 swapped = true;
33887 else
33888 continue;
33889 for (i = 1; i < 16; ++i)
33891 elt = patterns[j].perm[i];
33892 if (swapped)
33893 elt = (elt >= 16 ? elt - 16 : elt + 16);
33894 else if (one_vec && elt >= 16)
33895 elt -= 16;
33896 if (perm[i] != elt)
33897 break;
33899 if (i == 16)
33901 enum insn_code icode = patterns[j].impl;
33902 machine_mode omode = insn_data[icode].operand[0].mode;
33903 machine_mode imode = insn_data[icode].operand[1].mode;
33905 /* For little-endian, don't use vpkuwum and vpkuhum if the
33906 underlying vector type is not V4SI and V8HI, respectively.
33907 For example, using vpkuwum with a V8HI picks up the even
33908 halfwords (BE numbering) when the even halfwords (LE
33909 numbering) are what we need. */
33910 if (!BYTES_BIG_ENDIAN
33911 && icode == CODE_FOR_altivec_vpkuwum_direct
33912 && ((GET_CODE (op0) == REG
33913 && GET_MODE (op0) != V4SImode)
33914 || (GET_CODE (op0) == SUBREG
33915 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
33916 continue;
33917 if (!BYTES_BIG_ENDIAN
33918 && icode == CODE_FOR_altivec_vpkuhum_direct
33919 && ((GET_CODE (op0) == REG
33920 && GET_MODE (op0) != V8HImode)
33921 || (GET_CODE (op0) == SUBREG
33922 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
33923 continue;
33925 /* For little-endian, the two input operands must be swapped
33926 (or swapped back) to ensure proper right-to-left numbering
33927 from 0 to 2N-1. */
33928 if (swapped ^ !BYTES_BIG_ENDIAN)
33929 std::swap (op0, op1);
33930 if (imode != V16QImode)
33932 op0 = gen_lowpart (imode, op0);
33933 op1 = gen_lowpart (imode, op1);
33935 if (omode == V16QImode)
33936 x = target;
33937 else
33938 x = gen_reg_rtx (omode);
33939 emit_insn (GEN_FCN (icode) (x, op0, op1));
33940 if (omode != V16QImode)
33941 emit_move_insn (target, gen_lowpart (V16QImode, x));
33942 return true;
33946 if (!BYTES_BIG_ENDIAN)
33948 altivec_expand_vec_perm_const_le (operands);
33949 return true;
33952 return false;
33955 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
33956 Return true if we match an efficient implementation. */
33958 static bool
33959 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
33960 unsigned char perm0, unsigned char perm1)
33962 rtx x;
33964 /* If both selectors come from the same operand, fold to single op. */
33965 if ((perm0 & 2) == (perm1 & 2))
33967 if (perm0 & 2)
33968 op0 = op1;
33969 else
33970 op1 = op0;
33972 /* If both operands are equal, fold to simpler permutation. */
33973 if (rtx_equal_p (op0, op1))
33975 perm0 = perm0 & 1;
33976 perm1 = (perm1 & 1) + 2;
33978 /* If the first selector comes from the second operand, swap. */
33979 else if (perm0 & 2)
33981 if (perm1 & 2)
33982 return false;
33983 perm0 -= 2;
33984 perm1 += 2;
33985 std::swap (op0, op1);
33987 /* If the second selector does not come from the second operand, fail. */
33988 else if ((perm1 & 2) == 0)
33989 return false;
33991 /* Success! */
33992 if (target != NULL)
33994 machine_mode vmode, dmode;
33995 rtvec v;
33997 vmode = GET_MODE (target);
33998 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
33999 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
34000 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
34001 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
34002 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
34003 emit_insn (gen_rtx_SET (target, x));
34005 return true;
34008 bool
34009 rs6000_expand_vec_perm_const (rtx operands[4])
34011 rtx target, op0, op1, sel;
34012 unsigned char perm0, perm1;
34014 target = operands[0];
34015 op0 = operands[1];
34016 op1 = operands[2];
34017 sel = operands[3];
34019 /* Unpack the constant selector. */
34020 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
34021 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
34023 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
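/* A scalar model of the two-element selector encoding used above
   (illustrative sketch): values 0-3 index into the concatenation
   {op0[0], op0[1], op1[0], op1[1]}, so e.g. perm0 = 1, perm1 = 2
   selects {op0[1], op1[0]}.  */
#if 0
static double
vec2_perm_elt (const double op0[2], const double op1[2], unsigned int sel)
{
  sel &= 3;
  return sel < 2 ? op0[sel] : op1[sel - 2];
}
#endif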
34026 /* Test whether a constant permutation is supported. */
34028 static bool
34029 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
34030 const unsigned char *sel)
34032 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
34033 if (TARGET_ALTIVEC)
34034 return true;
34036 /* Check for ps_merge* or evmerge* insns. */
34037 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
34038 || (TARGET_SPE && vmode == V2SImode))
34040 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
34041 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
34042 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
34045 return false;
34048 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
34050 static void
34051 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
34052 machine_mode vmode, unsigned nelt, rtx perm[])
34054 machine_mode imode;
34055 rtx x;
34057 imode = vmode;
34058 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
34060 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
34061 imode = mode_for_vector (imode, nelt);
34064 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
34065 x = expand_vec_perm (vmode, op0, op1, x, target);
34066 if (x != target)
34067 emit_move_insn (target, x);
34070 /* Expand an extract even operation. */
34072 void
34073 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
34075 machine_mode vmode = GET_MODE (target);
34076 unsigned i, nelt = GET_MODE_NUNITS (vmode);
34077 rtx perm[16];
34079 for (i = 0; i < nelt; i++)
34080 perm[i] = GEN_INT (i * 2);
34082 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
34085 /* Expand a vector interleave operation. */
34087 void
34088 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
34090 machine_mode vmode = GET_MODE (target);
34091 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
34092 rtx perm[16];
34094 high = (highp ? 0 : nelt / 2);
34095 for (i = 0; i < nelt / 2; i++)
34097 perm[i * 2] = GEN_INT (i + high);
34098 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
34101 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
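/* For example, with four-element vectors (nelt == 4), where elements 0-3
   come from op0 and 4-7 from op1, the selectors built above are:
     extract even:     { 0, 2, 4, 6 }
     interleave high:  { 0, 4, 1, 5 }
     interleave low:   { 2, 6, 3, 7 }  */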
34104 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
34105 void
34106 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
34108 HOST_WIDE_INT hwi_scale (scale);
34109 REAL_VALUE_TYPE r_pow;
34110 rtvec v = rtvec_alloc (2);
34111 rtx elt;
34112 rtx scale_vec = gen_reg_rtx (V2DFmode);
34113 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
34114 elt = const_double_from_real_value (r_pow, DFmode);
34115 RTVEC_ELT (v, 0) = elt;
34116 RTVEC_ELT (v, 1) = elt;
34117 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
34118 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
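/* For example, scale = 4 splats 2**4 == 16.0 into both lanes of scale_vec,
   so a single V2DF multiply scales both elements of SRC by 16.  */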
34121 /* Return an RTX representing where to find the function value of a
34122 function returning MODE. */
34123 static rtx
34124 rs6000_complex_function_value (machine_mode mode)
34126 unsigned int regno;
34127 rtx r1, r2;
34128 machine_mode inner = GET_MODE_INNER (mode);
34129 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
34131 if (TARGET_FLOAT128
34132 && (mode == KCmode
34133 || (mode == TCmode && TARGET_IEEEQUAD)))
34134 regno = ALTIVEC_ARG_RETURN;
34136 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34137 regno = FP_ARG_RETURN;
34139 else
34141 regno = GP_ARG_RETURN;
34143 /* 32-bit is OK since it'll go in r3/r4. */
34144 if (TARGET_32BIT && inner_bytes >= 4)
34145 return gen_rtx_REG (mode, regno);
34148 if (inner_bytes >= 8)
34149 return gen_rtx_REG (mode, regno);
34151 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
34152 const0_rtx);
34153 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
34154 GEN_INT (inner_bytes));
34155 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
34158 /* Return an rtx describing a return value of MODE as a PARALLEL
34159 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
34160 stride REG_STRIDE. */
34162 static rtx
34163 rs6000_parallel_return (machine_mode mode,
34164 int n_elts, machine_mode elt_mode,
34165 unsigned int regno, unsigned int reg_stride)
34167 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34169 int i;
34170 for (i = 0; i < n_elts; i++)
34172 rtx r = gen_rtx_REG (elt_mode, regno);
34173 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
34174 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
34175 regno += reg_stride;
34178 return par;
34181 /* Target hook for TARGET_FUNCTION_VALUE.
34183 On the SPE, both FPs and vectors are returned in r3.
34185 On RS/6000 an integer value is in r3 and a floating-point value is in
34186 fp1, unless -msoft-float. */
34188 static rtx
34189 rs6000_function_value (const_tree valtype,
34190 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
34191 bool outgoing ATTRIBUTE_UNUSED)
34193 machine_mode mode;
34194 unsigned int regno;
34195 machine_mode elt_mode;
34196 int n_elts;
34198 /* Special handling for structs in darwin64. */
34199 if (TARGET_MACHO
34200 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
34202 CUMULATIVE_ARGS valcum;
34203 rtx valret;
34205 valcum.words = 0;
34206 valcum.fregno = FP_ARG_MIN_REG;
34207 valcum.vregno = ALTIVEC_ARG_MIN_REG;
34208 /* Do a trial code generation as if this were going to be passed as
34209 an argument; if any part goes in memory, we return NULL. */
34210 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
34211 if (valret)
34212 return valret;
34213 /* Otherwise fall through to standard ABI rules. */
34216 mode = TYPE_MODE (valtype);
34218 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
34219 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
34221 int first_reg, n_regs;
34223 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
34225 /* _Decimal128 must use even/odd register pairs. */
34226 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34227 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
34229 else
34231 first_reg = ALTIVEC_ARG_RETURN;
34232 n_regs = 1;
34235 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
34238 /* Some return value types need to be split when using -mpowerpc64 with the 32-bit ABI. */
34239 if (TARGET_32BIT && TARGET_POWERPC64)
34240 switch (mode)
34242 default:
34243 break;
34244 case DImode:
34245 case SCmode:
34246 case DCmode:
34247 case TCmode:
34248 int count = GET_MODE_SIZE (mode) / 4;
34249 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
34252 if ((INTEGRAL_TYPE_P (valtype)
34253 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
34254 || POINTER_TYPE_P (valtype))
34255 mode = TARGET_32BIT ? SImode : DImode;
34257 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34258 /* _Decimal128 must use an even/odd register pair. */
34259 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34260 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
34261 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
34262 regno = FP_ARG_RETURN;
34263 else if (TREE_CODE (valtype) == COMPLEX_TYPE
34264 && targetm.calls.split_complex_arg)
34265 return rs6000_complex_function_value (mode);
34266 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
34267 return register is used in both cases, and we won't see V2DImode/V2DFmode
34268 for pure altivec, combine the two cases. */
34269 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
34270 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
34271 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
34272 regno = ALTIVEC_ARG_RETURN;
34273 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
34274 && (mode == DFmode || mode == DCmode
34275 || FLOAT128_IBM_P (mode) || mode == TCmode))
34276 return spe_build_register_parallel (mode, GP_ARG_RETURN);
34277 else
34278 regno = GP_ARG_RETURN;
34280 return gen_rtx_REG (mode, regno);
34283 /* Define how to find the value returned by a library function
34284 assuming the value has mode MODE. */
34285 rtx
34286 rs6000_libcall_value (machine_mode mode)
34288 unsigned int regno;
34290 /* Long long return values need to be split when using -mpowerpc64 with the 32-bit ABI. */
34291 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
34292 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
34294 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34295 /* _Decimal128 must use an even/odd register pair. */
34296 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34297 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
34298 && TARGET_HARD_FLOAT && TARGET_FPRS
34299 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
34300 regno = FP_ARG_RETURN;
34301 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
34302 return register is used in both cases, and we won't see V2DImode/V2DFmode
34303 for pure altivec, combine the two cases. */
34304 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
34305 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
34306 regno = ALTIVEC_ARG_RETURN;
34307 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
34308 return rs6000_complex_function_value (mode);
34309 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
34310 && (mode == DFmode || mode == DCmode
34311 || FLOAT128_IBM_P (mode) || mode == TCmode))
34312 return spe_build_register_parallel (mode, GP_ARG_RETURN);
34313 else
34314 regno = GP_ARG_RETURN;
34316 return gen_rtx_REG (mode, regno);
34320 /* Return true if we use LRA instead of the reload pass. */
34321 static bool
34322 rs6000_lra_p (void)
34324 return rs6000_lra_flag;
34327 /* Given FROM and TO register numbers, say whether this elimination is allowed.
34328 Frame pointer elimination is automatically handled.
34330 For the RS/6000, if frame pointer elimination is being done, we would like
34331 to convert ap into fp, not sp.
34333 We need r30 if -mminimal-toc was specified and there are constant pool
34334 references. */
34336 static bool
34337 rs6000_can_eliminate (const int from, const int to)
34339 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
34340 ? ! frame_pointer_needed
34341 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
34342 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
34343 : true);
34346 /* Define the offset between two registers, FROM to be eliminated and its
34347 replacement TO, at the start of a routine. */
34348 HOST_WIDE_INT
34349 rs6000_initial_elimination_offset (int from, int to)
34351 rs6000_stack_t *info = rs6000_stack_info ();
34352 HOST_WIDE_INT offset;
34354 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34355 offset = info->push_p ? 0 : -info->total_size;
34356 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34358 offset = info->push_p ? 0 : -info->total_size;
34359 if (FRAME_GROWS_DOWNWARD)
34360 offset += info->fixed_size + info->vars_size + info->parm_size;
34362 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
34363 offset = FRAME_GROWS_DOWNWARD
34364 ? info->fixed_size + info->vars_size + info->parm_size
34365 : 0;
34366 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
34367 offset = info->total_size;
34368 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34369 offset = info->push_p ? info->total_size : 0;
34370 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
34371 offset = 0;
34372 else
34373 gcc_unreachable ();
34375 return offset;
34378 static rtx
34379 rs6000_dwarf_register_span (rtx reg)
34381 rtx parts[8];
34382 int i, words;
34383 unsigned regno = REGNO (reg);
34384 machine_mode mode = GET_MODE (reg);
34386 if (TARGET_SPE
34387 && regno < 32
34388 && (SPE_VECTOR_MODE (GET_MODE (reg))
34389 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
34390 && mode != SFmode && mode != SDmode && mode != SCmode)))
34391 ; /* An SPE value: fall through and build the multi-part span below. */
34392 else
34393 return NULL_RTX;
34395 regno = REGNO (reg);
34397 /* The duality of the SPE register size wreaks all kinds of havoc.
34398 This is a way of distinguishing r0 in 32-bits from r0 in
34399 64-bits. */
34400 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
34401 gcc_assert (words <= 4);
34402 for (i = 0; i < words; i++, regno++)
34404 if (BYTES_BIG_ENDIAN)
34406 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
34407 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
34409 else
34411 parts[2 * i] = gen_rtx_REG (SImode, regno);
34412 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
34416 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
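/* For example, a TARGET_E500_DOUBLE DFmode value in r5 is described as a
   PARALLEL of two SImode pieces, the SPE high-part register for r5 and r5
   itself; the high part comes first for big-endian and last for
   little-endian.  */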
34419 /* Fill in the sizes of the SPE register high parts in the table used by the unwinder. */
34421 static void
34422 rs6000_init_dwarf_reg_sizes_extra (tree address)
34424 if (TARGET_SPE)
34426 int i;
34427 machine_mode mode = TYPE_MODE (char_type_node);
34428 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
34429 rtx mem = gen_rtx_MEM (BLKmode, addr);
34430 rtx value = gen_int_mode (4, mode);
34432 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
34434 int column = DWARF_REG_TO_UNWIND_COLUMN
34435 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
34436 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
34438 emit_move_insn (adjust_address (mem, mode, offset), value);
34442 if (TARGET_MACHO && ! TARGET_ALTIVEC)
34444 int i;
34445 machine_mode mode = TYPE_MODE (char_type_node);
34446 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
34447 rtx mem = gen_rtx_MEM (BLKmode, addr);
34448 rtx value = gen_int_mode (16, mode);
34450 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
34451 The unwinder still needs to know the size of Altivec registers. */
34453 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
34455 int column = DWARF_REG_TO_UNWIND_COLUMN
34456 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
34457 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
34459 emit_move_insn (adjust_address (mem, mode, offset), value);
34464 /* Map internal gcc register numbers to debug format register numbers.
34465 FORMAT specifies the type of debug register number to use:
34466 0 -- debug information, except for frame-related sections
34467 1 -- DWARF .debug_frame section
34468 2 -- DWARF .eh_frame section */
34470 unsigned int
34471 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
34473 /* We never use the GCC internal number for SPE high registers.
34474 Those are mapped to the 1200..1231 range for all debug formats. */
34475 if (SPE_HIGH_REGNO_P (regno))
34476 return regno - FIRST_SPE_HIGH_REGNO + 1200;
34478 /* Except for the above, we use the internal number for non-DWARF
34479 debug information, and also for .eh_frame. */
34480 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
34481 return regno;
34483 /* On some platforms, we use the standard DWARF register
34484 numbering for .debug_info and .debug_frame. */
34485 #ifdef RS6000_USE_DWARF_NUMBERING
34486 if (regno <= 63)
34487 return regno;
34488 if (regno == LR_REGNO)
34489 return 108;
34490 if (regno == CTR_REGNO)
34491 return 109;
34492 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
34493 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
34494 The actual code emitted saves the whole of CR, so we map CR2_REGNO
34495 to the DWARF reg for CR. */
34496 if (format == 1 && regno == CR2_REGNO)
34497 return 64;
34498 if (CR_REGNO_P (regno))
34499 return regno - CR0_REGNO + 86;
34500 if (regno == CA_REGNO)
34501 return 101; /* XER */
34502 if (ALTIVEC_REGNO_P (regno))
34503 return regno - FIRST_ALTIVEC_REGNO + 1124;
34504 if (regno == VRSAVE_REGNO)
34505 return 356;
34506 if (regno == VSCR_REGNO)
34507 return 67;
34508 if (regno == SPE_ACC_REGNO)
34509 return 99;
34510 if (regno == SPEFSCR_REGNO)
34511 return 612;
34512 #endif
34513 return regno;
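/* Worked examples under RS6000_USE_DWARF_NUMBERING: GPR3 maps to 3 in all
   formats; LR maps to 108 in the DWARF sections; CR2 maps to 64 (the
   whole-CR column) in .debug_frame but to 86 + 2 == 88 in .debug_info;
   .eh_frame (format 2) always uses GCC's internal numbers.  */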
34516 /* Target hook for eh_return_filter_mode. */
34517 static machine_mode
34518 rs6000_eh_return_filter_mode (void)
34520 return TARGET_32BIT ? SImode : word_mode;
34523 /* Target hook for scalar_mode_supported_p. */
34524 static bool
34525 rs6000_scalar_mode_supported_p (machine_mode mode)
34527 /* -m32 does not support TImode. This is the default, from
34528 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
34529 same ABI as for -m32. But default_scalar_mode_supported_p allows
34530 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
34531 for -mpowerpc64. */
34532 if (TARGET_32BIT && mode == TImode)
34533 return false;
34535 if (DECIMAL_FLOAT_MODE_P (mode))
34536 return default_decimal_float_supported_p ();
34537 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
34538 return true;
34539 else
34540 return default_scalar_mode_supported_p (mode);
34543 /* Target hook for vector_mode_supported_p. */
34544 static bool
34545 rs6000_vector_mode_supported_p (machine_mode mode)
34548 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
34549 return true;
34551 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
34552 return true;
34554 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
34555 128-bit, the compiler might try to widen IEEE 128-bit to IBM
34556 double-double. */
34557 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
34558 return true;
34560 else
34561 return false;
34564 /* Target hook for c_mode_for_suffix. */
34565 static machine_mode
34566 rs6000_c_mode_for_suffix (char suffix)
34568 if (TARGET_FLOAT128)
34570 if (suffix == 'q' || suffix == 'Q')
34571 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
34573 /* At the moment, we are not defining a suffix for IBM extended double.
34574 If/when the default for -mabi=ieeelongdouble is changed, and we want
34575 to support __ibm128 constants in legacy library code, we may need to
34576 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
34577 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
34578 __float80 constants. */
34581 return VOIDmode;
34584 /* Target hook for invalid_arg_for_unprototyped_fn. */
34585 static const char *
34586 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
34588 return (!rs6000_darwin64_abi
34589 && typelist == 0
34590 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
34591 && (funcdecl == NULL_TREE
34592 || (TREE_CODE (funcdecl) == FUNCTION_DECL
34593 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
34594 ? N_("AltiVec argument passed to unprototyped function")
34595 : NULL;
34598 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
34599 setup by using __stack_chk_fail_local hidden function instead of
34600 calling __stack_chk_fail directly. Otherwise it is better to call
34601 __stack_chk_fail directly. */
34603 static tree ATTRIBUTE_UNUSED
34604 rs6000_stack_protect_fail (void)
34606 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
34607 ? default_hidden_stack_protect_fail ()
34608 : default_external_stack_protect_fail ();
34611 void
34612 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
34613 int num_operands ATTRIBUTE_UNUSED)
34615 if (rs6000_warn_cell_microcode)
34617 const char *temp;
34618 int insn_code_number = recog_memoized (insn);
34619 location_t location = INSN_LOCATION (insn);
34621 /* Punt on insns we cannot recognize. */
34622 if (insn_code_number < 0)
34623 return;
34625 temp = get_insn_template (insn_code_number, insn);
34627 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
34628 warning_at (location, OPT_mwarn_cell_microcode,
34629 "emitting microcode insn %s\t[%s] #%d",
34630 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34631 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
34632 warning_at (location, OPT_mwarn_cell_microcode,
34633 "emitting conditional microcode insn %s\t[%s] #%d",
34634 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34638 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
34640 #if TARGET_ELF
34641 static unsigned HOST_WIDE_INT
34642 rs6000_asan_shadow_offset (void)
34644 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
34646 #endif
34648 /* Mask options that we want to support inside of attribute((target)) and
34649 #pragma GCC target operations. Note, we do not include things like
34650 64/32-bit, endianness, hard/soft floating point, etc. that would have
34651 different calling sequences. */
34653 struct rs6000_opt_mask {
34654 const char *name; /* option name */
34655 HOST_WIDE_INT mask; /* mask to set */
34656 bool invert; /* invert sense of mask */
34657 bool valid_target; /* option is a target option */
34660 static struct rs6000_opt_mask const rs6000_opt_masks[] =
34662 { "altivec", OPTION_MASK_ALTIVEC, false, true },
34663 { "cmpb", OPTION_MASK_CMPB, false, true },
34664 { "crypto", OPTION_MASK_CRYPTO, false, true },
34665 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
34666 { "dlmzb", OPTION_MASK_DLMZB, false, true },
34667 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
34668 false, true },
34669 { "float128", OPTION_MASK_FLOAT128, false, false },
34670 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
34671 { "fprnd", OPTION_MASK_FPRND, false, true },
34672 { "hard-dfp", OPTION_MASK_DFP, false, true },
34673 { "htm", OPTION_MASK_HTM, false, true },
34674 { "isel", OPTION_MASK_ISEL, false, true },
34675 { "mfcrf", OPTION_MASK_MFCRF, false, true },
34676 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
34677 { "modulo", OPTION_MASK_MODULO, false, true },
34678 { "mulhw", OPTION_MASK_MULHW, false, true },
34679 { "multiple", OPTION_MASK_MULTIPLE, false, true },
34680 { "popcntb", OPTION_MASK_POPCNTB, false, true },
34681 { "popcntd", OPTION_MASK_POPCNTD, false, true },
34682 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
34683 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
34684 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
34685 { "power9-dform", OPTION_MASK_P9_DFORM, false, true },
34686 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
34687 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
34688 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
34689 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
34690 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
34691 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
34692 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
34693 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
34694 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
34695 { "string", OPTION_MASK_STRING, false, true },
34696 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
34697 { "update", OPTION_MASK_NO_UPDATE, true , true },
34698 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
34699 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
34700 { "vsx", OPTION_MASK_VSX, false, true },
34701 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
34702 #ifdef OPTION_MASK_64BIT
34703 #if TARGET_AIX_OS
34704 { "aix64", OPTION_MASK_64BIT, false, false },
34705 { "aix32", OPTION_MASK_64BIT, true, false },
34706 #else
34707 { "64", OPTION_MASK_64BIT, false, false },
34708 { "32", OPTION_MASK_64BIT, true, false },
34709 #endif
34710 #endif
34711 #ifdef OPTION_MASK_EABI
34712 { "eabi", OPTION_MASK_EABI, false, false },
34713 #endif
34714 #ifdef OPTION_MASK_LITTLE_ENDIAN
34715 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
34716 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
34717 #endif
34718 #ifdef OPTION_MASK_RELOCATABLE
34719 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
34720 #endif
34721 #ifdef OPTION_MASK_STRICT_ALIGN
34722 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
34723 #endif
34724 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
34725 { "string", OPTION_MASK_STRING, false, false },
34728 /* Builtin mask mapping for printing the flags. */
34729 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
34731 { "altivec", RS6000_BTM_ALTIVEC, false, false },
34732 { "vsx", RS6000_BTM_VSX, false, false },
34733 { "spe", RS6000_BTM_SPE, false, false },
34734 { "paired", RS6000_BTM_PAIRED, false, false },
34735 { "fre", RS6000_BTM_FRE, false, false },
34736 { "fres", RS6000_BTM_FRES, false, false },
34737 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
34738 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
34739 { "popcntd", RS6000_BTM_POPCNTD, false, false },
34740 { "cell", RS6000_BTM_CELL, false, false },
34741 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
34742 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
34743 { "crypto", RS6000_BTM_CRYPTO, false, false },
34744 { "htm", RS6000_BTM_HTM, false, false },
34745 { "hard-dfp", RS6000_BTM_DFP, false, false },
34746 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
34747 { "long-double-128", RS6000_BTM_LDBL128, false, false },
34750 /* Option variables that we want to support inside attribute((target)) and
34751 #pragma GCC target operations. */
34753 struct rs6000_opt_var {
34754 const char *name; /* option name */
34755 size_t global_offset; /* offset of the option in global_options. */
34756 size_t target_offset; /* offset of the option in target options. */
34759 static struct rs6000_opt_var const rs6000_opt_vars[] =
34761 { "friz",
34762 offsetof (struct gcc_options, x_TARGET_FRIZ),
34763 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
34764 { "avoid-indexed-addresses",
34765 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
34766 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
34767 { "paired",
34768 offsetof (struct gcc_options, x_rs6000_paired_float),
34769 offsetof (struct cl_target_option, x_rs6000_paired_float), },
34770 { "longcall",
34771 offsetof (struct gcc_options, x_rs6000_default_long_calls),
34772 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
34773 { "optimize-swaps",
34774 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
34775 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
34776 { "allow-movmisalign",
34777 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
34778 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
34779 { "allow-df-permute",
34780 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
34781 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
34782 { "sched-groups",
34783 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
34784 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
34785 { "always-hint",
34786 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
34787 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
34788 { "align-branch-targets",
34789 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
34790 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
34791 { "vectorize-builtins",
34792 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
34793 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
34794 { "tls-markers",
34795 offsetof (struct gcc_options, x_tls_markers),
34796 offsetof (struct cl_target_option, x_tls_markers), },
34797 { "sched-prolog",
34798 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34799 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34800 { "sched-epilog",
34801 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34802 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34803 { "gen-cell-microcode",
34804 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
34805 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
34806 { "warn-cell-microcode",
34807 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
34808 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
34811 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
34812 parsing. Return true if there were no errors. */
34814 static bool
34815 rs6000_inner_target_options (tree args, bool attr_p)
34817 bool ret = true;
34819 if (args == NULL_TREE)
34822 else if (TREE_CODE (args) == STRING_CST)
34824 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34825 char *q;
34827 while ((q = strtok (p, ",")) != NULL)
34829 bool error_p = false;
34830 bool not_valid_p = false;
34831 const char *cpu_opt = NULL;
34833 p = NULL;
34834 if (strncmp (q, "cpu=", 4) == 0)
34836 int cpu_index = rs6000_cpu_name_lookup (q+4);
34837 if (cpu_index >= 0)
34838 rs6000_cpu_index = cpu_index;
34839 else
34841 error_p = true;
34842 cpu_opt = q+4;
34845 else if (strncmp (q, "tune=", 5) == 0)
34847 int tune_index = rs6000_cpu_name_lookup (q+5);
34848 if (tune_index >= 0)
34849 rs6000_tune_index = tune_index;
34850 else
34852 error_p = true;
34853 cpu_opt = q+5;
34856 else
34858 size_t i;
34859 bool invert = false;
34860 char *r = q;
34862 error_p = true;
34863 if (strncmp (r, "no-", 3) == 0)
34865 invert = true;
34866 r += 3;
34869 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
34870 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
34872 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
34874 if (!rs6000_opt_masks[i].valid_target)
34875 not_valid_p = true;
34876 else
34878 error_p = false;
34879 rs6000_isa_flags_explicit |= mask;
34881 /* VSX needs altivec, so -mvsx automagically sets
34882 altivec and disables -mavoid-indexed-addresses. */
34883 if (!invert)
34885 if (mask == OPTION_MASK_VSX)
34887 mask |= OPTION_MASK_ALTIVEC;
34888 TARGET_AVOID_XFORM = 0;
34892 if (rs6000_opt_masks[i].invert)
34893 invert = !invert;
34895 if (invert)
34896 rs6000_isa_flags &= ~mask;
34897 else
34898 rs6000_isa_flags |= mask;
34900 break;
34903 if (error_p && !not_valid_p)
34905 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
34906 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
34908 size_t j = rs6000_opt_vars[i].global_offset;
34909 *((int *) ((char *)&global_options + j)) = !invert;
34910 error_p = false;
34911 not_valid_p = false;
34912 break;
34917 if (error_p)
34919 const char *eprefix, *esuffix;
34921 ret = false;
34922 if (attr_p)
34924 eprefix = "__attribute__((__target__(";
34925 esuffix = ")))";
34927 else
34929 eprefix = "#pragma GCC target ";
34930 esuffix = "";
34933 if (cpu_opt)
34934 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
34935 q, esuffix);
34936 else if (not_valid_p)
34937 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
34938 else
34939 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
34944 else if (TREE_CODE (args) == TREE_LIST)
34948 tree value = TREE_VALUE (args);
34949 if (value)
34951 bool ret2 = rs6000_inner_target_options (value, attr_p);
34952 if (!ret2)
34953 ret = false;
34955 args = TREE_CHAIN (args);
34957 while (args != NULL_TREE);
34960 else
34961 gcc_unreachable ();
34963 return ret;
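/* A minimal sketch of the strings the parser above accepts (hypothetical
   user code):

     __attribute__((__target__("cpu=power8,vsx,no-crypto")))
     void f (void);

   "cpu=" and "tune=" select a processor by name; any other token is
   looked up first in rs6000_opt_masks and then in rs6000_opt_vars, with
   an optional "no-" prefix to invert the option.  */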
34966 /* Print out the target options as a list for -mdebug=target. */
34968 static void
34969 rs6000_debug_target_options (tree args, const char *prefix)
34971 if (args == NULL_TREE)
34972 fprintf (stderr, "%s<NULL>", prefix);
34974 else if (TREE_CODE (args) == STRING_CST)
34976 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34977 char *q;
34979 while ((q = strtok (p, ",")) != NULL)
34981 p = NULL;
34982 fprintf (stderr, "%s\"%s\"", prefix, q);
34983 prefix = ", ";
34987 else if (TREE_CODE (args) == TREE_LIST)
34991 tree value = TREE_VALUE (args);
34992 if (value)
34994 rs6000_debug_target_options (value, prefix);
34995 prefix = ", ";
34997 args = TREE_CHAIN (args);
34999 while (args != NULL_TREE);
35002 else
35003 gcc_unreachable ();
35005 return;
35009 /* Hook to validate attribute((target("..."))). */
35011 static bool
35012 rs6000_valid_attribute_p (tree fndecl,
35013 tree ARG_UNUSED (name),
35014 tree args,
35015 int flags)
35017 struct cl_target_option cur_target;
35018 bool ret;
35019 tree old_optimize = build_optimization_node (&global_options);
35020 tree new_target, new_optimize;
35021 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
35023 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
35025 if (TARGET_DEBUG_TARGET)
35027 tree tname = DECL_NAME (fndecl);
35028 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
35029 if (tname)
35030 fprintf (stderr, "function: %.*s\n",
35031 (int) IDENTIFIER_LENGTH (tname),
35032 IDENTIFIER_POINTER (tname));
35033 else
35034 fprintf (stderr, "function: unknown\n");
35036 fprintf (stderr, "args:");
35037 rs6000_debug_target_options (args, " ");
35038 fprintf (stderr, "\n");
35040 if (flags)
35041 fprintf (stderr, "flags: 0x%x\n", flags);
35043 fprintf (stderr, "--------------------\n");
35046 old_optimize = build_optimization_node (&global_options);
35047 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
35049 /* If the function changed the optimization levels as well as setting target
35050 options, start with the optimizations specified. */
35051 if (func_optimize && func_optimize != old_optimize)
35052 cl_optimization_restore (&global_options,
35053 TREE_OPTIMIZATION (func_optimize));
35055 /* The target attributes may also change some optimization flags, so update
35056 the optimization options if necessary. */
35057 cl_target_option_save (&cur_target, &global_options);
35058 rs6000_cpu_index = rs6000_tune_index = -1;
35059 ret = rs6000_inner_target_options (args, true);
35061 /* Set up any additional state. */
35062 if (ret)
35064 ret = rs6000_option_override_internal (false);
35065 new_target = build_target_option_node (&global_options);
35067 else
35068 new_target = NULL;
35070 new_optimize = build_optimization_node (&global_options);
35072 if (!new_target)
35073 ret = false;
35075 else if (fndecl)
35077 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
35079 if (old_optimize != new_optimize)
35080 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
35083 cl_target_option_restore (&global_options, &cur_target);
35085 if (old_optimize != new_optimize)
35086 cl_optimization_restore (&global_options,
35087 TREE_OPTIMIZATION (old_optimize));
35089 return ret;
35093 /* Hook to validate the current #pragma GCC target and set the state, and
35094 update the macros based on what was changed. If ARGS is NULL, then
35095 POP_TARGET is used to reset the options. */
35097 bool
35098 rs6000_pragma_target_parse (tree args, tree pop_target)
35100 tree prev_tree = build_target_option_node (&global_options);
35101 tree cur_tree;
35102 struct cl_target_option *prev_opt, *cur_opt;
35103 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
35104 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
35106 if (TARGET_DEBUG_TARGET)
35108 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
35109 fprintf (stderr, "args:");
35110 rs6000_debug_target_options (args, " ");
35111 fprintf (stderr, "\n");
35113 if (pop_target)
35115 fprintf (stderr, "pop_target:\n");
35116 debug_tree (pop_target);
35118 else
35119 fprintf (stderr, "pop_target: <NULL>\n");
35121 fprintf (stderr, "--------------------\n");
35124 if (! args)
35126 cur_tree = ((pop_target)
35127 ? pop_target
35128 : target_option_default_node);
35129 cl_target_option_restore (&global_options,
35130 TREE_TARGET_OPTION (cur_tree));
35132 else
35134 rs6000_cpu_index = rs6000_tune_index = -1;
35135 if (!rs6000_inner_target_options (args, false)
35136 || !rs6000_option_override_internal (false)
35137 || (cur_tree = build_target_option_node (&global_options))
35138 == NULL_TREE)
35140 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
35141 fprintf (stderr, "invalid pragma\n");
35143 return false;
35147 target_option_current_node = cur_tree;
35149 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
35150 change the macros that are defined. */
35151 if (rs6000_target_modify_macros_ptr)
35153 prev_opt = TREE_TARGET_OPTION (prev_tree);
35154 prev_bumask = prev_opt->x_rs6000_builtin_mask;
35155 prev_flags = prev_opt->x_rs6000_isa_flags;
35157 cur_opt = TREE_TARGET_OPTION (cur_tree);
35158 cur_flags = cur_opt->x_rs6000_isa_flags;
35159 cur_bumask = cur_opt->x_rs6000_builtin_mask;
35161 diff_bumask = (prev_bumask ^ cur_bumask);
35162 diff_flags = (prev_flags ^ cur_flags);
35164 if ((diff_flags != 0) || (diff_bumask != 0))
35166 /* Delete old macros. */
35167 rs6000_target_modify_macros_ptr (false,
35168 prev_flags & diff_flags,
35169 prev_bumask & diff_bumask);
35171 /* Define new macros. */
35172 rs6000_target_modify_macros_ptr (true,
35173 cur_flags & diff_flags,
35174 cur_bumask & diff_bumask);
35178 return true;
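/* Usage sketch (hypothetical user code):

     #pragma GCC push_options
     #pragma GCC target ("power8-vector")
     ... code compiled with the extra ISA flags ...
     #pragma GCC pop_options

   The target pragma reaches this function with ARGS set; the matching
   pop_options reaches it with ARGS == NULL and POP_TARGET holding the
   saved option node that is restored above.  */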
35182 /* Remember the last target of rs6000_set_current_function. */
35183 static GTY(()) tree rs6000_previous_fndecl;
35185 /* Establish appropriate back-end context for processing the function
35186 FNDECL. The argument might be NULL to indicate processing at top
35187 level, outside of any function scope. */
35188 static void
35189 rs6000_set_current_function (tree fndecl)
35191 tree old_tree = (rs6000_previous_fndecl
35192 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
35193 : NULL_TREE);
35195 tree new_tree = (fndecl
35196 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
35197 : NULL_TREE);
35199 if (TARGET_DEBUG_TARGET)
35201 bool print_final = false;
35202 fprintf (stderr, "\n==================== rs6000_set_current_function");
35204 if (fndecl)
35205 fprintf (stderr, ", fndecl %s (%p)",
35206 (DECL_NAME (fndecl)
35207 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
35208 : "<unknown>"), (void *)fndecl);
35210 if (rs6000_previous_fndecl)
35211 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
35213 fprintf (stderr, "\n");
35214 if (new_tree)
35216 fprintf (stderr, "\nnew fndecl target specific options:\n");
35217 debug_tree (new_tree);
35218 print_final = true;
35221 if (old_tree)
35223 fprintf (stderr, "\nold fndecl target specific options:\n");
35224 debug_tree (old_tree);
35225 print_final = true;
35228 if (print_final)
35229 fprintf (stderr, "--------------------\n");
35232 /* Only change the context if the function changes. This hook is called
35233 several times in the course of compiling a function, and we don't want to
35234 slow things down too much or call target_reinit when it isn't safe. */
35235 if (fndecl && fndecl != rs6000_previous_fndecl)
35237 rs6000_previous_fndecl = fndecl;
35238 if (old_tree == new_tree)
35241 else if (new_tree && new_tree != target_option_default_node)
35243 cl_target_option_restore (&global_options,
35244 TREE_TARGET_OPTION (new_tree));
35245 if (TREE_TARGET_GLOBALS (new_tree))
35246 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
35247 else
35248 TREE_TARGET_GLOBALS (new_tree)
35249 = save_target_globals_default_opts ();
35252 else if (old_tree && old_tree != target_option_default_node)
35254 new_tree = target_option_current_node;
35255 cl_target_option_restore (&global_options,
35256 TREE_TARGET_OPTION (new_tree));
35257 if (TREE_TARGET_GLOBALS (new_tree))
35258 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
35259 else if (new_tree == target_option_default_node)
35260 restore_target_globals (&default_target_globals);
35261 else
35262 TREE_TARGET_GLOBALS (new_tree)
35263 = save_target_globals_default_opts ();
35269 /* Save the current options */
35271 static void
35272 rs6000_function_specific_save (struct cl_target_option *ptr,
35273 struct gcc_options *opts)
35275 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
35276 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
35279 /* Restore the current options */
35281 static void
35282 rs6000_function_specific_restore (struct gcc_options *opts,
35283 struct cl_target_option *ptr)
35286 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
35287 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
35288 (void) rs6000_option_override_internal (false);
35291 /* Print the current options */
35293 static void
35294 rs6000_function_specific_print (FILE *file, int indent,
35295 struct cl_target_option *ptr)
35297 rs6000_print_isa_options (file, indent, "Isa options set",
35298 ptr->x_rs6000_isa_flags);
35300 rs6000_print_isa_options (file, indent, "Isa options explicit",
35301 ptr->x_rs6000_isa_flags_explicit);
35304 /* Helper function to print the current isa or misc options on a line. */
35306 static void
35307 rs6000_print_options_internal (FILE *file,
35308 int indent,
35309 const char *string,
35310 HOST_WIDE_INT flags,
35311 const char *prefix,
35312 const struct rs6000_opt_mask *opts,
35313 size_t num_elements)
35315 size_t i;
35316 size_t start_column = 0;
35317 size_t cur_column;
35318 size_t max_column = 76;
35319 const char *comma = "";
35321 if (indent)
35322 start_column += fprintf (file, "%*s", indent, "");
35324 if (!flags)
35326 fprintf (file, DEBUG_FMT_S, string, "<none>");
35327 return;
35330 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
35332 /* Print the various mask options. */
35333 cur_column = start_column;
35334 for (i = 0; i < num_elements; i++)
35336 if ((flags & opts[i].mask) != 0)
35338 const char *no_str = opts[i].invert ? "no-" : "";
35339 size_t len = (strlen (comma)
35340 + strlen (prefix)
35341 + strlen (no_str)
35342 + strlen (opts[i].name));
35344 cur_column += len;
35345 if (cur_column > max_column)
35347 fprintf (file, ", \\\n%*s", (int)start_column, "");
35348 cur_column = start_column + len;
35349 comma = "";
35352 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
35353 opts[i].name);
35354 flags &= ~ opts[i].mask;
35355 comma = ", ";
35359 fputs ("\n", file);
35362 /* Helper function to print the current isa options on a line. */
35364 static void
35365 rs6000_print_isa_options (FILE *file, int indent, const char *string,
35366 HOST_WIDE_INT flags)
35368 rs6000_print_options_internal (file, indent, string, flags, "-m",
35369 &rs6000_opt_masks[0],
35370 ARRAY_SIZE (rs6000_opt_masks));
35373 static void
35374 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
35375 HOST_WIDE_INT flags)
35377 rs6000_print_options_internal (file, indent, string, flags, "",
35378 &rs6000_builtin_mask_names[0],
35379 ARRAY_SIZE (rs6000_builtin_mask_names));
35383 /* Hook to determine if one function can safely inline another. */
35385 static bool
35386 rs6000_can_inline_p (tree caller, tree callee)
35388 bool ret = false;
35389 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
35390 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
35392 /* If callee has no option attributes, then it is ok to inline. */
35393 if (!callee_tree)
35394 ret = true;
35396 /* If the caller has no option attributes but the callee does, then it is
35397 not ok to inline. */
35398 else if (!caller_tree)
35399 ret = false;
35401 else
35403 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
35404 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
35406 /* Callee's options should be a subset of the caller's, i.e. a vsx function
35407 can inline an altivec function but a non-vsx function can't inline a
35408 vsx function. */
35409 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
35410 == callee_opts->x_rs6000_isa_flags)
35411 ret = true;
35414 if (TARGET_DEBUG_TARGET)
35415 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
35416 (DECL_NAME (caller)
35417 ? IDENTIFIER_POINTER (DECL_NAME (caller))
35418 : "<unknown>"),
35419 (DECL_NAME (callee)
35420 ? IDENTIFIER_POINTER (DECL_NAME (callee))
35421 : "<unknown>"),
35422 (ret ? "can" : "cannot"));
35424 return ret;
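/* Illustrative example (hypothetical code): under the subset test above,
   a caller built with -mvsx (which implies -maltivec) can inline

     __attribute__((target("altivec"))) static int callee (void);

   because the callee's ISA flags are contained in the caller's, while an
   altivec-only caller cannot inline a vsx callee.  */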
35427 /* Allocate a stack temp and fixup the address so it meets the particular
35428 memory requirements (either offsettable or REG+REG addressing). */
35431 rs6000_allocate_stack_temp (machine_mode mode,
35432 bool offsettable_p,
35433 bool reg_reg_p)
35435 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
35436 rtx addr = XEXP (stack, 0);
35437 int strict_p = (reload_in_progress || reload_completed);
35439 if (!legitimate_indirect_address_p (addr, strict_p))
35441 if (offsettable_p
35442 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
35443 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
35445 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
35446 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
35449 return stack;
35452 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
35453 to such a form to deal with memory reference instructions like STFIWX that
35454 only take reg+reg addressing. */
35457 rs6000_address_for_fpconvert (rtx x)
35459 int strict_p = (reload_in_progress || reload_completed);
35460 rtx addr;
35462 gcc_assert (MEM_P (x));
35463 addr = XEXP (x, 0);
35464 if (! legitimate_indirect_address_p (addr, strict_p)
35465 && ! legitimate_indexed_address_p (addr, strict_p))
35467 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
35469 rtx reg = XEXP (addr, 0);
35470 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
35471 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
35472 gcc_assert (REG_P (reg));
35473 emit_insn (gen_add3_insn (reg, reg, size_rtx));
35474 addr = reg;
35476 else if (GET_CODE (addr) == PRE_MODIFY)
35478 rtx reg = XEXP (addr, 0);
35479 rtx expr = XEXP (addr, 1);
35480 gcc_assert (REG_P (reg));
35481 gcc_assert (GET_CODE (expr) == PLUS);
35482 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
35483 addr = reg;
35486 x = replace_equiv_address (x, copy_addr_to_reg (addr));
35489 return x;
35492 /* Given a memory reference, if it is not in the form for altivec memory
35493 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
35494 convert to the altivec format. */
35497 rs6000_address_for_altivec (rtx x)
35499 gcc_assert (MEM_P (x));
35500 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
35502 rtx addr = XEXP (x, 0);
35503 int strict_p = (reload_in_progress || reload_completed);
35505 if (!legitimate_indexed_address_p (addr, strict_p)
35506 && !legitimate_indirect_address_p (addr, strict_p))
35507 addr = copy_to_mode_reg (Pmode, addr);
35509 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
35510 x = change_address (x, GET_MODE (x), addr);
35513 return x;
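/* E.g. (sketch): for a V4SI reference whose address sits in r5, the result
   above is a MEM whose address has the form (and:P (reg 5) (const_int -16)),
   mirroring how lvx/stvx ignore the low four bits of the effective
   address.  */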
35516 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
35518 On the RS/6000, all integer constants are acceptable, though most won't be
35519 valid for particular insns. Only easy FP constants are acceptable. */
35521 static bool
35522 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
35524 if (TARGET_ELF && tls_referenced_p (x))
35525 return false;
35527 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
35528 || GET_MODE (x) == VOIDmode
35529 || (TARGET_POWERPC64 && mode == DImode)
35530 || easy_fp_constant (x, mode)
35531 || easy_vector_constant (x, mode));
35535 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
35537 static bool
35538 chain_already_loaded (rtx_insn *last)
35540 for (; last != NULL; last = PREV_INSN (last))
35542 if (NONJUMP_INSN_P (last))
35544 rtx patt = PATTERN (last);
35546 if (GET_CODE (patt) == SET)
35548 rtx lhs = XEXP (patt, 0);
35550 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
35551 return true;
35555 return false;
35558 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
35560 void
35561 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35563 const bool direct_call_p
35564 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
35565 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
35566 rtx toc_load = NULL_RTX;
35567 rtx toc_restore = NULL_RTX;
35568 rtx func_addr;
35569 rtx abi_reg = NULL_RTX;
35570 rtx call[4];
35571 int n_call;
35572 rtx insn;
35574 /* Handle longcall attributes. */
35575 if (INTVAL (cookie) & CALL_LONG)
35576 func_desc = rs6000_longcall_ref (func_desc);
35578 /* Handle indirect calls. */
35579 if (GET_CODE (func_desc) != SYMBOL_REF
35580 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
35582 /* Save the TOC into its reserved slot before the call,
35583 and prepare to restore it after the call. */
35584 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
35585 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
35586 rtx stack_toc_mem = gen_frame_mem (Pmode,
35587 gen_rtx_PLUS (Pmode, stack_ptr,
35588 stack_toc_offset));
35589 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
35590 gen_rtvec (1, stack_toc_offset),
35591 UNSPEC_TOCSLOT);
35592 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
35594 /* Can we optimize saving the TOC in the prologue or
35595 do we need to do it at every call? */
35596 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
35597 cfun->machine->save_toc_in_prologue = true;
35598 else
35600 MEM_VOLATILE_P (stack_toc_mem) = 1;
35601 emit_move_insn (stack_toc_mem, toc_reg);
35604 if (DEFAULT_ABI == ABI_ELFv2)
35606 /* A function pointer in the ELFv2 ABI is just a plain address, but
35607 the ABI requires it to be loaded into r12 before the call. */
35608 func_addr = gen_rtx_REG (Pmode, 12);
35609 emit_move_insn (func_addr, func_desc);
35610 abi_reg = func_addr;
35612 else
35614 /* A function pointer under AIX is a pointer to a data area whose
35615 first word contains the actual address of the function, whose
35616 second word contains a pointer to its TOC, and whose third word
35617 contains a value to place in the static chain register (r11).
35618 Note that if we load the static chain, our "trampoline" need
35619 not have any executable code. */
35621 /* Load up address of the actual function. */
35622 func_desc = force_reg (Pmode, func_desc);
35623 func_addr = gen_reg_rtx (Pmode);
35624 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
35626 /* Prepare to load the TOC of the called function. Note that the
35627 TOC load must happen immediately before the actual call so
35628 that unwinding the TOC registers works correctly. See the
35629 comment in frob_update_context. */
35630 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
35631 rtx func_toc_mem = gen_rtx_MEM (Pmode,
35632 gen_rtx_PLUS (Pmode, func_desc,
35633 func_toc_offset));
35634 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
35636 /* If we have a static chain, load it up. But, if the call was
35637 originally direct, the 3rd word has not been written since no
35638 trampoline has been built, so we ought not to load it, lest we
35639 overwrite a static chain value. */
35640 if (!direct_call_p
35641 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
35642 && !chain_already_loaded (get_current_sequence ()->next->last))
35644 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
35645 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
35646 rtx func_sc_mem = gen_rtx_MEM (Pmode,
35647 gen_rtx_PLUS (Pmode, func_desc,
35648 func_sc_offset));
35649 emit_move_insn (sc_reg, func_sc_mem);
35650 abi_reg = sc_reg;
35654 else
35656 /* Direct calls use the TOC: for local calls, the callee will
35657 assume the TOC register is set; for non-local calls, the
35658 PLT stub needs the TOC register. */
35659 abi_reg = toc_reg;
35660 func_addr = func_desc;
35663 /* Create the call. */
35664 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
35665 if (value != NULL_RTX)
35666 call[0] = gen_rtx_SET (value, call[0]);
35667 n_call = 1;
35669 if (toc_load)
35670 call[n_call++] = toc_load;
35671 if (toc_restore)
35672 call[n_call++] = toc_restore;
35674 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
35676 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
35677 insn = emit_call_insn (insn);
35679 /* Mention all registers defined by the ABI to hold information
35680 as uses in CALL_INSN_FUNCTION_USAGE. */
35681 if (abi_reg)
35682 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
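/* For reference, the AIX function descriptor dereferenced above has this
   layout (three Pmode-sized words; sketch):

     word 0: entry-point address   -> loaded into FUNC_ADDR
     word 1: callee's TOC pointer  -> loaded right before the call
     word 2: static chain value    -> loaded into r11 when needed  */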
35685 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
35687 void
35688 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35690 rtx call[2];
35691 rtx insn;
35693 gcc_assert (INTVAL (cookie) == 0);
35695 /* Create the call. */
35696 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
35697 if (value != NULL_RTX)
35698 call[0] = gen_rtx_SET (value, call[0]);
35700 call[1] = simple_return_rtx;
35702 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
35703 insn = emit_call_insn (insn);
35705 /* Note use of the TOC register. */
35706 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
35707 /* We need to also mark a use of the link register since the function we
35708 sibling-call to will use it to return to our caller. */
35709 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
35712 /* Return whether we need to always update the saved TOC pointer when we update
35713 the stack pointer. */
35715 static bool
35716 rs6000_save_toc_in_prologue_p (void)
35718 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
35721 #ifdef HAVE_GAS_HIDDEN
35722 # define USE_HIDDEN_LINKONCE 1
35723 #else
35724 # define USE_HIDDEN_LINKONCE 0
35725 #endif
35727 /* Fills in the label name that should be used for a 476 link stack thunk. */
35729 void
35730 get_ppc476_thunk_name (char name[32])
35732 gcc_assert (TARGET_LINK_STACK);
35734 if (USE_HIDDEN_LINKONCE)
35735 sprintf (name, "__ppc476.get_thunk");
35736 else
35737 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
35740 /* This function emits the simple thunk routine that is used to preserve
35741 the link stack on the 476 cpu. */
35743 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
35744 static void
35745 rs6000_code_end (void)
35747 char name[32];
35748 tree decl;
35750 if (!TARGET_LINK_STACK)
35751 return;
35753 get_ppc476_thunk_name (name);
35755 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
35756 build_function_type_list (void_type_node, NULL_TREE));
35757 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
35758 NULL_TREE, void_type_node);
35759 TREE_PUBLIC (decl) = 1;
35760 TREE_STATIC (decl) = 1;
35762 #if RS6000_WEAK
35763 if (USE_HIDDEN_LINKONCE)
35765 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
35766 targetm.asm_out.unique_section (decl, 0);
35767 switch_to_section (get_named_section (decl, NULL, 0));
35768 DECL_WEAK (decl) = 1;
35769 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
35770 targetm.asm_out.globalize_label (asm_out_file, name);
35771 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
35772 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
35774 else
35775 #endif
35777 switch_to_section (text_section);
35778 ASM_OUTPUT_LABEL (asm_out_file, name);
35781 DECL_INITIAL (decl) = make_node (BLOCK);
35782 current_function_decl = decl;
35783 allocate_struct_function (decl, false);
35784 init_function_start (decl);
35785 first_function_block_is_cold = false;
35786 /* Make sure unwind info is emitted for the thunk if needed. */
35787 final_start_function (emit_barrier (), asm_out_file, 1);
35789 fputs ("\tblr\n", asm_out_file);
35791 final_end_function ();
35792 init_insn_lengths ();
35793 free_after_compilation (cfun);
35794 set_cfun (NULL);
35795 current_function_decl = NULL;
35798 /* Add r30 to hard reg set if the prologue sets it up and it is not
35799 pic_offset_table_rtx. */
35801 static void
35802 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
35804 if (!TARGET_SINGLE_PIC_BASE
35805 && TARGET_TOC
35806 && TARGET_MINIMAL_TOC
35807 && get_pool_size () != 0)
35808 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
35809 if (cfun->machine->split_stack_argp_used)
35810 add_to_hard_reg_set (&set->set, Pmode, 12);
35814 /* Helper function for rs6000_split_logical to emit a logical instruction after
35815 splitting the operation to single GPR registers.
35817 DEST is the destination register.
35818 OP1 and OP2 are the input source registers.
35819 CODE is the base operation (AND, IOR, XOR, NOT).
35820 MODE is the machine mode.
35821 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35822 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35823 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35825 static void
35826 rs6000_split_logical_inner (rtx dest,
35827 rtx op1,
35828 rtx op2,
35829 enum rtx_code code,
35830 machine_mode mode,
35831 bool complement_final_p,
35832 bool complement_op1_p,
35833 bool complement_op2_p)
35835 rtx bool_rtx;
35837 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
35838 if (op2 && GET_CODE (op2) == CONST_INT
35839 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
35840 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35842 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
35843 HOST_WIDE_INT value = INTVAL (op2) & mask;
35845 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
35846 if (code == AND)
35848 if (value == 0)
35850 emit_insn (gen_rtx_SET (dest, const0_rtx));
35851 return;
35854 else if (value == mask)
35856 if (!rtx_equal_p (dest, op1))
35857 emit_insn (gen_rtx_SET (dest, op1));
35858 return;
35862 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
35863 into separate ORI/ORIS or XORI/XORIS instructions. */
35864 else if (code == IOR || code == XOR)
35866 if (value == 0)
35868 if (!rtx_equal_p (dest, op1))
35869 emit_insn (gen_rtx_SET (dest, op1));
35870 return;
35875 if (code == AND && mode == SImode
35876 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35878 emit_insn (gen_andsi3 (dest, op1, op2));
35879 return;
35882 if (complement_op1_p)
35883 op1 = gen_rtx_NOT (mode, op1);
35885 if (complement_op2_p)
35886 op2 = gen_rtx_NOT (mode, op2);
35888 /* For canonical RTL, if only one arm is inverted it is the first. */
35889 if (!complement_op1_p && complement_op2_p)
35890 std::swap (op1, op2);
35892 bool_rtx = ((code == NOT)
35893 ? gen_rtx_NOT (mode, op1)
35894 : gen_rtx_fmt_ee (code, mode, op1, op2));
35896 if (complement_final_p)
35897 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
35899 emit_insn (gen_rtx_SET (dest, bool_rtx));
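/* Worked example (sketch): for an SImode AND with op2 == 0 the code above
   emits "dest = 0"; with op2 == 0xffffffff it degenerates to a plain move;
   IOR/XOR with 0 likewise become a move; everything else falls through to
   a single logical insn (possibly wrapped in NOTs).  */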
35902 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
35903 operations are split immediately during RTL generation to allow for more
35904 optimizations of the AND/IOR/XOR.
35906 OPERANDS is an array containing the destination and two input operands.
35907 CODE is the base operation (AND, IOR, XOR, NOT).
35908 MODE is the machine mode.
35909 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35910 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35911 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35915 static void
35916 rs6000_split_logical_di (rtx operands[3],
35917 enum rtx_code code,
35918 bool complement_final_p,
35919 bool complement_op1_p,
35920 bool complement_op2_p)
35922 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
35923 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
35924 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
35925 enum hi_lo { hi = 0, lo = 1 };
35926 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
35927 size_t i;
35929 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
35930 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
35931 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
35932 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
35934 if (code == NOT)
35935 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
35936 else
35938 if (GET_CODE (operands[2]) != CONST_INT)
35940 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
35941 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
35943 else
35945 HOST_WIDE_INT value = INTVAL (operands[2]);
35946 HOST_WIDE_INT value_hi_lo[2];
35948 gcc_assert (!complement_final_p);
35949 gcc_assert (!complement_op1_p);
35950 gcc_assert (!complement_op2_p);
35952 value_hi_lo[hi] = value >> 32;
35953 value_hi_lo[lo] = value & lower_32bits;
35955 for (i = 0; i < 2; i++)
35957 HOST_WIDE_INT sub_value = value_hi_lo[i];
35959 if (sub_value & sign_bit)
35960 sub_value |= upper_32bits;
35962 op2_hi_lo[i] = GEN_INT (sub_value);
35964 /* If this is an AND instruction, check to see if we need to load
35965 the value in a register. */
35966 if (code == AND && sub_value != -1 && sub_value != 0
35967 && !and_operand (op2_hi_lo[i], SImode))
35968 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
35973 for (i = 0; i < 2; i++)
35975 /* Split large IOR/XOR operations. */
35976 if ((code == IOR || code == XOR)
35977 && GET_CODE (op2_hi_lo[i]) == CONST_INT
35978 && !complement_final_p
35979 && !complement_op1_p
35980 && !complement_op2_p
35981 && !logical_const_operand (op2_hi_lo[i], SImode))
35983 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
35984 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
35985 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
35986 rtx tmp = gen_reg_rtx (SImode);
35988 /* Make sure the constant is sign extended. */
35989 if ((hi_16bits & sign_bit) != 0)
35990 hi_16bits |= upper_32bits;
35992 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
35993 code, SImode, false, false, false);
35995 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
35996 code, SImode, false, false, false);
35998 else
35999 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
36000 code, SImode, complement_final_p,
36001 complement_op1_p, complement_op2_p);
36004 return;
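/* Worked example (sketch, 32-bit target): for the low word of
   "x |= 0x12345678", the constant is not a logical_const_operand, so the
   split above emits two halves roughly equivalent to

     oris tmp,src,0x1234      # IOR with the high-order 16 bits
     ori  dest,tmp,0x5678     # IOR with the low-order 16 bits

   rather than forcing the constant into a register first.  */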
36007 /* Split the insns that make up boolean operations operating on multiple GPR
36008 registers. The boolean MD patterns ensure that the inputs either are
36009 exactly the same as the output registers, or there is no overlap.
36011 OPERANDS is an array containing the destination and two input operands.
36012 CODE is the base operation (AND, IOR, XOR, NOT).
36013 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
36014 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
36015 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
36017 void
36018 rs6000_split_logical (rtx operands[3],
36019 enum rtx_code code,
36020 bool complement_final_p,
36021 bool complement_op1_p,
36022 bool complement_op2_p)
36024 machine_mode mode = GET_MODE (operands[0]);
36025 machine_mode sub_mode;
36026 rtx op0, op1, op2;
36027 int sub_size, regno0, regno1, nregs, i;
36029 /* If this is DImode, use the specialized version that can run before
36030 register allocation. */
36031 if (mode == DImode && !TARGET_POWERPC64)
36033 rs6000_split_logical_di (operands, code, complement_final_p,
36034 complement_op1_p, complement_op2_p);
36035 return;
36038 op0 = operands[0];
36039 op1 = operands[1];
36040 op2 = (code == NOT) ? NULL_RTX : operands[2];
36041 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
36042 sub_size = GET_MODE_SIZE (sub_mode);
36043 regno0 = REGNO (op0);
36044 regno1 = REGNO (op1);
36046 gcc_assert (reload_completed);
36047 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
36048 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
36050 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
36051 gcc_assert (nregs > 1);
36053 if (op2 && REG_P (op2))
36054 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
36056 for (i = 0; i < nregs; i++)
36058 int offset = i * sub_size;
36059 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
36060 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
36061 rtx sub_op2 = ((code == NOT)
36062 ? NULL_RTX
36063 : simplify_subreg (sub_mode, op2, mode, offset));
36065 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
36066 complement_final_p, complement_op1_p,
36067 complement_op2_p);
36070 return;
36074 /* Return true if the peephole2 can combine an addis instruction and a load
36075 with an offset into a pair that can be fused together on
36076 a power8. */
36078 bool
36079 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
36080 rtx addis_value, /* addis value. */
36081 rtx target, /* target register that is loaded. */
36082 rtx mem) /* bottom part of the memory addr. */
36084 rtx addr;
36085 rtx base_reg;
36087 /* Validate arguments. */
36088 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
36089 return false;
36091 if (!base_reg_operand (target, GET_MODE (target)))
36092 return false;
36094 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
36095 return false;
36097 /* Allow sign/zero extension. */
36098 if (GET_CODE (mem) == ZERO_EXTEND
36099 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
36100 mem = XEXP (mem, 0);
36102 if (!MEM_P (mem))
36103 return false;
36105 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
36106 return false;
36108 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
36109 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
36110 return false;
36112 /* Validate that the register used to load the high value is either the
36113 register being loaded, or we can safely replace its use.
36115 This function is only called from the peephole2 pass and we assume that
36116 there are 2 instructions in the peephole (addis and load), so we want to
36117 check if the target register was not used in the memory address and the
36118 register to hold the addis result is dead after the peephole. */
36119 if (REGNO (addis_reg) != REGNO (target))
36121 if (reg_mentioned_p (target, mem))
36122 return false;
36124 if (!peep2_reg_dead_p (2, addis_reg))
36125 return false;
36127 /* If the target register being loaded is the stack pointer, we must
36128 avoid loading any other value into it, even temporarily. */
36129 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
36130 return false;
36133 base_reg = XEXP (addr, 0);
36134 return REGNO (addis_reg) == REGNO (base_reg);
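/* Shape of the candidate sequence (illustrative asm; "sym" is a stand-in
   symbol):

     addis 9,2,sym@toc@ha      ; sets ADDIS_REG
     lwz   9,sym@toc@l(9)      ; loads TARGET from the fused address

   Power8 can fuse such a pair when, as validated above, the addis result
   is used only as the load's base address (and is dead afterwards if it
   differs from the target).  */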
36137 /* During the peephole2 pass, adjust and expand the insns for a load fusion
36138 sequence. We adjust the addis register to use the target register. If the
36139 load sign extends, we adjust the code to do a zero-extending load followed
36140 by an explicit sign extension, since the fusion only covers zero-extending
36141 loads.
36143 The operands are:
36144 operands[0] register set with addis (to be replaced with target)
36145 operands[1] value set via addis
36146 operands[2] target register being loaded
36147 operands[3] D-form memory reference using operands[0]. */
36149 void
36150 expand_fusion_gpr_load (rtx *operands)
36152 rtx addis_value = operands[1];
36153 rtx target = operands[2];
36154 rtx orig_mem = operands[3];
36155 rtx new_addr, new_mem, orig_addr, offset;
36156 enum rtx_code plus_or_lo_sum;
36157 machine_mode target_mode = GET_MODE (target);
36158 machine_mode extend_mode = target_mode;
36159 machine_mode ptr_mode = Pmode;
36160 enum rtx_code extend = UNKNOWN;
36162 if (GET_CODE (orig_mem) == ZERO_EXTEND
36163 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
36165 extend = GET_CODE (orig_mem);
36166 orig_mem = XEXP (orig_mem, 0);
36167 target_mode = GET_MODE (orig_mem);
36170 gcc_assert (MEM_P (orig_mem));
36172 orig_addr = XEXP (orig_mem, 0);
36173 plus_or_lo_sum = GET_CODE (orig_addr);
36174 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36176 offset = XEXP (orig_addr, 1);
36177 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36178 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36180 if (extend != UNKNOWN)
36181 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
36183 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
36184 UNSPEC_FUSION_GPR);
36185 emit_insn (gen_rtx_SET (target, new_mem));
36187 if (extend == SIGN_EXTEND)
36189 int sub_off = ((BYTES_BIG_ENDIAN)
36190 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
36191 : 0);
36192 rtx sign_reg
36193 = simplify_subreg (target_mode, target, extend_mode, sub_off);
36195 emit_insn (gen_rtx_SET (target,
36196 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
36199 return;
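/* E.g. (sketch): a fused sign-extending halfword load is emitted above as
   the zero-extending "lhz" form wrapped in UNSPEC_FUSION_GPR, followed by
   a separate sign_extend insn (an "extsh"-style instruction), since
   power8 fusion only covers zero-extending loads.  */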
36202 /* Emit the addis instruction that will be part of a fused instruction
36203 sequence. */
36205 void
36206 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
36207 const char *mode_name)
36209 rtx fuse_ops[10];
36210 char insn_template[80];
36211 const char *addis_str = NULL;
36212 const char *comment_str = ASM_COMMENT_START;
36214 if (*comment_str == ' ')
36215 comment_str++;
36217 /* Emit the addis instruction. */
36218 fuse_ops[0] = target;
36219 if (satisfies_constraint_L (addis_value))
36221 fuse_ops[1] = addis_value;
36222 addis_str = "lis %0,%v1";
36225 else if (GET_CODE (addis_value) == PLUS)
36227 rtx op0 = XEXP (addis_value, 0);
36228 rtx op1 = XEXP (addis_value, 1);
36230 if (REG_P (op0) && CONST_INT_P (op1)
36231 && satisfies_constraint_L (op1))
36233 fuse_ops[1] = op0;
36234 fuse_ops[2] = op1;
36235 addis_str = "addis %0,%1,%v2";
36239 else if (GET_CODE (addis_value) == HIGH)
36241 rtx value = XEXP (addis_value, 0);
36242 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
36244 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
36245 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
36246 if (TARGET_ELF)
36247 addis_str = "addis %0,%2,%1@toc@ha";
36249 else if (TARGET_XCOFF)
36250 addis_str = "addis %0,%1@u(%2)";
36252 else
36253 gcc_unreachable ();
36256 else if (GET_CODE (value) == PLUS)
36258 rtx op0 = XEXP (value, 0);
36259 rtx op1 = XEXP (value, 1);
36261 if (GET_CODE (op0) == UNSPEC
36262 && XINT (op0, 1) == UNSPEC_TOCREL
36263 && CONST_INT_P (op1))
36265 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
36266 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
36267 fuse_ops[3] = op1;
36268 if (TARGET_ELF)
36269 addis_str = "addis %0,%2,%1+%3@toc@ha";
36271 else if (TARGET_XCOFF)
36272 addis_str = "addis %0,%1+%3@u(%2)";
36274 else
36275 gcc_unreachable ();
36279 else if (satisfies_constraint_L (value))
36281 fuse_ops[1] = value;
36282 addis_str = "lis %0,%v1";
36285 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
36287 fuse_ops[1] = value;
36288 addis_str = "lis %0,%1@ha";
36292 if (!addis_str)
36293 fatal_insn ("Could not generate addis value for fusion", addis_value);
36295 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
36296 comment, mode_name);
36297 output_asm_insn (insn_template, fuse_ops);
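/* Sample output (illustrative): for a HIGH of a TOC-relative symbol on an
   ELF target, the template above prints something like

     addis 9,2,sym@toc@ha		# gpr load fusion, type int

   with the trailing comment built from COMMENT and MODE_NAME.  */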
36300 /* Emit a D-form load or store instruction that is the second instruction
36301 of a fusion sequence. */
36303 void
36304 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
36305 const char *insn_str)
36307 rtx fuse_ops[10];
36308 char insn_template[80];
36310 fuse_ops[0] = load_store_reg;
36311 fuse_ops[1] = addis_reg;
36313 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
36315 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
36316 fuse_ops[2] = offset;
36317 output_asm_insn (insn_template, fuse_ops);
36320 else if (GET_CODE (offset) == UNSPEC
36321 && XINT (offset, 1) == UNSPEC_TOCREL)
36323 if (TARGET_ELF)
36324 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
36326 else if (TARGET_XCOFF)
36327 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
36329 else
36330 gcc_unreachable ();
36332 fuse_ops[2] = XVECEXP (offset, 0, 0);
36333 output_asm_insn (insn_template, fuse_ops);
36336 else if (GET_CODE (offset) == PLUS
36337 && GET_CODE (XEXP (offset, 0)) == UNSPEC
36338 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
36339 && CONST_INT_P (XEXP (offset, 1)))
36341 rtx tocrel_unspec = XEXP (offset, 0);
36342 if (TARGET_ELF)
36343 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
36345 else if (TARGET_XCOFF)
36346 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
36348 else
36349 gcc_unreachable ();
36351 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
36352 fuse_ops[3] = XEXP (offset, 1);
36353 output_asm_insn (insn_template, fuse_ops);
36356 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
36358 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
36360 fuse_ops[2] = offset;
36361 output_asm_insn (insn_template, fuse_ops);
36364 else
36365 fatal_insn ("Unable to generate load/store offset for fusion", offset);
36367 return;
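/* Sample output (illustrative): with a CONST_INT offset this prints,
   e.g., "lwz 9,8(10)"; with a TOC-relative UNSPEC on an ELF target it
   prints, e.g., "lwz 9,sym@toc@l(10)".  */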
36370 /* Wrap a TOC address that can be fused to indicate that special fusion
36371 processing is needed. */
36374 fusion_wrap_memory_address (rtx old_mem)
36376 rtx old_addr = XEXP (old_mem, 0);
36377 rtvec v = gen_rtvec (1, old_addr);
36378 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
36379 return replace_equiv_address_nv (old_mem, new_addr, false);
36382 /* Given an address, convert it into the addis and load offset parts. Addresses
36383 created during the peephole2 process look like:
36384 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
36385 (unspec [(...)] UNSPEC_TOCREL))
36387 Addresses created via toc fusion look like:
36388 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
36390 static void
36391 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
36393 rtx hi, lo;
36395 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
36397 lo = XVECEXP (addr, 0, 0);
36398 hi = gen_rtx_HIGH (Pmode, lo);
36400 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
36402 hi = XEXP (addr, 0);
36403 lo = XEXP (addr, 1);
36405 else
36406 gcc_unreachable ();
36408 *p_hi = hi;
36409 *p_lo = lo;
36412 /* Return a string to fuse an addis instruction with a gpr load into the same
36413 register that the addis instruction set. The address that is used
36414 is the logical address that was formed during peephole2:
36415 (lo_sum (high) (low-part))
36417 Or the address is the TOC address that is wrapped before register allocation:
36418 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
36420 The code is complicated, so we call output_asm_insn directly, and just
36421 return "". */
36423 const char *
36424 emit_fusion_gpr_load (rtx target, rtx mem)
36426 rtx addis_value;
36427 rtx addr;
36428 rtx load_offset;
36429 const char *load_str = NULL;
36430 const char *mode_name = NULL;
36431 machine_mode mode;
36433 if (GET_CODE (mem) == ZERO_EXTEND)
36434 mem = XEXP (mem, 0);
36436 gcc_assert (REG_P (target) && MEM_P (mem));
36438 addr = XEXP (mem, 0);
36439 fusion_split_address (addr, &addis_value, &load_offset);
36441 /* Now emit the load instruction to the same register. */
36442 mode = GET_MODE (mem);
36443 switch (mode)
36445 case QImode:
36446 mode_name = "char";
36447 load_str = "lbz";
36448 break;
36450 case HImode:
36451 mode_name = "short";
36452 load_str = "lhz";
36453 break;
36455 case SImode:
36456 case SFmode:
36457 mode_name = (mode == SFmode) ? "float" : "int";
36458 load_str = "lwz";
36459 break;
36461 case DImode:
36462 case DFmode:
36463 gcc_assert (TARGET_POWERPC64);
36464 mode_name = (mode == DFmode) ? "double" : "long";
36465 load_str = "ld";
36466 break;
36468 default:
36469 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
36472 /* Emit the addis instruction. */
36473 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
36475 /* Emit the D-form load instruction. */
36476 emit_fusion_load_store (target, target, load_offset, load_str);
36478 return "";
36482 /* Return true if the peephole2 can combine an addis instruction with a
36483 load/store memory operation into a fused pair. This form of fusion was
36484 added in ISA 3.0 (power9) hardware. */
36486 bool
36487 fusion_p9_p (rtx addis_reg, /* register set via addis. */
36488 rtx addis_value, /* addis value. */
36489 rtx dest, /* destination (memory or register). */
36490 rtx src) /* source (register or memory). */
36492 rtx addr, mem, offset;
36493 enum machine_mode mode = GET_MODE (src);
36495 /* Validate arguments. */
36496 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
36497 return false;
36499 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
36500 return false;
36502 /* Ignore extend operations that are part of the load. */
36503 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
36504 src = XEXP (src, 0);
36506 /* Test for memory<-register or register<-memory. */
36507 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
36509 if (!MEM_P (dest))
36510 return false;
36512 mem = dest;
36515 else if (MEM_P (src))
36517 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
36518 return false;
36520 mem = src;
36523 else
36524 return false;
36526 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
36527 if (GET_CODE (addr) == PLUS)
36529 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36530 return false;
36532 return satisfies_constraint_I (XEXP (addr, 1));
36535 else if (GET_CODE (addr) == LO_SUM)
36537 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36538 return false;
36540 offset = XEXP (addr, 1);
36541 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
36542 return small_toc_ref (offset, GET_MODE (offset));
36544 else if (TARGET_ELF && !TARGET_POWERPC64)
36545 return CONSTANT_P (offset);
36548 return false;
36551 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36552 load sequence.
36554 The operands are:
36555 operands[0] register set with addis
36556 operands[1] value set via addis
36557 operands[2] target register being loaded
36558 operands[3] D-form memory reference using operands[0].
36560 This is similar to the fusion introduced with power8, except it scales to
36561 both loads/stores and does not require the result register to be the same as
36562 the base register. At the moment, we only do this if register set with addis
36563 is dead. */
36565 void
36566 expand_fusion_p9_load (rtx *operands)
36568 rtx tmp_reg = operands[0];
36569 rtx addis_value = operands[1];
36570 rtx target = operands[2];
36571 rtx orig_mem = operands[3];
36572 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
36573 enum rtx_code plus_or_lo_sum;
36574 machine_mode target_mode = GET_MODE (target);
36575 machine_mode extend_mode = target_mode;
36576 machine_mode ptr_mode = Pmode;
36577 enum rtx_code extend = UNKNOWN;
36579 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
36581 extend = GET_CODE (orig_mem);
36582 orig_mem = XEXP (orig_mem, 0);
36583 target_mode = GET_MODE (orig_mem);
36586 gcc_assert (MEM_P (orig_mem));
36588 orig_addr = XEXP (orig_mem, 0);
36589 plus_or_lo_sum = GET_CODE (orig_addr);
36590 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36592 offset = XEXP (orig_addr, 1);
36593 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36594 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36596 if (extend != UNKNOWN)
36597 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
36599 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
36600 UNSPEC_FUSION_P9);
36602 set = gen_rtx_SET (target, new_mem);
36603 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36604 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36605 emit_insn (insn);
36607 return;
36610 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36611 store sequence.
36613 The operands are:
36614 operands[0] register set with addis
36615 operands[1] value set via addis
36616 operands[2] target D-form memory being stored to
36617 operands[3] register being stored
36619 This is similar to the fusion introduced with power8, except it scales to
36620 both loads/stores and does not require the result register to be the same as
36621 the base register. At the moment, we only do this if the register set with
36622 addis is dead. */
36624 void
36625 expand_fusion_p9_store (rtx *operands)
36627 rtx tmp_reg = operands[0];
36628 rtx addis_value = operands[1];
36629 rtx orig_mem = operands[2];
36630 rtx src = operands[3];
36631 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
36632 enum rtx_code plus_or_lo_sum;
36633 machine_mode target_mode = GET_MODE (orig_mem);
36634 machine_mode ptr_mode = Pmode;
36636 gcc_assert (MEM_P (orig_mem));
36638 orig_addr = XEXP (orig_mem, 0);
36639 plus_or_lo_sum = GET_CODE (orig_addr);
36640 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36642 offset = XEXP (orig_addr, 1);
36643 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36644 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36646 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
36647 UNSPEC_FUSION_P9);
36649 set = gen_rtx_SET (new_mem, new_src);
36650 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36651 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36652 emit_insn (insn);
36654 return;
36657 /* Return a string to fuse an addis instruction with a load using extended
36658 fusion. The address that is used is the logical address that was formed
36659 during peephole2: (lo_sum (high) (low-part))
36661 The code is complicated, so we call output_asm_insn directly, and just
36662 return "". */
36664 const char *
36665 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
36667 enum machine_mode mode = GET_MODE (reg);
36668 rtx hi;
36669 rtx lo;
36670 rtx addr;
36671 const char *load_string;
36672 int r;
36674 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
36676 mem = XEXP (mem, 0);
36677 mode = GET_MODE (mem);
36680 if (GET_CODE (reg) == SUBREG)
36682 gcc_assert (SUBREG_BYTE (reg) == 0);
36683 reg = SUBREG_REG (reg);
36686 if (!REG_P (reg))
36687 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
36689 r = REGNO (reg);
36690 if (FP_REGNO_P (r))
36692 if (mode == SFmode)
36693 load_string = "lfs";
36694 else if (mode == DFmode || mode == DImode)
36695 load_string = "lfd";
36696 else
36697 gcc_unreachable ();
36699 else if (INT_REGNO_P (r))
36701 switch (mode)
36703 case QImode:
36704 load_string = "lbz";
36705 break;
36706 case HImode:
36707 load_string = "lhz";
36708 break;
36709 case SImode:
36710 case SFmode:
36711 load_string = "lwz";
36712 break;
36713 case DImode:
36714 case DFmode:
36715 if (!TARGET_POWERPC64)
36716 gcc_unreachable ();
36717 load_string = "ld";
36718 break;
36719 default:
36720 gcc_unreachable ();
36723 else
36724 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
36726 if (!MEM_P (mem))
36727 fatal_insn ("emit_fusion_p9_load not MEM", mem);
36729 addr = XEXP (mem, 0);
36730 fusion_split_address (addr, &hi, &lo);
36732 /* Emit the addis instruction. */
36733 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
36735 /* Emit the D-form load instruction. */
36736 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
36738 return "";
36741 /* Return a string to fuse an addis instruction with a store using extended
36742 fusion. The address that is used is the logical address that was formed
36743 during peephole2: (lo_sum (high) (low-part))
36745 The code is complicated, so we call output_asm_insn directly, and just
36746 return "". */
36748 const char *
36749 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
36751 enum machine_mode mode = GET_MODE (reg);
36752 rtx hi;
36753 rtx lo;
36754 rtx addr;
36755 const char *store_string;
36756 int r;
36758 if (GET_CODE (reg) == SUBREG)
36760 gcc_assert (SUBREG_BYTE (reg) == 0);
36761 reg = SUBREG_REG (reg);
36764 if (!REG_P (reg))
36765 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
36767 r = REGNO (reg);
36768 if (FP_REGNO_P (r))
36770 if (mode == SFmode)
36771 store_string = "stfs";
36772 else if (mode == DFmode)
36773 store_string = "stfd";
36774 else
36775 gcc_unreachable ();
36777 else if (INT_REGNO_P (r))
36779 switch (mode)
36781 case QImode:
36782 store_string = "stb";
36783 break;
36784 case HImode:
36785 store_string = "sth";
36786 break;
36787 case SImode:
36788 case SFmode:
36789 store_string = "stw";
36790 break;
36791 case DImode:
36792 case DFmode:
36793 if (!TARGET_POWERPC64)
36794 gcc_unreachable ();
36795 store_string = "std";
36796 break;
36797 default:
36798 gcc_unreachable ();
36801 else
36802 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
36804 if (!MEM_P (mem))
36805 fatal_insn ("emit_fusion_p9_store not MEM", mem);
36807 addr = XEXP (mem, 0);
36808 fusion_split_address (addr, &hi, &lo);
36810 /* Emit the addis instruction. */
36811 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
36813 /* Emit the D-form store instruction. */
36814 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
36816 return "";
36820 /* Analyze vector computations and remove unnecessary doubleword
36821 swaps (xxswapdi instructions). This pass is performed only
36822 for little-endian VSX code generation.
36824 For this specific case, loads and stores of 4x32 and 2x64 vectors
36825 are inefficient. These are implemented using the lvxd2x and
36826 stvxd2x instructions, which invert the order of doublewords in
36827 a vector register. Thus the code generation inserts an xxswapdi
36828 after each such load, and prior to each such store. (For spill
36829 code after register assignment, an additional xxswapdi is inserted
36830 following each store in order to return a hard register to its
36831 unpermuted value.)
36833 The extra xxswapdi instructions reduce performance. This can be
36834 particularly bad for vectorized code. The purpose of this pass
36835 is to reduce the number of xxswapdi instructions required for
36836 correctness.
36838 The primary insight is that much code that operates on vectors
36839 does not care about the relative order of elements in a register,
36840 so long as the correct memory order is preserved. If we have
36841 a computation where all input values are provided by lvxd2x/xxswapdi
36842 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
36843 and all intermediate computations are pure SIMD (independent of
36844 element order), then all the xxswapdi's associated with the loads
36845 and stores may be removed.
36847 This pass uses some of the infrastructure and logical ideas from
36848 the "web" pass in web.c. We create maximal webs of computations
36849 fitting the description above using union-find. Each such web is
36850 then optimized by removing its unnecessary xxswapdi instructions.
36852 The pass is placed prior to global optimization so that we can
36853 perform the optimization in the safest and simplest way possible;
36854 that is, by replacing each xxswapdi insn with a register copy insn.
36855 Subsequent forward propagation will remove copies where possible.
36857 There are some operations sensitive to element order that we can
36858 still allow, provided we modify those operations appropriately.
36859 These include CONST_VECTORs, for which we must swap the first and
36860 second halves of the constant vector; and SUBREGs, for which we
36861 must adjust the byte offset to account for the swapped doublewords.
36862 A remaining opportunity would be non-immediate-form splats, for
36863 which we should adjust the selected lane of the input. We should
36864 also make code generation adjustments for sum-across operations,
36865 since this is a common vectorizer reduction.
36867 Because we run prior to the first split, we can see loads and stores
36868 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
36869 vector loads and stores that have not yet been split into a permuting
36870 load/store and a swap. (One way this can happen is with a builtin
36871 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
36872 than deleting a swap, we convert the load/store into a permuting
36873 load/store (which effectively removes the swap). */
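/* For illustration, consider a simple vector copy (a schematic example,
   not taken from these sources):

       void copy (vector int *dst, vector int *src) { *dst = *src; }

   On little-endian POWER8 the initial code is roughly

       lxvd2x  vs0,0,r4       # permuting load; doublewords reversed
       xxswapd vs0,vs0        # swap to restore element order
       xxswapd vs0,vs0        # swap again ahead of the store
       stxvd2x vs0,0,r3       # permuting store

   All inputs and outputs of this web are permuting loads/stores, so
   both swaps can be replaced by copies and later removed.  */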
36875 /* Notes on Permutes
36877 We do not currently handle computations that contain permutes. There
36878 is a general transformation that can be performed correctly, but it
36879 may introduce more expensive code than it replaces. To handle these
36880 would require a cost model to determine when to perform the optimization.
36881 This commentary records how this could be done if desired.
36883 The most general permute is something like this (example for V16QI):
36885 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
36886 (parallel [(const_int a0) (const_int a1)
36887 ...
36888 (const_int a14) (const_int a15)]))
36890 where a0,...,a15 are in [0,31] and select the elements of op1 and
36891 op2 that appear in the result.
36893 Regardless of mode, we can convert the PARALLEL to a mask of 16
36894 byte-element selectors. Let's call this M, with M[i] representing
36895 the ith byte-element selector value. Then if we swap doublewords
36896 throughout the computation, we can get correct behavior by replacing
36897 M with M' as follows:
36899 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
36900 { ((M[i]+8)%16)+16 : M[i] in [16,31]
36902 This seems promising at first, since we are just replacing one mask
36903 with another. But certain masks are preferable to others. If M
36904 is a mask that matches a vmrghh pattern, for example, M' certainly
36905 will not. Instead of a single vmrghh, we would generate a load of
36906 M' and a vperm. So we would need to know how many xxswapd's we can
36907 remove as a result of this transformation to determine if it's
36908 profitable; and preferably the logic would need to be aware of all
36909 the special preferable masks.
36911 Another form of permute is an UNSPEC_VPERM, in which the mask is
36912 already in a register. In some cases, this mask may be a constant
36913 that we can discover with ud-chains, in which case the above
36914 transformation is ok. However, the common usage here is for the
36915 mask to be produced by an UNSPEC_LVSL, in which case the mask
36916 cannot be known at compile time. In such a case we would have to
36917 generate several instructions to compute M' as above at run time,
36918 and a cost model is needed again.
36920 However, when the mask M for an UNSPEC_VPERM is loaded from the
36921 constant pool, we can replace M with M' as above at no cost
36922 beyond adding a constant pool entry. */
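/* The M -> M' remapping above is small enough to write out directly.
   The following is an illustrative, self-contained sketch (the helper
   name is hypothetical, not part of this file); for example, the
   identity mask {0,1,...,15} becomes {8,...,15,0,...,7}.  */

static void
remap_vperm_selectors (const unsigned char m[16], unsigned char m_prime[16])
{
  for (int i = 0; i < 16; ++i)
    if (m[i] < 16)
      m_prime[i] = (m[i] + 8) % 16;		/* element of op1 */
    else
      m_prime[i] = ((m[i] + 8) % 16) + 16;	/* element of op2 */
}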
36924 /* This is based on the union-find logic in web.c. web_entry_base is
36925 defined in df.h. */
36926 class swap_web_entry : public web_entry_base
36928 public:
36929 /* Pointer to the insn. */
36930 rtx_insn *insn;
36931 /* Set if insn contains a mention of a vector register. All other
36932 fields are undefined if this field is unset. */
36933 unsigned int is_relevant : 1;
36934 /* Set if insn is a load. */
36935 unsigned int is_load : 1;
36936 /* Set if insn is a store. */
36937 unsigned int is_store : 1;
36938 /* Set if insn is a doubleword swap. This can either be a register swap
36939 or a permuting load or store (test is_load and is_store for this). */
36940 unsigned int is_swap : 1;
36941 /* Set if the insn has a live-in use of a parameter register. */
36942 unsigned int is_live_in : 1;
36943 /* Set if the insn has a live-out def of a return register. */
36944 unsigned int is_live_out : 1;
36945 /* Set if the insn contains a subreg reference of a vector register. */
36946 unsigned int contains_subreg : 1;
36947 /* Set if the insn contains a 128-bit integer operand. */
36948 unsigned int is_128_int : 1;
36950 /* Set if this is a call insn. */
36950 unsigned int is_call : 1;
36951 /* Set if this insn does not perform a vector operation for which
36952 element order matters, or if we know how to fix it up if it does.
36953 Undefined if is_swap is set. */
36954 unsigned int is_swappable : 1;
36955 /* A nonzero value indicates what kind of special handling for this
36956 insn is required if doublewords are swapped. Undefined if
36957 is_swappable is not set. */
36958 unsigned int special_handling : 4;
36959 /* Set if the web represented by this entry cannot be optimized. */
36960 unsigned int web_not_optimizable : 1;
36961 /* Set if this insn should be deleted. */
36962 unsigned int will_delete : 1;
36965 enum special_handling_values {
36966 SH_NONE = 0,
36967 SH_CONST_VECTOR,
36968 SH_SUBREG,
36969 SH_NOSWAP_LD,
36970 SH_NOSWAP_ST,
36971 SH_EXTRACT,
36972 SH_SPLAT,
36973 SH_XXPERMDI,
36974 SH_CONCAT,
36975 SH_VPERM
36978 /* Union INSN with all insns containing definitions that reach USE.
36979 Detect whether USE is live-in to the current function. */
36980 static void
36981 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
36983 struct df_link *link = DF_REF_CHAIN (use);
36985 if (!link)
36986 insn_entry[INSN_UID (insn)].is_live_in = 1;
36988 while (link)
36990 if (DF_REF_IS_ARTIFICIAL (link->ref))
36991 insn_entry[INSN_UID (insn)].is_live_in = 1;
36993 if (DF_REF_INSN_INFO (link->ref))
36995 rtx def_insn = DF_REF_INSN (link->ref);
36996 (void)unionfind_union (insn_entry + INSN_UID (insn),
36997 insn_entry + INSN_UID (def_insn));
37000 link = link->next;
37004 /* Union INSN with all insns containing uses reached from DEF.
37005 Detect whether DEF is live-out from the current function. */
37006 static void
37007 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
37009 struct df_link *link = DF_REF_CHAIN (def);
37011 if (!link)
37012 insn_entry[INSN_UID (insn)].is_live_out = 1;
37014 while (link)
37016 /* This could be an eh use or some other artificial use;
37017 we treat these all the same (killing the optimization). */
37018 if (DF_REF_IS_ARTIFICIAL (link->ref))
37019 insn_entry[INSN_UID (insn)].is_live_out = 1;
37021 if (DF_REF_INSN_INFO (link->ref))
37023 rtx use_insn = DF_REF_INSN (link->ref);
37024 (void)unionfind_union (insn_entry + INSN_UID (insn),
37025 insn_entry + INSN_UID (use_insn));
37028 link = link->next;
37032 /* Return 1 iff INSN is a load insn, including permuting loads that
37033 represent an lvxd2x instruction; else return 0. */
37034 static unsigned int
37035 insn_is_load_p (rtx insn)
37037 rtx body = PATTERN (insn);
37039 if (GET_CODE (body) == SET)
37041 if (GET_CODE (SET_SRC (body)) == MEM)
37042 return 1;
37044 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
37045 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
37046 return 1;
37048 return 0;
37051 if (GET_CODE (body) != PARALLEL)
37052 return 0;
37054 rtx set = XVECEXP (body, 0, 0);
37056 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
37057 return 1;
37059 return 0;
37062 /* Return 1 iff INSN is a store insn, including permuting stores that
37063 represent an stvxd2x instruction; else return 0. */
37064 static unsigned int
37065 insn_is_store_p (rtx insn)
37067 rtx body = PATTERN (insn);
37068 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
37069 return 1;
37070 if (GET_CODE (body) != PARALLEL)
37071 return 0;
37072 rtx set = XVECEXP (body, 0, 0);
37073 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
37074 return 1;
37075 return 0;
37078 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
37079 a permuting load, or a permuting store. */
37080 static unsigned int
37081 insn_is_swap_p (rtx insn)
37083 rtx body = PATTERN (insn);
37084 if (GET_CODE (body) != SET)
37085 return 0;
37086 rtx rhs = SET_SRC (body);
37087 if (GET_CODE (rhs) != VEC_SELECT)
37088 return 0;
37089 rtx parallel = XEXP (rhs, 1);
37090 if (GET_CODE (parallel) != PARALLEL)
37091 return 0;
37092 unsigned int len = XVECLEN (parallel, 0);
37093 if (len != 2 && len != 4 && len != 8 && len != 16)
37094 return 0;
37095 for (unsigned int i = 0; i < len / 2; ++i)
37097 rtx op = XVECEXP (parallel, 0, i);
37098 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
37099 return 0;
37101 for (unsigned int i = len / 2; i < len; ++i)
37103 rtx op = XVECEXP (parallel, 0, i);
37104 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
37105 return 0;
37107 return 1;
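/* For example, the doubleword swap of a V4SI value recognized above
   has the form

       (vec_select:V4SI (reg:V4SI x)
                        (parallel [(const_int 2) (const_int 3)
                                   (const_int 0) (const_int 1)]))

   i.e. the two halves of the selection vector are exchanged.  */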
37110 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
37111 static bool
37112 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
37114 unsigned uid = INSN_UID (insn);
37115 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
37116 return false;
37118 /* Find the unique use in the swap and locate its def. If the def
37119 isn't unique, punt. */
37120 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37121 df_ref use;
37122 FOR_EACH_INSN_INFO_USE (use, insn_info)
37124 struct df_link *def_link = DF_REF_CHAIN (use);
37125 if (!def_link || def_link->next)
37126 return false;
37128 rtx def_insn = DF_REF_INSN (def_link->ref);
37129 unsigned uid2 = INSN_UID (def_insn);
37130 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
37131 return false;
37133 rtx body = PATTERN (def_insn);
37134 if (GET_CODE (body) != SET
37135 || GET_CODE (SET_SRC (body)) != VEC_SELECT
37136 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
37137 return false;
37139 rtx mem = XEXP (SET_SRC (body), 0);
37140 rtx base_reg = XEXP (mem, 0);
37142 df_ref base_use;
37143 insn_info = DF_INSN_INFO_GET (def_insn);
37144 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
37146 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
37147 continue;
37149 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
37150 if (!base_def_link || base_def_link->next)
37151 return false;
37153 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
37154 rtx tocrel_body = PATTERN (tocrel_insn);
37155 rtx base, offset;
37156 if (GET_CODE (tocrel_body) != SET)
37157 return false;
37158 /* There is an extra level of indirection for small/large
37159 code models. */
37160 rtx tocrel_expr = SET_SRC (tocrel_body);
37161 if (GET_CODE (tocrel_expr) == MEM)
37162 tocrel_expr = XEXP (tocrel_expr, 0);
37163 if (!toc_relative_expr_p (tocrel_expr, false))
37164 return false;
37165 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37166 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
37167 return false;
37170 return true;
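/* For illustration, the shape recognized above is, schematically,

       base <- TOC-relative constant-pool address
       tmp  <- (vec_select (mem base) ...)    permuting load
       dst  <- (vec_select tmp ...)           the swap passed as INSN

   with one extra MEM level possible for the small and large code
   models, as noted in the comment above.  */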
37173 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
37174 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
37175 static bool
37176 v2df_reduction_p (rtx op)
37178 if (GET_MODE (op) != V2DFmode)
37179 return false;
37181 enum rtx_code code = GET_CODE (op);
37182 if (code != PLUS && code != SMIN && code != SMAX)
37183 return false;
37185 rtx concat = XEXP (op, 0);
37186 if (GET_CODE (concat) != VEC_CONCAT)
37187 return false;
37189 rtx select0 = XEXP (concat, 0);
37190 rtx select1 = XEXP (concat, 1);
37191 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
37192 return false;
37194 rtx reg0 = XEXP (select0, 0);
37195 rtx reg1 = XEXP (select1, 0);
37196 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
37197 return false;
37199 rtx parallel0 = XEXP (select0, 1);
37200 rtx parallel1 = XEXP (select1, 1);
37201 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
37202 return false;
37204 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
37205 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
37206 return false;
37208 return true;
37211 /* Return 1 iff OP is an operand that will not be affected by having
37212 vector doublewords swapped in memory. */
37213 static unsigned int
37214 rtx_is_swappable_p (rtx op, unsigned int *special)
37216 enum rtx_code code = GET_CODE (op);
37217 int i, j;
37218 rtx parallel;
37220 switch (code)
37222 case LABEL_REF:
37223 case SYMBOL_REF:
37224 case CLOBBER:
37225 case REG:
37226 return 1;
37228 case VEC_CONCAT:
37229 case ASM_INPUT:
37230 case ASM_OPERANDS:
37231 return 0;
37233 case CONST_VECTOR:
37235 *special = SH_CONST_VECTOR;
37236 return 1;
37239 case VEC_DUPLICATE:
37240 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
37241 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
37242 it represents a vector splat for which we can do special
37243 handling. */
37244 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
37245 return 1;
37246 else if (GET_CODE (XEXP (op, 0)) == REG
37247 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
37248 /* This catches V2DF and V2DI splat, at a minimum. */
37249 return 1;
37250 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
37251 /* If the duplicated item is from a select, defer to the select
37252 processing to see if we can change the lane for the splat. */
37253 return rtx_is_swappable_p (XEXP (op, 0), special);
37254 else
37255 return 0;
37257 case VEC_SELECT:
37258 /* A vec_extract operation is ok if we change the lane. */
37259 if (GET_CODE (XEXP (op, 0)) == REG
37260 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
37261 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
37262 && XVECLEN (parallel, 0) == 1
37263 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
37265 *special = SH_EXTRACT;
37266 return 1;
37268 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
37269 XXPERMDI is a swap operation, it will be identified by
37270 insn_is_swap_p and therefore we won't get here. */
37271 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
37272 && (GET_MODE (XEXP (op, 0)) == V4DFmode
37273 || GET_MODE (XEXP (op, 0)) == V4DImode)
37274 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
37275 && XVECLEN (parallel, 0) == 2
37276 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
37277 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
37279 *special = SH_XXPERMDI;
37280 return 1;
37282 else if (v2df_reduction_p (op))
37283 return 1;
37284 else
37285 return 0;
37287 case UNSPEC:
37289 /* Various operations are unsafe for this optimization, at least
37290 without significant additional work. Permutes are obviously
37291 problematic, as both the permute control vector and the ordering
37292 of the target values are invalidated by doubleword swapping.
37293 Vector pack and unpack modify the number of vector lanes.
37294 Merge-high/low will not operate correctly on swapped operands.
37295 Vector shifts across element boundaries are clearly uncool,
37296 as are vector select and concatenate operations. Vector
37297 sum-across instructions define one operand with a specific
37298 order-dependent element, so additional fixup code would be
37299 needed to make those work. Vector set and non-immediate-form
37300 vector splat are element-order sensitive. A few of these
37301 cases might be workable with special handling if required.
37302 Adding cost modeling would be appropriate in some cases. */
37303 int val = XINT (op, 1);
37304 switch (val)
37306 default:
37307 break;
37308 case UNSPEC_VMRGH_DIRECT:
37309 case UNSPEC_VMRGL_DIRECT:
37310 case UNSPEC_VPACK_SIGN_SIGN_SAT:
37311 case UNSPEC_VPACK_SIGN_UNS_SAT:
37312 case UNSPEC_VPACK_UNS_UNS_MOD:
37313 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
37314 case UNSPEC_VPACK_UNS_UNS_SAT:
37315 case UNSPEC_VPERM:
37316 case UNSPEC_VPERM_UNS:
37317 case UNSPEC_VPERMHI:
37318 case UNSPEC_VPERMSI:
37319 case UNSPEC_VPKPX:
37320 case UNSPEC_VSLDOI:
37321 case UNSPEC_VSLO:
37322 case UNSPEC_VSRO:
37323 case UNSPEC_VSUM2SWS:
37324 case UNSPEC_VSUM4S:
37325 case UNSPEC_VSUM4UBS:
37326 case UNSPEC_VSUMSWS:
37327 case UNSPEC_VSUMSWS_DIRECT:
37328 case UNSPEC_VSX_CONCAT:
37329 case UNSPEC_VSX_SET:
37330 case UNSPEC_VSX_SLDWI:
37331 case UNSPEC_VUNPACK_HI_SIGN:
37332 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
37333 case UNSPEC_VUNPACK_LO_SIGN:
37334 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
37335 case UNSPEC_VUPKHPX:
37336 case UNSPEC_VUPKHS_V4SF:
37337 case UNSPEC_VUPKHU_V4SF:
37338 case UNSPEC_VUPKLPX:
37339 case UNSPEC_VUPKLS_V4SF:
37340 case UNSPEC_VUPKLU_V4SF:
37341 case UNSPEC_VSX_CVDPSPN:
37342 case UNSPEC_VSX_CVSPDP:
37343 case UNSPEC_VSX_CVSPDPN:
37344 return 0;
37345 case UNSPEC_VSPLT_DIRECT:
37346 *special = SH_SPLAT;
37347 return 1;
37348 case UNSPEC_REDUC_PLUS:
37349 case UNSPEC_REDUC:
37350 return 1;
37354 default:
37355 break;
37358 const char *fmt = GET_RTX_FORMAT (code);
37359 int ok = 1;
37361 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37362 if (fmt[i] == 'e' || fmt[i] == 'u')
37364 unsigned int special_op = SH_NONE;
37365 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
37366 if (special_op == SH_NONE)
37367 continue;
37368 /* Ensure we never have two kinds of special handling
37369 for the same insn. */
37370 if (*special != SH_NONE && *special != special_op)
37371 return 0;
37372 *special = special_op;
37374 else if (fmt[i] == 'E')
37375 for (j = 0; j < XVECLEN (op, i); ++j)
37377 unsigned int special_op = SH_NONE;
37378 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
37379 if (special_op == SH_NONE)
37380 continue;
37381 /* Ensure we never have two kinds of special handling
37382 for the same insn. */
37383 if (*special != SH_NONE && *special != special_op)
37384 return 0;
37385 *special = special_op;
37388 return ok;
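/* For example, a pure SIMD operation such as
   (plus:V4SI (reg:V4SI a) (reg:V4SI b)) falls through to the default
   case, recurses into its REG operands, and is swappable with no
   special handling; a CONST_VECTOR operand instead requires the
   SH_CONST_VECTOR fix-up that exchanges the constant's halves.  */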
37391 /* Return 1 iff INSN is not affected by having vector doublewords
37392 swapped in memory (in which case *SPECIAL is unchanged), or can
37393 be modified to be correct if vector doublewords are swapped in
37394 memory (in which case *SPECIAL is changed to a value indicating
37395 how). */
37396 static unsigned int
37397 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
37398 unsigned int *special)
37400 /* Calls are always bad. */
37401 if (GET_CODE (insn) == CALL_INSN)
37402 return 0;
37404 /* Loads and stores seen here are not permuting, but we can still
37405 fix them up by converting them to permuting ones. Exceptions:
37406 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
37407 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
37408 for the SET source. Also we must now make an exception for lvx
37409 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
37410 explicit "& -16") since this leads to unrecognizable insns. */
37411 rtx body = PATTERN (insn);
37412 int i = INSN_UID (insn);
37414 if (insn_entry[i].is_load)
37416 if (GET_CODE (body) == SET)
37418 rtx rhs = SET_SRC (body);
37419 gcc_assert (GET_CODE (rhs) == MEM);
37420 if (GET_CODE (XEXP (rhs, 0)) == AND)
37421 return 0;
37423 *special = SH_NOSWAP_LD;
37424 return 1;
37426 else
37427 return 0;
37430 if (insn_entry[i].is_store)
37432 if (GET_CODE (body) == SET
37433 && GET_CODE (SET_SRC (body)) != UNSPEC)
37435 rtx lhs = SET_DEST (body);
37436 gcc_assert (GET_CODE (lhs) == MEM);
37437 if (GET_CODE (XEXP (lhs, 0)) == AND)
37438 return 0;
37440 *special = SH_NOSWAP_ST;
37441 return 1;
37443 else
37444 return 0;
37447 /* A convert to single precision can be left as is provided that
37448 all of its uses are in xxspltw instructions that splat BE element
37449 zero. */
37450 if (GET_CODE (body) == SET
37451 && GET_CODE (SET_SRC (body)) == UNSPEC
37452 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
37454 df_ref def;
37455 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37457 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37459 struct df_link *link = DF_REF_CHAIN (def);
37460 if (!link)
37461 return 0;
37463 for (; link; link = link->next) {
37464 rtx use_insn = DF_REF_INSN (link->ref);
37465 rtx use_body = PATTERN (use_insn);
37466 if (GET_CODE (use_body) != SET
37467 || GET_CODE (SET_SRC (use_body)) != UNSPEC
37468 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
37469 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
37470 return 0;
37474 return 1;
37477 /* A concatenation of two doublewords is ok if we reverse the
37478 order of the inputs. */
37479 if (GET_CODE (body) == SET
37480 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
37481 && (GET_MODE (SET_SRC (body)) == V2DFmode
37482 || GET_MODE (SET_SRC (body)) == V2DImode))
37484 *special = SH_CONCAT;
37485 return 1;
37488 /* V2DF reductions are always swappable. */
37489 if (GET_CODE (body) == PARALLEL)
37491 rtx expr = XVECEXP (body, 0, 0);
37492 if (GET_CODE (expr) == SET
37493 && v2df_reduction_p (SET_SRC (expr)))
37494 return 1;
37497 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
37498 constant pool. */
37499 if (GET_CODE (body) == SET
37500 && GET_CODE (SET_SRC (body)) == UNSPEC
37501 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
37502 && XVECLEN (SET_SRC (body), 0) == 3
37503 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
37505 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
37506 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37507 df_ref use;
37508 FOR_EACH_INSN_INFO_USE (use, insn_info)
37509 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37511 struct df_link *def_link = DF_REF_CHAIN (use);
37512 /* Punt if multiple definitions for this reg. */
37513 if (def_link && !def_link->next &&
37514 const_load_sequence_p (insn_entry,
37515 DF_REF_INSN (def_link->ref)))
37517 *special = SH_VPERM;
37518 return 1;
37523 /* Otherwise check the operands for vector lane violations. */
37524 return rtx_is_swappable_p (body, special);
37527 enum chain_purpose { FOR_LOADS, FOR_STORES };
37529 /* Return true if the UD or DU chain headed by LINK is non-empty,
37530 and every entry on the chain references an insn that is a
37531 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
37532 register swap must have only permuting loads as reaching defs.
37533 If PURPOSE is FOR_STORES, each such register swap must have only
37534 register swaps or permuting stores as reached uses. */
37535 static bool
37536 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
37537 enum chain_purpose purpose)
37539 if (!link)
37540 return false;
37542 for (; link; link = link->next)
37544 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
37545 continue;
37547 if (DF_REF_IS_ARTIFICIAL (link->ref))
37548 return false;
37550 rtx reached_insn = DF_REF_INSN (link->ref);
37551 unsigned uid = INSN_UID (reached_insn);
37552 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
37554 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
37555 || insn_entry[uid].is_store)
37556 return false;
37558 if (purpose == FOR_LOADS)
37560 df_ref use;
37561 FOR_EACH_INSN_INFO_USE (use, insn_info)
37563 struct df_link *swap_link = DF_REF_CHAIN (use);
37565 while (swap_link)
37567 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
37568 return false;
37570 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
37571 unsigned uid2 = INSN_UID (swap_def_insn);
37573 /* Only permuting loads are allowed. */
37574 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
37575 return false;
37577 swap_link = swap_link->next;
37581 else if (purpose == FOR_STORES)
37583 df_ref def;
37584 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37586 struct df_link *swap_link = DF_REF_CHAIN (def);
37588 while (swap_link)
37590 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
37591 return false;
37593 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
37594 unsigned uid2 = INSN_UID (swap_use_insn);
37596 /* Permuting stores or register swaps are allowed. */
37597 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
37598 return false;
37600 swap_link = swap_link->next;
37606 return true;
37609 /* Mark the xxswapdi instructions associated with permuting loads and
37610 stores for removal. Note that we only flag them for deletion here,
37611 as there is a possibility of a swap being reached from multiple
37612 loads, etc. */
37613 static void
37614 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
37616 rtx insn = insn_entry[i].insn;
37617 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37619 if (insn_entry[i].is_load)
37621 df_ref def;
37622 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37624 struct df_link *link = DF_REF_CHAIN (def);
37626 /* We know by now that these are swaps, so we can delete
37627 them confidently. */
37628 while (link)
37630 rtx use_insn = DF_REF_INSN (link->ref);
37631 insn_entry[INSN_UID (use_insn)].will_delete = 1;
37632 link = link->next;
37636 else if (insn_entry[i].is_store)
37638 df_ref use;
37639 FOR_EACH_INSN_INFO_USE (use, insn_info)
37641 /* Ignore uses for addressability. */
37642 machine_mode mode = GET_MODE (DF_REF_REG (use));
37643 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37644 continue;
37646 struct df_link *link = DF_REF_CHAIN (use);
37648 /* We know by now that these are swaps, so we can delete
37649 them confidently. */
37650 while (link)
37652 rtx def_insn = DF_REF_INSN (link->ref);
37653 insn_entry[INSN_UID (def_insn)].will_delete = 1;
37654 link = link->next;
37660 /* OP is either a CONST_VECTOR or an expression containing one.
37661 Swap the first half of the vector with the second in the first
37662 case. Recurse to find it in the second. */
37663 static void
37664 swap_const_vector_halves (rtx op)
37666 int i;
37667 enum rtx_code code = GET_CODE (op);
37668 if (GET_CODE (op) == CONST_VECTOR)
37670 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
37671 for (i = 0; i < half_units; ++i)
37673 rtx temp = CONST_VECTOR_ELT (op, i);
37674 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
37675 CONST_VECTOR_ELT (op, i + half_units) = temp;
37678 else
37680 int j;
37681 const char *fmt = GET_RTX_FORMAT (code);
37682 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37683 if (fmt[i] == 'e' || fmt[i] == 'u')
37684 swap_const_vector_halves (XEXP (op, i));
37685 else if (fmt[i] == 'E')
37686 for (j = 0; j < XVECLEN (op, i); ++j)
37687 swap_const_vector_halves (XVECEXP (op, i, j));
37691 /* Find all subregs of a vector expression that perform a narrowing,
37692 and adjust the subreg index to account for doubleword swapping. */
37693 static void
37694 adjust_subreg_index (rtx op)
37696 enum rtx_code code = GET_CODE (op);
37697 if (code == SUBREG
37698 && (GET_MODE_SIZE (GET_MODE (op))
37699 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
37701 unsigned int index = SUBREG_BYTE (op);
37702 if (index < 8)
37703 index += 8;
37704 else
37705 index -= 8;
37706 SUBREG_BYTE (op) = index;
37709 const char *fmt = GET_RTX_FORMAT (code);
37710 int i,j;
37711 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37712 if (fmt[i] == 'e' || fmt[i] == 'u')
37713 adjust_subreg_index (XEXP (op, i));
37714 else if (fmt[i] == 'E')
37715 for (j = 0; j < XVECLEN (op, i); ++j)
37716 adjust_subreg_index (XVECEXP (op, i, j));
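/* For example, (subreg:DF (reg:V2DF v) 0) names the doubleword that
   holds the other element once V is swapped, so byte 0 becomes byte 8
   and byte 8 becomes byte 0.  */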
37719 /* Convert the non-permuting load INSN to a permuting one. */
37720 static void
37721 permute_load (rtx_insn *insn)
37723 rtx body = PATTERN (insn);
37724 rtx mem_op = SET_SRC (body);
37725 rtx tgt_reg = SET_DEST (body);
37726 machine_mode mode = GET_MODE (tgt_reg);
37727 int n_elts = GET_MODE_NUNITS (mode);
37728 int half_elts = n_elts / 2;
37729 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37730 int i, j;
37731 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37732 XVECEXP (par, 0, i) = GEN_INT (j);
37733 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37734 XVECEXP (par, 0, i) = GEN_INT (j);
37735 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
37736 SET_SRC (body) = sel;
37737 INSN_CODE (insn) = -1; /* Force re-recognition. */
37738 df_insn_rescan (insn);
37740 if (dump_file)
37741 fprintf (dump_file, "Replacing load %d with permuted load\n",
37742 INSN_UID (insn));
37745 /* Convert the non-permuting store INSN to a permuting one. */
37746 static void
37747 permute_store (rtx_insn *insn)
37749 rtx body = PATTERN (insn);
37750 rtx src_reg = SET_SRC (body);
37751 machine_mode mode = GET_MODE (src_reg);
37752 int n_elts = GET_MODE_NUNITS (mode);
37753 int half_elts = n_elts / 2;
37754 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37755 int i, j;
37756 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37757 XVECEXP (par, 0, i) = GEN_INT (j);
37758 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37759 XVECEXP (par, 0, i) = GEN_INT (j);
37760 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
37761 SET_SRC (body) = sel;
37762 INSN_CODE (insn) = -1; /* Force re-recognition. */
37763 df_insn_rescan (insn);
37765 if (dump_file)
37766 fprintf (dump_file, "Replacing store %d with permuted store\n",
37767 INSN_UID (insn));
37770 /* Given INSN containing a vector extract operation, adjust the index
37771 of the extracted lane to account for the doubleword swap. */
37772 static void
37773 adjust_extract (rtx_insn *insn)
37775 rtx pattern = PATTERN (insn);
37776 if (GET_CODE (pattern) == PARALLEL)
37777 pattern = XVECEXP (pattern, 0, 0);
37778 rtx src = SET_SRC (pattern);
37779 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
37780 account for that. */
37781 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
37782 rtx par = XEXP (sel, 1);
37783 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
37784 int lane = INTVAL (XVECEXP (par, 0, 0));
37785 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37786 XVECEXP (par, 0, 0) = GEN_INT (lane);
37787 INSN_CODE (insn) = -1; /* Force re-recognition. */
37788 df_insn_rescan (insn);
37790 if (dump_file)
37791 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
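/* For example, with a V4SI input (half_elts == 2) an extract of lane 1
   becomes an extract of lane 3 from the swapped register, and an
   extract of lane 3 becomes lane 1.  */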
37794 /* Given INSN containing a vector direct-splat operation, adjust the index
37795 of the source lane to account for the doubleword swap. */
37796 static void
37797 adjust_splat (rtx_insn *insn)
37799 rtx body = PATTERN (insn);
37800 rtx unspec = XEXP (body, 1);
37801 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
37802 int lane = INTVAL (XVECEXP (unspec, 0, 1));
37803 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37804 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
37805 INSN_CODE (insn) = -1; /* Force re-recognition. */
37806 df_insn_rescan (insn);
37808 if (dump_file)
37809 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
37812 /* Given INSN containing an XXPERMDI operation (that is not a doubleword
37813 swap), reverse the order of the source operands and adjust the indices
37814 of the source lanes to account for doubleword reversal. */
37815 static void
37816 adjust_xxpermdi (rtx_insn *insn)
37818 rtx set = PATTERN (insn);
37819 rtx select = XEXP (set, 1);
37820 rtx concat = XEXP (select, 0);
37821 rtx src0 = XEXP (concat, 0);
37822 XEXP (concat, 0) = XEXP (concat, 1);
37823 XEXP (concat, 1) = src0;
37824 rtx parallel = XEXP (select, 1);
37825 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
37826 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
37827 int new_lane0 = 3 - lane1;
37828 int new_lane1 = 3 - lane0;
37829 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
37830 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
37831 INSN_CODE (insn) = -1; /* Force re-recognition. */
37832 df_insn_rescan (insn);
37834 if (dump_file)
37835 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
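/* Worked example: an xxpermdi selecting lanes {0, 2} of
   (vec_concat a b) -- element 0 of A and element 0 of B -- is rewritten
   to select lanes {3 - 2, 3 - 0} = {1, 3} of (vec_concat b a), which
   extracts the same two values from the swapped registers.  */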
37838 /* Given INSN containing a VEC_CONCAT operation of two doublewords,
37839 reverse the order of those inputs. */
37840 static void
37841 adjust_concat (rtx_insn *insn)
37843 rtx set = PATTERN (insn);
37844 rtx concat = XEXP (set, 1);
37845 rtx src0 = XEXP (concat, 0);
37846 XEXP (concat, 0) = XEXP (concat, 1);
37847 XEXP (concat, 1) = src0;
37848 INSN_CODE (insn) = -1; /* Force re-recognition. */
37849 df_insn_rescan (insn);
37851 if (dump_file)
37852 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
37855 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
37856 constant pool to reflect swapped doublewords. */
37857 static void
37858 adjust_vperm (rtx_insn *insn)
37860 /* We previously determined that the UNSPEC_VPERM was fed by a
37861 swap of a swapping load of a TOC-relative constant pool symbol.
37862 Find the MEM in the swapping load and replace it with a MEM for
37863 the adjusted mask constant. */
37864 rtx set = PATTERN (insn);
37865 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
37867 /* Find the swap. */
37868 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37869 df_ref use;
37870 rtx_insn *swap_insn = 0;
37871 FOR_EACH_INSN_INFO_USE (use, insn_info)
37872 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37874 struct df_link *def_link = DF_REF_CHAIN (use);
37875 gcc_assert (def_link && !def_link->next);
37876 swap_insn = DF_REF_INSN (def_link->ref);
37877 break;
37879 gcc_assert (swap_insn);
37881 /* Find the load. */
37882 insn_info = DF_INSN_INFO_GET (swap_insn);
37883 rtx_insn *load_insn = 0;
37884 FOR_EACH_INSN_INFO_USE (use, insn_info)
37886 struct df_link *def_link = DF_REF_CHAIN (use);
37887 gcc_assert (def_link && !def_link->next);
37888 load_insn = DF_REF_INSN (def_link->ref);
37889 break;
37891 gcc_assert (load_insn);
37893 /* Find the TOC-relative symbol access. */
37894 insn_info = DF_INSN_INFO_GET (load_insn);
37895 rtx_insn *tocrel_insn = 0;
37896 FOR_EACH_INSN_INFO_USE (use, insn_info)
37898 struct df_link *def_link = DF_REF_CHAIN (use);
37899 gcc_assert (def_link && !def_link->next);
37900 tocrel_insn = DF_REF_INSN (def_link->ref);
37901 break;
37903 gcc_assert (tocrel_insn);
37905 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
37906 to set tocrel_base; otherwise it would be unnecessary as we've
37907 already established it will return true. */
37908 rtx base, offset;
37909 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
37910 /* There is an extra level of indirection for small/large code models. */
37911 if (GET_CODE (tocrel_expr) == MEM)
37912 tocrel_expr = XEXP (tocrel_expr, 0);
37913 if (!toc_relative_expr_p (tocrel_expr, false))
37914 gcc_unreachable ();
37915 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37916 rtx const_vector = get_pool_constant (base);
37917 /* With the extra indirection, get_pool_constant will produce the
37918 real constant from the reg_equal expression, so get the real
37919 constant. */
37920 if (GET_CODE (const_vector) == SYMBOL_REF)
37921 const_vector = get_pool_constant (const_vector);
37922 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
37924 /* Create an adjusted mask from the initial mask. */
37925 unsigned int new_mask[16], i, val;
37926 for (i = 0; i < 16; ++i) {
37927 val = INTVAL (XVECEXP (const_vector, 0, i));
37928 if (val < 16)
37929 new_mask[i] = (val + 8) % 16;
37930 else
37931 new_mask[i] = ((val + 8) % 16) + 16;
37934 /* Create a new CONST_VECTOR and a MEM that references it. */
37935 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
37936 for (i = 0; i < 16; ++i)
37937 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
37938 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
37939 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
37940 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
37941 can't recognize. Force the SYMBOL_REF into a register. */
37942 if (!REG_P (XEXP (new_mem, 0))) {
37943 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
37944 XEXP (new_mem, 0) = base_reg;
37945 /* Move the newly created insn ahead of the load insn. */
37946 rtx_insn *force_insn = get_last_insn ();
37947 remove_insn (force_insn);
37948 rtx_insn *before_load_insn = PREV_INSN (load_insn);
37949 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
37950 df_insn_rescan (before_load_insn);
37951 df_insn_rescan (force_insn);
37954 /* Replace the MEM in the load instruction and rescan it. */
37955 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
37956 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
37957 df_insn_rescan (load_insn);
37959 if (dump_file)
37960 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
37963 /* The insn described by INSN_ENTRY[I] can be swapped, but only
37964 with special handling. Take care of that here. */
37965 static void
37966 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
37968 rtx_insn *insn = insn_entry[i].insn;
37969 rtx body = PATTERN (insn);
37971 switch (insn_entry[i].special_handling)
37973 default:
37974 gcc_unreachable ();
37975 case SH_CONST_VECTOR:
37977 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
37978 gcc_assert (GET_CODE (body) == SET);
37979 rtx rhs = SET_SRC (body);
37980 swap_const_vector_halves (rhs);
37981 if (dump_file)
37982 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
37983 break;
37985 case SH_SUBREG:
37986 /* A subreg of the same size is already safe. For subregs that
37987 select a smaller portion of a reg, adjust the index for
37988 swapped doublewords. */
37989 adjust_subreg_index (body);
37990 if (dump_file)
37991 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
37992 break;
37993 case SH_NOSWAP_LD:
37994 /* Convert a non-permuting load to a permuting one. */
37995 permute_load (insn);
37996 break;
37997 case SH_NOSWAP_ST:
37998 /* Convert a non-permuting store to a permuting one. */
37999 permute_store (insn);
38000 break;
38001 case SH_EXTRACT:
38002 /* Change the lane on an extract operation. */
38003 adjust_extract (insn);
38004 break;
38005 case SH_SPLAT:
38006 /* Change the lane on a direct-splat operation. */
38007 adjust_splat (insn);
38008 break;
38009 case SH_XXPERMDI:
38010 /* Change the lanes on an XXPERMDI operation. */
38011 adjust_xxpermdi (insn);
38012 break;
38013 case SH_CONCAT:
38014 /* Reverse the order of a concatenation operation. */
38015 adjust_concat (insn);
38016 break;
38017 case SH_VPERM:
38018 /* Change the mask loaded from the constant pool for a VPERM. */
38019 adjust_vperm (insn);
38020 break;
38024 /* Find the insn from the Ith table entry, which is known to be a
38025 register swap Y = SWAP(X). Replace it with a copy Y = X. */
38026 static void
38027 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
38029 rtx_insn *insn = insn_entry[i].insn;
38030 rtx body = PATTERN (insn);
38031 rtx src_reg = XEXP (SET_SRC (body), 0);
38032 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
38033 rtx_insn *new_insn = emit_insn_before (copy, insn);
38034 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
38035 df_insn_rescan (new_insn);
38037 if (dump_file)
38039 unsigned int new_uid = INSN_UID (new_insn);
38040 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
38043 df_insn_delete (insn);
38044 remove_insn (insn);
38045 insn->set_deleted ();
38048 /* Dump the swap table to DUMP_FILE. */
38049 static void
38050 dump_swap_insn_table (swap_web_entry *insn_entry)
38052 int e = get_max_uid ();
38053 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
38055 for (int i = 0; i < e; ++i)
38056 if (insn_entry[i].is_relevant)
38058 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
38059 fprintf (dump_file, "%6d %6d ", i,
38060 pred_entry && pred_entry->insn
38061 ? INSN_UID (pred_entry->insn) : 0);
38062 if (insn_entry[i].is_load)
38063 fputs ("load ", dump_file);
38064 if (insn_entry[i].is_store)
38065 fputs ("store ", dump_file);
38066 if (insn_entry[i].is_swap)
38067 fputs ("swap ", dump_file);
38068 if (insn_entry[i].is_live_in)
38069 fputs ("live-in ", dump_file);
38070 if (insn_entry[i].is_live_out)
38071 fputs ("live-out ", dump_file);
38072 if (insn_entry[i].contains_subreg)
38073 fputs ("subreg ", dump_file);
38074 if (insn_entry[i].is_128_int)
38075 fputs ("int128 ", dump_file);
38076 if (insn_entry[i].is_call)
38077 fputs ("call ", dump_file);
38078 if (insn_entry[i].is_swappable)
38080 fputs ("swappable ", dump_file);
38081 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
38082 fputs ("special:constvec ", dump_file);
38083 else if (insn_entry[i].special_handling == SH_SUBREG)
38084 fputs ("special:subreg ", dump_file);
38085 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
38086 fputs ("special:load ", dump_file);
38087 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
38088 fputs ("special:store ", dump_file);
38089 else if (insn_entry[i].special_handling == SH_EXTRACT)
38090 fputs ("special:extract ", dump_file);
38091 else if (insn_entry[i].special_handling == SH_SPLAT)
38092 fputs ("special:splat ", dump_file);
38093 else if (insn_entry[i].special_handling == SH_XXPERMDI)
38094 fputs ("special:xxpermdi ", dump_file);
38095 else if (insn_entry[i].special_handling == SH_CONCAT)
38096 fputs ("special:concat ", dump_file);
38097 else if (insn_entry[i].special_handling == SH_VPERM)
38098 fputs ("special:vperm ", dump_file);
38100 if (insn_entry[i].web_not_optimizable)
38101 fputs ("unoptimizable ", dump_file);
38102 if (insn_entry[i].will_delete)
38103 fputs ("delete ", dump_file);
38104 fputs ("\n", dump_file);
38106 fputs ("\n", dump_file);
38109 /* Return RTX with its address canonicalized to (reg) or (plus reg reg).
38110 Here RTX is an (and addr (const_int -16)). Always return a new copy
38111 to avoid problems with combine. */
38112 static rtx
38113 alignment_with_canonical_addr (rtx align)
38115 rtx canon;
38116 rtx addr = XEXP (align, 0);
38118 if (REG_P (addr))
38119 canon = addr;
38121 else if (GET_CODE (addr) == PLUS)
38123 rtx addrop0 = XEXP (addr, 0);
38124 rtx addrop1 = XEXP (addr, 1);
38126 if (!REG_P (addrop0))
38127 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
38129 if (!REG_P (addrop1))
38130 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
38132 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
38135 else
38136 canon = force_reg (GET_MODE (addr), addr);
38138 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
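/* For example, (and (reg 3) (const_int -16)) already has a canonical
   address and only a fresh copy of the AND is built; for
   (and (plus (reg 3) (reg 4)) (const_int -16)) the PLUS is rebuilt
   from its operands.  A new rtx is returned in all cases to avoid
   sharing problems with combine, per the comment above.  */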
38141 /* Check whether INSN computes an alignment mask (an AND with -16),
38142 and if so, return a fully-expanded rtx for the masking operation. */
38143 static rtx
38144 alignment_mask (rtx_insn *insn)
38146 rtx body = PATTERN (insn);
38148 if (GET_CODE (body) != SET
38149 || GET_CODE (SET_SRC (body)) != AND
38150 || !REG_P (XEXP (SET_SRC (body), 0)))
38151 return 0;
38153 rtx mask = XEXP (SET_SRC (body), 1);
38155 if (GET_CODE (mask) == CONST_INT)
38157 if (INTVAL (mask) == -16)
38158 return alignment_with_canonical_addr (SET_SRC (body));
38159 else
38160 return 0;
38163 if (!REG_P (mask))
38164 return 0;
38166 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38167 df_ref use;
38168 rtx real_mask = 0;
38170 FOR_EACH_INSN_INFO_USE (use, insn_info)
38172 if (!rtx_equal_p (DF_REF_REG (use), mask))
38173 continue;
38175 struct df_link *def_link = DF_REF_CHAIN (use);
38176 if (!def_link || def_link->next)
38177 return 0;
38179 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
38180 rtx const_body = PATTERN (const_insn);
38181 if (GET_CODE (const_body) != SET)
38182 return 0;
38184 real_mask = SET_SRC (const_body);
38186 if (GET_CODE (real_mask) != CONST_INT
38187 || INTVAL (real_mask) != -16)
38188 return 0;
38191 if (real_mask == 0)
38192 return 0;
38194 return alignment_with_canonical_addr (SET_SRC (body));
38197 /* Given INSN that's a load or store based at BASE_REG, look for a
38198 feeding computation that aligns its address on a 16-byte boundary. */
38199 static rtx
38200 find_alignment_op (rtx_insn *insn, rtx base_reg)
38202 df_ref base_use;
38203 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38204 rtx and_operation = 0;
38206 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
38208 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
38209 continue;
38211 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
38212 if (!base_def_link || base_def_link->next)
38213 break;
38215 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
38216 and_operation = alignment_mask (and_insn);
38217 if (and_operation != 0)
38218 break;
38221 return and_operation;
38224 struct del_info { bool replace; rtx_insn *replace_insn; };
38226 /* If INSN is the load for an lvx pattern, put it in canonical form. */
38227 static void
38228 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
38230 rtx body = PATTERN (insn);
38231 gcc_assert (GET_CODE (body) == SET
38232 && GET_CODE (SET_SRC (body)) == VEC_SELECT
38233 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
38235 rtx mem = XEXP (SET_SRC (body), 0);
38236 rtx base_reg = XEXP (mem, 0);
38238 rtx and_operation = find_alignment_op (insn, base_reg);
38240 if (and_operation != 0)
38242 df_ref def;
38243 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38244 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38246 struct df_link *link = DF_REF_CHAIN (def);
38247 if (!link || link->next)
38248 break;
38250 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
38251 if (!insn_is_swap_p (swap_insn)
38252 || insn_is_load_p (swap_insn)
38253 || insn_is_store_p (swap_insn))
38254 break;
38256 /* Expected lvx pattern found. Change the swap to
38257 a copy, and propagate the AND operation into the
38258 load. */
38259 to_delete[INSN_UID (swap_insn)].replace = true;
38260 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
38262 XEXP (mem, 0) = and_operation;
38263 SET_SRC (body) = mem;
38264 INSN_CODE (insn) = -1; /* Force re-recognition. */
38265 df_insn_rescan (insn);
38267 if (dump_file)
38268 fprintf (dump_file, "lvx opportunity found at %d\n",
38269 INSN_UID (insn));
38274 /* If INSN is the store for an stvx pattern, put it in canonical form. */
38275 static void
38276 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
38278 rtx body = PATTERN (insn);
38279 gcc_assert (GET_CODE (body) == SET
38280 && GET_CODE (SET_DEST (body)) == MEM
38281 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
38282 rtx mem = SET_DEST (body);
38283 rtx base_reg = XEXP (mem, 0);
38285 rtx and_operation = find_alignment_op (insn, base_reg);
38287 if (and_operation != 0)
38289 rtx src_reg = XEXP (SET_SRC (body), 0);
38290 df_ref src_use;
38291 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38292 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
38294 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
38295 continue;
38297 struct df_link *link = DF_REF_CHAIN (src_use);
38298 if (!link || link->next)
38299 break;
38301 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
38302 if (!insn_is_swap_p (swap_insn)
38303 || insn_is_load_p (swap_insn)
38304 || insn_is_store_p (swap_insn))
38305 break;
38307 /* Expected stvx pattern found. Change the swap to
38308 a copy, and propagate the AND operation into the
38309 store. */
38310 to_delete[INSN_UID (swap_insn)].replace = true;
38311 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
38313 XEXP (mem, 0) = and_operation;
38314 SET_SRC (body) = src_reg;
38315 INSN_CODE (insn) = -1; /* Force re-recognition. */
38316 df_insn_rescan (insn);
38318 if (dump_file)
38319 fprintf (dump_file, "stvx opportunity found at %d\n",
38320 INSN_UID (insn));
38325 /* Look for patterns created from builtin lvx and stvx calls, and
38326 canonicalize them to be properly recognized as such. */
38327 static void
38328 recombine_lvx_stvx_patterns (function *fun)
38330 int i;
38331 basic_block bb;
38332 rtx_insn *insn;
38334 int num_insns = get_max_uid ();
38335 del_info *to_delete = XCNEWVEC (del_info, num_insns);
38337 FOR_ALL_BB_FN (bb, fun)
38338 FOR_BB_INSNS (bb, insn)
38340 if (!NONDEBUG_INSN_P (insn))
38341 continue;
38343 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
38344 recombine_lvx_pattern (insn, to_delete);
38345 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
38346 recombine_stvx_pattern (insn, to_delete);
38349 /* Turning swaps into copies is delayed until now, to avoid problems
38350 with deleting instructions during the insn walk. */
38351 for (i = 0; i < num_insns; i++)
38352 if (to_delete[i].replace)
38354 rtx swap_body = PATTERN (to_delete[i].replace_insn);
38355 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
38356 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
38357 rtx_insn *new_insn = emit_insn_before (copy,
38358 to_delete[i].replace_insn);
38359 set_block_for_insn (new_insn,
38360 BLOCK_FOR_INSN (to_delete[i].replace_insn));
38361 df_insn_rescan (new_insn);
38362 df_insn_delete (to_delete[i].replace_insn);
38363 remove_insn (to_delete[i].replace_insn);
38364 to_delete[i].replace_insn->set_deleted ();
38367 free (to_delete);
38370 /* Main entry point for this pass. */
38371 unsigned int
38372 rs6000_analyze_swaps (function *fun)
38374 swap_web_entry *insn_entry;
38375 basic_block bb;
38376 rtx_insn *insn, *curr_insn = 0;
38378 /* Dataflow analysis for use-def chains. */
38379 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
38380 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
38381 df_analyze ();
38382 df_set_flags (DF_DEFER_INSN_RESCAN);
38384 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
38385 recombine_lvx_stvx_patterns (fun);
38387 /* Allocate structure to represent webs of insns. */
38388 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
38390 /* Walk the insns to gather basic data. */
38391 FOR_ALL_BB_FN (bb, fun)
38392 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
38394 unsigned int uid = INSN_UID (insn);
38395 if (NONDEBUG_INSN_P (insn))
38397 insn_entry[uid].insn = insn;
38399 if (GET_CODE (insn) == CALL_INSN)
38400 insn_entry[uid].is_call = 1;
38402 /* Walk the uses and defs to see if we mention vector regs.
38403 Record any constraints on optimization of such mentions. */
38404 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38405 df_ref mention;
38406 FOR_EACH_INSN_INFO_USE (mention, insn_info)
38408 /* We use DF_REF_REAL_REG here to get inside any subregs. */
38409 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
38411 /* If a use gets its value from a call insn, it will be
38412 a hard register and will look like (reg:V4SI 3 3).
38413 The df analysis creates two mentions for GPR3 and GPR4,
38414 both DImode. We must recognize this and treat it as a
38415 vector mention to ensure the call is unioned with this
38416 use. */
38417 if (mode == DImode && DF_REF_INSN_INFO (mention))
38419 rtx feeder = DF_REF_INSN (mention);
38420 /* FIXME: It is pretty hard to get from the df mention
38421 to the mode of the use in the insn. We arbitrarily
38422 pick a vector mode here, even though the use might
38423 be a real DImode. We can be too conservative
38424 (create a web larger than necessary) because of
38425 this, so consider eventually fixing this. */
38426 if (GET_CODE (feeder) == CALL_INSN)
38427 mode = V4SImode;
38430 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
38432 insn_entry[uid].is_relevant = 1;
38433 if (mode == TImode || mode == V1TImode
38434 || FLOAT128_VECTOR_P (mode))
38435 insn_entry[uid].is_128_int = 1;
38436 if (DF_REF_INSN_INFO (mention))
38437 insn_entry[uid].contains_subreg
38438 = !rtx_equal_p (DF_REF_REG (mention),
38439 DF_REF_REAL_REG (mention));
38440 union_defs (insn_entry, insn, mention);
38443 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
38445 /* We use DF_REF_REAL_REG here to get inside any subregs. */
38446 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
38448 /* If we're loading up a hard vector register for a call,
38449 it looks like (set (reg:V4SI 9 9) (...)). The df
38450 analysis creates two mentions for GPR9 and GPR10, both
38451 DImode. So relying on the mode from the mentions
38452 isn't sufficient to ensure we union the call into the
38453 web with the parameter setup code. */
38454 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
38455 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
38456 mode = GET_MODE (SET_DEST (PATTERN (insn)));
38458 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
38460 insn_entry[uid].is_relevant = 1;
38461 if (mode == TImode || mode == V1TImode
38462 || FLOAT128_VECTOR_P (mode))
38463 insn_entry[uid].is_128_int = 1;
38464 if (DF_REF_INSN_INFO (mention))
38465 insn_entry[uid].contains_subreg
38466 = !rtx_equal_p (DF_REF_REG (mention),
38467 DF_REF_REAL_REG (mention));
38468 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
38469 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
38470 insn_entry[uid].is_live_out = 1;
38471 union_uses (insn_entry, insn, mention);
38475 if (insn_entry[uid].is_relevant)
38477 /* Determine if this is a load or store. */
38478 insn_entry[uid].is_load = insn_is_load_p (insn);
38479 insn_entry[uid].is_store = insn_is_store_p (insn);
38481 /* Determine if this is a doubleword swap. If not,
38482 determine whether it can legally be swapped. */
38483 if (insn_is_swap_p (insn))
38484 insn_entry[uid].is_swap = 1;
38485 else
38487 unsigned int special = SH_NONE;
38488 insn_entry[uid].is_swappable
38489 = insn_is_swappable_p (insn_entry, insn, &special);
38490 if (special != SH_NONE && insn_entry[uid].contains_subreg)
38491 insn_entry[uid].is_swappable = 0;
38492 else if (special != SH_NONE)
38493 insn_entry[uid].special_handling = special;
38494 else if (insn_entry[uid].contains_subreg)
38495 insn_entry[uid].special_handling = SH_SUBREG;
38501 if (dump_file)
38503 fprintf (dump_file, "\nSwap insn entry table when first built\n");
38504 dump_swap_insn_table (insn_entry);
38507 /* Record unoptimizable webs. */
38508 unsigned e = get_max_uid (), i;
38509 for (i = 0; i < e; ++i)
38511 if (!insn_entry[i].is_relevant)
38512 continue;
38514 swap_web_entry *root
38515 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
38517 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
38518 || (insn_entry[i].contains_subreg
38519 && insn_entry[i].special_handling != SH_SUBREG)
38520 || insn_entry[i].is_128_int || insn_entry[i].is_call
38521 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
38522 root->web_not_optimizable = 1;
38524 /* If we have loads or stores that aren't permuting then the
38525 optimization isn't appropriate. */
38526 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
38527 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
38528 root->web_not_optimizable = 1;
38530 /* If we have permuting loads or stores that are not accompanied
38531 by a register swap, the optimization isn't appropriate. */
38532 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
38534 rtx insn = insn_entry[i].insn;
38535 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38536 df_ref def;
38538 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38540 struct df_link *link = DF_REF_CHAIN (def);
38542 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
38544 root->web_not_optimizable = 1;
38545 break;
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }
  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }
  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
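/* Illustrative aside, not original source text: for a fully optimizable
   web, the net effect of this pass on little-endian VSX code is to
   remove the xxswapd after each lxvd2x and the one before each stxvd2x,
   leaving values doubleword-swapped in registers.  That is safe because
   every insn in the web is lane-insensitive or fixed up by the special
   handling above.  A hypothetical before/after sketch (register numbers
   invented):

     lxvd2x  0,0,9          lxvd2x  0,0,9
     xxswapd 0,0
     xvadddp 0,0,0    -->   xvadddp 0,0,0
     xxswapd 0,0
     stxvd2x 0,0,10         stxvd2x 0,0,10  */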
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
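/* Illustrative aside, not original source text: a minimal sketch of how
   a target wires such a pass into the pipeline from its option-override
   code, using the generic register_pass machinery.  The reference pass
   name and position are assumptions for illustration only:

     opt_pass *swaps = make_pass_analyze_swaps (g);
     struct register_pass_info swaps_info
       = { swaps, "cse2", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&swaps_info);  */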
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       *fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */
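  /* Illustrative aside, not original source text: __builtin_mffs expands
     to the mffs instruction, which copies the FPSCR image into a
     floating-point register, and __builtin_mtfsf (0xff, x) expands to
     mtfsf with FLM 0xff, rewriting all eight 4-bit fields of the lower
     FPSCR word from x.  The double temporaries below are only carriers
     for that 64-bit bit pattern, hence the VIEW_CONVERT_EXPRs to and
     from uint64.  */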
  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower 32 bits of the FPSCR image, i.e.
     all exception status, enable, and mode bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear,
                            call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node,
                                fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv
                                   & 0xffffffff1fffff00LL)
                                  | (*(uint64_t*)fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
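/* Illustrative aside, not original source text: the middle end splices
   the three sequences built above around the compare-and-swap loop it
   generates for a C11 atomic compound assignment.  Roughly, for

     _Atomic double x;  double y;
     x += y;

   the expansion behaves like

     HOLD;                                // save env, clear exceptions
     old = atomic load of x;
     do
       new = old + y;
     while (!compare_exchange (&x, &old, new)
            && (CLEAR, 1));               // drop exceptions of failed pass
     UPDATE;                              // restore env, raise exceptions

   where HOLD, CLEAR, and UPDATE stand for the *hold, *clear, and
   *update expressions; the loop shape is a sketch of the documented
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV contract, not literal GCC output.  */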
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
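/* Illustrative aside, not original source text: the rsqrt case above
   means that, when reciprocal estimates are enabled (e.g. via -mrecip
   together with fast-math options), a function such as

     double f (double x) { return 1.0 / __builtin_sqrt (x); }

   may go through the rsqrt optab and expand to an frsqrte estimate
   refined by Newton-Raphson steps, but only when optimizing for speed;
   when optimizing for size the hook declines, keeping the ordinary
   sqrt-and-divide sequence.  */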
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"