[RS6000] Rewrite rs6000_frame_related to use simplify_replace_rtx
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, to call so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,     /* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,     /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE       = 0,
  RECIP_ALL        = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                      | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                      | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;   /* option name.  */
  unsigned int mask;    /* mask bits to set.  */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",    PPC_FEATURE_HAS_4xxMAC,     0 },
  { "altivec",   PPC_FEATURE_HAS_ALTIVEC,    0 },
  { "arch_2_05", PPC_FEATURE_ARCH_2_05,      0 },
  { "arch_2_06", PPC_FEATURE_ARCH_2_06,      0 },
  { "archpmu",   PPC_FEATURE_PERFMON_COMPAT, 0 },
  { "booke",     PPC_FEATURE_BOOKE,          0 },
  { "cellbe",    PPC_FEATURE_CELL_BE,        0 },
  { "dfp",       PPC_FEATURE_HAS_DFP,        0 },
  { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
  { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
  { "fpu",       PPC_FEATURE_HAS_FPU,        0 },
  { "ic_snoop",  PPC_FEATURE_ICACHE_SNOOP,   0 },
  { "mmu",       PPC_FEATURE_HAS_MMU,        0 },
  { "notb",      PPC_FEATURE_NO_TB,          0 },
  { "pa6t",      PPC_FEATURE_PA6T,           0 },
  { "power4",    PPC_FEATURE_POWER4,         0 },
  { "power5",    PPC_FEATURE_POWER5,         0 },
  { "power5+",   PPC_FEATURE_POWER5_PLUS,    0 },
  { "power6x",   PPC_FEATURE_POWER6_EXT,     0 },
  { "ppc32",     PPC_FEATURE_32,             0 },
  { "ppc601",    PPC_FEATURE_601_INSTR,      0 },
  { "ppc64",     PPC_FEATURE_64,             0 },
  { "ppcle",     PPC_FEATURE_PPC_LE,         0 },
  { "smt",       PPC_FEATURE_SMT,            0 },
  { "spe",       PPC_FEATURE_HAS_SPE,        0 },
  { "true_le",   PPC_FEATURE_TRUE_LE,        0 },
  { "ucache",    PPC_FEATURE_UNIFIED_CACHE,  0 },
  { "vsx",       PPC_FEATURE_HAS_VSX,        0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07", PPC_FEATURE2_ARCH_2_07,      1 },
  { "dscr",      PPC_FEATURE2_HAS_DSCR,       1 },
  { "ebb",       PPC_FEATURE2_HAS_EBB,        1 },
  { "htm",       PPC_FEATURE2_HAS_HTM,        1 },
  { "htm-nosc",  PPC_FEATURE2_HTM_NOSC,       1 },
  { "isel",      PPC_FEATURE2_HAS_ISEL,       1 },
  { "tar",       PPC_FEATURE2_HAS_TAR,        1 },
  { "vcrypto",   PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00", PPC_FEATURE2_ARCH_3_00,      1 },
  { "ieee128",   PPC_FEATURE2_HAS_IEEE128,    1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking whether
   an address is legitimate.  We only need to worry about GPR, FPR, and
   Altivec registers here, along with an ANY field that is the OR of the 3
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,   /* General purpose registers.  */
  RELOAD_REG_FPR,   /* Traditional floating point regs.  */
  RELOAD_REG_VMX,   /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,   /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS  RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;   /* Register class name.  */
  int reg;            /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },     /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },     /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX.  */
  { "Any", -1 },                  /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID      0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE   0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED    0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET     0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC 0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY 0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16    0x40  /* AND -16 addressing.  */
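
/* As an illustration, a mode that is valid in GPRs with both reg+reg and
   reg+offset addressing would have an addr_mask entry of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) in the
   RELOAD_REG_GPR slot.  */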
/* Per-mode reload information, including masks of the addressing modes that
   are valid for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
  bool fused_toc;                 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}
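
/* Note that the first two helpers test the RELOAD_REG_ANY entry, which is
   the OR of the per-class masks, so they answer whether the addressing form
   is valid in *any* register class for the mode; mode_supports_vmx_dform
   instead asks specifically about the Altivec (VMX) class.  */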
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
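
/* Each RS6000_BUILTIN_* macro above expands to one initializer row of the
   form { NAME, ICODE, MASK, ATTR }, so including rs6000-builtin.def below
   stamps out one rs6000_builtin_info_type entry per builtin -- the classic
   "X macro" table technique.  */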
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
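
/* The RS6000_BUILTIN_* macros are #undef'd again once the table is built,
   presumably so that other tables elsewhere in this file can redefine them
   to expand the same rs6000-builtin.def entries in a different way.  */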
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
1318 struct GTY((for_user)) toc_hash_struct
1320 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1321 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1322 rtx key;
1323 machine_mode key_mode;
1324 int labelno;
1327 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1329 static hashval_t hash (toc_hash_struct *);
1330 static bool equal (toc_hash_struct *, toc_hash_struct *);
1333 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1335 /* Hash table to keep track of the argument types for builtin functions. */
1337 struct GTY((for_user)) builtin_hash_struct
1339 tree type;
1340 machine_mode mode[4]; /* return value + 3 arguments. */
1341 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1344 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1346 static hashval_t hash (builtin_hash_struct *);
1347 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1350 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
  "%rh8", "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
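
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000, the
   bit for %v0, and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 3) is 0x10000000,
   matching the most-significant-bit-first layout of the VRSAVE register.  */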
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1680 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1682 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1683 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1685 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1686 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1688 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1689 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1691 #undef TARGET_MD_ASM_ADJUST
1692 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1694 #undef TARGET_OPTION_OVERRIDE
1695 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1699 rs6000_builtin_vectorized_function
1701 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1702 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1703 rs6000_builtin_md_vectorized_function
1705 #if !TARGET_MACHO
1706 #undef TARGET_STACK_PROTECT_FAIL
1707 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1708 #endif
1710 #ifdef HAVE_AS_TLS
1711 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1712 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1713 #endif
1715 /* Use a 32-bit anchor range. This leads to sequences like:
1717 addis tmp,anchor,high
1718 add dest,tmp,low
1720 where tmp itself acts as an anchor, and can be shared between
1721 accesses to the same 64k page. */
1722 #undef TARGET_MIN_ANCHOR_OFFSET
1723 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1724 #undef TARGET_MAX_ANCHOR_OFFSET
1725 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1726 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1727 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1728 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1729 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1731 #undef TARGET_BUILTIN_RECIPROCAL
1732 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1734 #undef TARGET_EXPAND_TO_RTL_HOOK
1735 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1737 #undef TARGET_INSTANTIATE_DECLS
1738 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1740 #undef TARGET_SECONDARY_RELOAD
1741 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1743 #undef TARGET_LEGITIMATE_ADDRESS_P
1744 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1746 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1747 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1749 #undef TARGET_LRA_P
1750 #define TARGET_LRA_P rs6000_lra_p
1752 #undef TARGET_CAN_ELIMINATE
1753 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1755 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1756 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1758 #undef TARGET_TRAMPOLINE_INIT
1759 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1761 #undef TARGET_FUNCTION_VALUE
1762 #define TARGET_FUNCTION_VALUE rs6000_function_value
1764 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1765 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1767 #undef TARGET_OPTION_SAVE
1768 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1770 #undef TARGET_OPTION_RESTORE
1771 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1773 #undef TARGET_OPTION_PRINT
1774 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1776 #undef TARGET_CAN_INLINE_P
1777 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1779 #undef TARGET_SET_CURRENT_FUNCTION
1780 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1782 #undef TARGET_LEGITIMATE_CONSTANT_P
1783 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1785 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1786 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1788 #undef TARGET_CAN_USE_DOLOOP_P
1789 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1791 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1792 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1794 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1795 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1796 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1797 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1798 #undef TARGET_UNWIND_WORD_MODE
1799 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1801 #undef TARGET_OFFLOAD_OPTIONS
1802 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1804 #undef TARGET_C_MODE_FOR_SUFFIX
1805 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1807 #undef TARGET_INVALID_BINARY_OP
1808 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1810 #undef TARGET_OPTAB_SUPPORTED_P
1811 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1814 /* Processor table. */
1815 struct rs6000_ptt
1817 const char *const name; /* Canonical processor name. */
1818 const enum processor_type processor; /* Processor type enum value. */
1819 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1822 static struct rs6000_ptt const processor_target_table[] =
1824 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1825 #include "rs6000-cpus.def"
1826 #undef RS6000_CPU
1829 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1830 name is invalid. */
1832 static int
1833 rs6000_cpu_name_lookup (const char *name)
1835 size_t i;
1837 if (name != NULL)
1839 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1840 if (! strcmp (name, processor_target_table[i].name))
1841 return (int)i;
1844 return -1;
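/* A hedged usage sketch: assuming "power8" is one of the entries
   generated from rs6000-cpus.def above, rs6000_cpu_name_lookup ("power8")
   returns its index in processor_target_table, while an unrecognized
   name such as "power42" (hypothetical) returns -1.  */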
1848 /* Return number of consecutive hard regs needed starting at reg REGNO
1849 to hold something of mode MODE.
1850 This is ordinarily the length in words of a value of mode MODE
1851 but can be less for certain modes in special long registers.
1853 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1854 scalar instructions. The upper 32 bits are only available to the
1855 SIMD instructions.
1857 POWER and PowerPC GPRs hold 32 bits worth;
1858    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1860 static int
1861 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1863 unsigned HOST_WIDE_INT reg_size;
1865   /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1866      128-bit floating point, which can go in vector registers and use VSX
1867      memory addressing.  */
1868 if (FP_REGNO_P (regno))
1869 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1870 ? UNITS_PER_VSX_WORD
1871 : UNITS_PER_FP_WORD);
1873 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1874 reg_size = UNITS_PER_SPE_WORD;
1876 else if (ALTIVEC_REGNO_P (regno))
1877 reg_size = UNITS_PER_ALTIVEC_WORD;
1879 /* The value returned for SCmode in the E500 double case is 2 for
1880 ABI compatibility; storing an SCmode value in a single register
1881 would require function_arg and rs6000_spe_function_arg to handle
1882 SCmode so as to pass the value correctly in a pair of
1883 registers. */
1884 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1885 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1886 reg_size = UNITS_PER_FP_WORD;
1888 else
1889 reg_size = UNITS_PER_WORD;
1891 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
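/* Worked example of the ceiling division above, assuming a 64-bit
   target where UNITS_PER_WORD == 8 and UNITS_PER_ALTIVEC_WORD == 16:
   a 16-byte TImode value in GPRs needs (16 + 8 - 1) / 8 = 2 registers,
   while the same 16 bytes in an AltiVec register need
   (16 + 16 - 1) / 16 = 1 register.  */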
1894 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1895 MODE. */
1896 static int
1897 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1899 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1901 if (COMPLEX_MODE_P (mode))
1902 mode = GET_MODE_INNER (mode);
1904   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1905      register combinations; we use PTImode where we need to deal with quad
1906      word memory operations.  Don't allow quad words in the argument or frame
1907      pointer registers, just registers 0..31.  */
1908 if (mode == PTImode)
1909 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1910 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1911 && ((regno & 1) == 0));
1913 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1914 implementations. Don't allow an item to be split between a FP register
1915 and an Altivec register. Allow TImode in all VSX registers if the user
1916 asked for it. */
1917 if (TARGET_VSX && VSX_REGNO_P (regno)
1918 && (VECTOR_MEM_VSX_P (mode)
1919 || FLOAT128_VECTOR_P (mode)
1920 || reg_addr[mode].scalar_in_vmx_p
1921 || (TARGET_VSX_TIMODE && mode == TImode)
1922 || (TARGET_VADDUQM && mode == V1TImode)))
1924 if (FP_REGNO_P (regno))
1925 return FP_REGNO_P (last_regno);
1927 if (ALTIVEC_REGNO_P (regno))
1929 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1930 return 0;
1932 return ALTIVEC_REGNO_P (last_regno);
1936 /* The GPRs can hold any mode, but values bigger than one register
1937 cannot go past R31. */
1938 if (INT_REGNO_P (regno))
1939 return INT_REGNO_P (last_regno);
1941 /* The float registers (except for VSX vector modes) can only hold floating
1942 modes and DImode. */
1943 if (FP_REGNO_P (regno))
1945 if (FLOAT128_VECTOR_P (mode))
1946 return false;
1948 if (SCALAR_FLOAT_MODE_P (mode)
1949 && (mode != TDmode || (regno % 2) == 0)
1950 && FP_REGNO_P (last_regno))
1951 return 1;
1953 if (GET_MODE_CLASS (mode) == MODE_INT
1954 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1955 return 1;
1957 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1958 && PAIRED_VECTOR_MODE (mode))
1959 return 1;
1961 return 0;
1964 /* The CR register can only hold CC modes. */
1965 if (CR_REGNO_P (regno))
1966 return GET_MODE_CLASS (mode) == MODE_CC;
1968 if (CA_REGNO_P (regno))
1969 return mode == Pmode || mode == SImode;
1971   /* AltiVec modes can go only in AltiVec registers.  */
1972 if (ALTIVEC_REGNO_P (regno))
1973 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1974 || mode == V1TImode);
1976 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1977 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1978 return 1;
1980   /* We cannot put non-VSX TImode or PTImode anywhere except in a general
1981      register, and it must be able to fit within the register set.  */
1983 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
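/* Sketch of the PTImode rule above: an even GPR such as r4 is accepted
   (the r4/r5 pair stays inside FIRST_GPR_REGNO..LAST_GPR_REGNO), an odd
   GPR such as r5 fails the (regno & 1) == 0 test, and the argument and
   frame pointer registers fail the range test entirely.  */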
1986 /* Print interesting facts about registers. */
1987 static void
1988 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1990 int r, m;
1992 for (r = first_regno; r <= last_regno; ++r)
1994 const char *comma = "";
1995 int len;
1997 if (first_regno == last_regno)
1998 fprintf (stderr, "%s:\t", reg_name);
1999 else
2000 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2002 len = 8;
2003 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2004 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2006 if (len > 70)
2008 fprintf (stderr, ",\n\t");
2009 len = 8;
2010 comma = "";
2013 if (rs6000_hard_regno_nregs[m][r] > 1)
2014 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2015 rs6000_hard_regno_nregs[m][r]);
2016 else
2017 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2019 comma = ", ";
2022 if (call_used_regs[r])
2024 if (len > 70)
2026 fprintf (stderr, ",\n\t");
2027 len = 8;
2028 comma = "";
2031 len += fprintf (stderr, "%s%s", comma, "call-used");
2032 comma = ", ";
2035 if (fixed_regs[r])
2037 if (len > 70)
2039 fprintf (stderr, ",\n\t");
2040 len = 8;
2041 comma = "";
2044 len += fprintf (stderr, "%s%s", comma, "fixed");
2045 comma = ", ";
2048 if (len > 70)
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2054 len += fprintf (stderr, "%sreg-class = %s", comma,
2055 reg_class_names[(int)rs6000_regno_regclass[r]]);
2056 comma = ", ";
2058 if (len > 70)
2060 fprintf (stderr, ",\n\t");
2061 comma = "";
2064 fprintf (stderr, "%sregno = %d\n", comma, r);
2068 static const char *
2069 rs6000_debug_vector_unit (enum rs6000_vector v)
2071 const char *ret;
2073 switch (v)
2075 case VECTOR_NONE: ret = "none"; break;
2076 case VECTOR_ALTIVEC: ret = "altivec"; break;
2077 case VECTOR_VSX: ret = "vsx"; break;
2078 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2079 case VECTOR_PAIRED: ret = "paired"; break;
2080 case VECTOR_SPE: ret = "spe"; break;
2081 case VECTOR_OTHER: ret = "other"; break;
2082 default: ret = "unknown"; break;
2085 return ret;
2088 /* Inner function printing just the address mask for a particular reload
2089 register class. */
2090 DEBUG_FUNCTION char *
2091 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2093 static char ret[8];
2094 char *p = ret;
2096 if ((mask & RELOAD_REG_VALID) != 0)
2097 *p++ = 'v';
2098 else if (keep_spaces)
2099 *p++ = ' ';
2101 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2102 *p++ = 'm';
2103 else if (keep_spaces)
2104 *p++ = ' ';
2106 if ((mask & RELOAD_REG_INDEXED) != 0)
2107 *p++ = 'i';
2108 else if (keep_spaces)
2109 *p++ = ' ';
2111 if ((mask & RELOAD_REG_OFFSET) != 0)
2112 *p++ = 'o';
2113 else if (keep_spaces)
2114 *p++ = ' ';
2116 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2117 *p++ = '+';
2118 else if (keep_spaces)
2119 *p++ = ' ';
2121 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2122 *p++ = '+';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2126 if ((mask & RELOAD_REG_AND_M16) != 0)
2127 *p++ = '&';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2131 *p = '\0';
2133 return ret;
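/* Example of the output format: with KEEP_SPACES set, a mask that is
   valid, indexed, and offsettable prints as "v io   " -- one column per
   flag, in the order tested above (v, m, i, o, +, +, &).  */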
2136 /* Print the address masks in a human readable fashion.  */
2137 DEBUG_FUNCTION void
2138 rs6000_debug_print_mode (ssize_t m)
2140 ssize_t rc;
2141 int spaces = 0;
2142 bool fuse_extra_p;
2144 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2145 for (rc = 0; rc < N_RELOAD_REG; rc++)
2146 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2147 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2149 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2150 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2151 fprintf (stderr, " Reload=%c%c",
2152 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2153 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2154 else
2155 spaces += sizeof (" Reload=sl") - 1;
2157 if (reg_addr[m].scalar_in_vmx_p)
2159 fprintf (stderr, "%*s Upper=y", spaces, "");
2160 spaces = 0;
2162 else
2163 spaces += sizeof (" Upper=y") - 1;
2165 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2166 || reg_addr[m].fused_toc);
2167 if (!fuse_extra_p)
2169 for (rc = 0; rc < N_RELOAD_REG; rc++)
2171 if (rc != RELOAD_REG_ANY)
2173        if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2175            || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2176 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2177 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2179 fuse_extra_p = true;
2180 break;
2186 if (fuse_extra_p)
2188 fprintf (stderr, "%*s Fuse:", spaces, "");
2189 spaces = 0;
2191 for (rc = 0; rc < N_RELOAD_REG; rc++)
2193 if (rc != RELOAD_REG_ANY)
2195 char load, store;
2197 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2198 load = 'l';
2199 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2200 load = 'L';
2201 else
2202 load = '-';
2204 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2205 store = 's';
2206 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2207 store = 'S';
2208 else
2209 store = '-';
2211 if (load == '-' && store == '-')
2212 spaces += 5;
2213 else
2215 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2216 reload_reg_map[rc].name[0], load, store);
2217 spaces = 0;
2222 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2224 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2225 spaces = 0;
2227 else
2228 spaces += sizeof (" P8gpr") - 1;
2230 if (reg_addr[m].fused_toc)
2232 fprintf (stderr, "%*sToc", (spaces + 1), "");
2233 spaces = 0;
2235 else
2236 spaces += sizeof (" Toc") - 1;
2238 else
2239 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2241 if (rs6000_vector_unit[m] != VECTOR_NONE
2242 || rs6000_vector_mem[m] != VECTOR_NONE)
2244 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2245 spaces, "",
2246 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2247 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2250 fputs ("\n", stderr);
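/* Illustrative only; an emitted line might resemble
       Mode: DF    GPR: v io+  FPR: v io+  VMX: v      Any: v io+  Reload=sl
   with the exact flag columns depending on the target options in
   effect.  */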
2253 #define DEBUG_FMT_ID "%-32s= "
2254 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2255 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2256 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2258 /* Print various interesting information with -mdebug=reg. */
2259 static void
2260 rs6000_debug_reg_global (void)
2262 static const char *const tf[2] = { "false", "true" };
2263 const char *nl = (const char *)0;
2264 int m;
2265 size_t m1, m2, v;
2266 char costly_num[20];
2267 char nop_num[20];
2268 char flags_buffer[40];
2269 const char *costly_str;
2270 const char *nop_str;
2271 const char *trace_str;
2272 const char *abi_str;
2273 const char *cmodel_str;
2274 struct cl_target_option cl_opts;
2276 /* Modes we want tieable information on. */
2277 static const machine_mode print_tieable_modes[] = {
2278 QImode,
2279 HImode,
2280 SImode,
2281 DImode,
2282 TImode,
2283 PTImode,
2284 SFmode,
2285 DFmode,
2286 TFmode,
2287 IFmode,
2288 KFmode,
2289 SDmode,
2290 DDmode,
2291 TDmode,
2292 V8QImode,
2293 V4HImode,
2294 V2SImode,
2295 V16QImode,
2296 V8HImode,
2297 V4SImode,
2298 V2DImode,
2299 V1TImode,
2300 V32QImode,
2301 V16HImode,
2302 V8SImode,
2303 V4DImode,
2304 V2TImode,
2305 V2SFmode,
2306 V4SFmode,
2307 V2DFmode,
2308 V8SFmode,
2309 V4DFmode,
2310 CCmode,
2311 CCUNSmode,
2312 CCEQmode,
2315 /* Virtual regs we are interested in. */
2316 const static struct {
2317 int regno; /* register number. */
2318 const char *name; /* register name. */
2319 } virtual_regs[] = {
2320 { STACK_POINTER_REGNUM, "stack pointer:" },
2321 { TOC_REGNUM, "toc: " },
2322 { STATIC_CHAIN_REGNUM, "static chain: " },
2323 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2324 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2325 { ARG_POINTER_REGNUM, "arg pointer: " },
2326 { FRAME_POINTER_REGNUM, "frame pointer:" },
2327 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2328 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2329 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2330 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2331 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2332 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2333 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2334     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2335 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2338 fputs ("\nHard register information:\n", stderr);
2339 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2340 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2341 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2342 LAST_ALTIVEC_REGNO,
2343 "vs");
2344 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2345 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2346 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2347 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2348 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2349 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2350 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2351 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2353 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2354 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2355 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2357 fprintf (stderr,
2358 "\n"
2359 "d reg_class = %s\n"
2360 "f reg_class = %s\n"
2361 "v reg_class = %s\n"
2362 "wa reg_class = %s\n"
2363 "wb reg_class = %s\n"
2364 "wd reg_class = %s\n"
2365 "we reg_class = %s\n"
2366 "wf reg_class = %s\n"
2367 "wg reg_class = %s\n"
2368 "wh reg_class = %s\n"
2369 "wi reg_class = %s\n"
2370 "wj reg_class = %s\n"
2371 "wk reg_class = %s\n"
2372 "wl reg_class = %s\n"
2373 "wm reg_class = %s\n"
2374 "wo reg_class = %s\n"
2375 "wp reg_class = %s\n"
2376 "wq reg_class = %s\n"
2377 "wr reg_class = %s\n"
2378 "ws reg_class = %s\n"
2379 "wt reg_class = %s\n"
2380 "wu reg_class = %s\n"
2381 "wv reg_class = %s\n"
2382 "ww reg_class = %s\n"
2383 "wx reg_class = %s\n"
2384 "wy reg_class = %s\n"
2385 "wz reg_class = %s\n"
2386 "\n",
2387 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2388 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2389 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2390 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2391 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2392 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2393 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2394 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2395 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2396 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2397 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2398 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2399 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2400 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2401 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2402 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2403 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2404 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2405 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2406 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2407 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2408 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2409 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2410 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2411 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2412 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2413 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2415 nl = "\n";
2416 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2417 rs6000_debug_print_mode (m);
2419 fputs ("\n", stderr);
2421 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2423 machine_mode mode1 = print_tieable_modes[m1];
2424 bool first_time = true;
2426 nl = (const char *)0;
2427 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2429 machine_mode mode2 = print_tieable_modes[m2];
2430 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2432 if (first_time)
2434 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2435 nl = "\n";
2436 first_time = false;
2439 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2443 if (!first_time)
2444 fputs ("\n", stderr);
2447 if (nl)
2448 fputs (nl, stderr);
2450 if (rs6000_recip_control)
2452 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2454 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2455 if (rs6000_recip_bits[m])
2457 fprintf (stderr,
2458 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2459 GET_MODE_NAME (m),
2460 (RS6000_RECIP_AUTO_RE_P (m)
2461 ? "auto"
2462 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2463 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2464 ? "auto"
2465 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2468 fputs ("\n", stderr);
2471 if (rs6000_cpu_index >= 0)
2473 const char *name = processor_target_table[rs6000_cpu_index].name;
2474 HOST_WIDE_INT flags
2475 = processor_target_table[rs6000_cpu_index].target_enable;
2477 sprintf (flags_buffer, "-mcpu=%s flags", name);
2478 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2480 else
2481 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2483 if (rs6000_tune_index >= 0)
2485 const char *name = processor_target_table[rs6000_tune_index].name;
2486 HOST_WIDE_INT flags
2487 = processor_target_table[rs6000_tune_index].target_enable;
2489 sprintf (flags_buffer, "-mtune=%s flags", name);
2490 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2492 else
2493 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2495 cl_target_option_save (&cl_opts, &global_options);
2496 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2497 rs6000_isa_flags);
2499 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2500 rs6000_isa_flags_explicit);
2502 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2503 rs6000_builtin_mask);
2505 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2507 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2508 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2510 switch (rs6000_sched_costly_dep)
2512 case max_dep_latency:
2513 costly_str = "max_dep_latency";
2514 break;
2516 case no_dep_costly:
2517 costly_str = "no_dep_costly";
2518 break;
2520 case all_deps_costly:
2521 costly_str = "all_deps_costly";
2522 break;
2524 case true_store_to_load_dep_costly:
2525 costly_str = "true_store_to_load_dep_costly";
2526 break;
2528 case store_to_load_dep_costly:
2529 costly_str = "store_to_load_dep_costly";
2530 break;
2532 default:
2533 costly_str = costly_num;
2534 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2535 break;
2538 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2540 switch (rs6000_sched_insert_nops)
2542 case sched_finish_regroup_exact:
2543 nop_str = "sched_finish_regroup_exact";
2544 break;
2546 case sched_finish_pad_groups:
2547 nop_str = "sched_finish_pad_groups";
2548 break;
2550 case sched_finish_none:
2551 nop_str = "sched_finish_none";
2552 break;
2554 default:
2555 nop_str = nop_num;
2556 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2557 break;
2560 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2562 switch (rs6000_sdata)
2564 default:
2565 case SDATA_NONE:
2566 break;
2568 case SDATA_DATA:
2569 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2570 break;
2572 case SDATA_SYSV:
2573 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2574 break;
2576 case SDATA_EABI:
2577 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2578 break;
2582 switch (rs6000_traceback)
2584 case traceback_default: trace_str = "default"; break;
2585 case traceback_none: trace_str = "none"; break;
2586 case traceback_part: trace_str = "part"; break;
2587 case traceback_full: trace_str = "full"; break;
2588 default: trace_str = "unknown"; break;
2591 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2593 switch (rs6000_current_cmodel)
2595 case CMODEL_SMALL: cmodel_str = "small"; break;
2596 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2597 case CMODEL_LARGE: cmodel_str = "large"; break;
2598 default: cmodel_str = "unknown"; break;
2601 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2603 switch (rs6000_current_abi)
2605 case ABI_NONE: abi_str = "none"; break;
2606 case ABI_AIX: abi_str = "aix"; break;
2607 case ABI_ELFv2: abi_str = "ELFv2"; break;
2608 case ABI_V4: abi_str = "V4"; break;
2609 case ABI_DARWIN: abi_str = "darwin"; break;
2610 default: abi_str = "unknown"; break;
2613 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2615 if (rs6000_altivec_abi)
2616 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2618 if (rs6000_spe_abi)
2619 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2621 if (rs6000_darwin64_abi)
2622 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2624 if (rs6000_float_gprs)
2625 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2627 fprintf (stderr, DEBUG_FMT_S, "fprs",
2628 (TARGET_FPRS ? "true" : "false"));
2630 fprintf (stderr, DEBUG_FMT_S, "single_float",
2631 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2633 fprintf (stderr, DEBUG_FMT_S, "double_float",
2634 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2636 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2637 (TARGET_SOFT_FLOAT ? "true" : "false"));
2639 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2640 (TARGET_E500_SINGLE ? "true" : "false"));
2642 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2643 (TARGET_E500_DOUBLE ? "true" : "false"));
2645 if (TARGET_LINK_STACK)
2646 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2648 if (targetm.lra_p ())
2649 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2651 if (TARGET_P8_FUSION)
2653 char options[80];
2655 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2656 if (TARGET_TOC_FUSION)
2657 strcat (options, ", toc");
2659 if (TARGET_P8_FUSION_SIGN)
2660 strcat (options, ", sign");
2662 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2665 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2666 TARGET_SECURE_PLT ? "secure" : "bss");
2667 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2668 aix_struct_return ? "aix" : "sysv");
2669 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2670 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2671 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2672 tf[!!rs6000_align_branch_targets]);
2673 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2674 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2675 rs6000_long_double_type_size);
2676 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2677 (int)rs6000_sched_restricted_insns_priority);
2678 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2679 (int)END_BUILTINS);
2680 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2681 (int)RS6000_BUILTIN_COUNT);
2683 if (TARGET_VSX)
2684 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2685 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2687 if (TARGET_DIRECT_MOVE_128)
2688 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2689 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2693 /* Update the addr mask bits in reg_addr to help secondary reload and the
2694    legitimate address support (GO_IF_LEGITIMATE_ADDRESS) figure out the
2695    appropriate addressing to use.  */
2697 static void
2698 rs6000_setup_reg_addr_masks (void)
2700 ssize_t rc, reg, m, nregs;
2701 addr_mask_type any_addr_mask, addr_mask;
2703 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2705 machine_mode m2 = (machine_mode) m;
2706 bool complex_p = false;
2707 size_t msize;
2709 if (COMPLEX_MODE_P (m2))
2711 complex_p = true;
2712 m2 = GET_MODE_INNER (m2);
2715 msize = GET_MODE_SIZE (m2);
2717 /* SDmode is special in that we want to access it only via REG+REG
2718 addressing on power7 and above, since we want to use the LFIWZX and
2719 STFIWZX instructions to load it. */
2720 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2722 any_addr_mask = 0;
2723 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2725 addr_mask = 0;
2726 reg = reload_reg_map[rc].reg;
2728 /* Can mode values go in the GPR/FPR/Altivec registers? */
2729 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2731 nregs = rs6000_hard_regno_nregs[m][reg];
2732 addr_mask |= RELOAD_REG_VALID;
2734 /* Indicate if the mode takes more than 1 physical register. If
2735 it takes a single register, indicate it can do REG+REG
2736 addressing. */
2737 if (nregs > 1 || m == BLKmode || complex_p)
2738 addr_mask |= RELOAD_REG_MULTIPLE;
2739 else
2740 addr_mask |= RELOAD_REG_INDEXED;
2742 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2743 addressing. Restrict addressing on SPE for 64-bit types
2744 because of the SUBREG hackery used to address 64-bit floats in
2745 '32-bit' GPRs. If we allow scalars into Altivec registers,
2746 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2748 if (TARGET_UPDATE
2749 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2750 && msize <= 8
2751 && !VECTOR_MODE_P (m2)
2752 && !FLOAT128_VECTOR_P (m2)
2753 && !complex_p
2754 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2755 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2756 && !(TARGET_E500_DOUBLE && msize == 8))
2758 addr_mask |= RELOAD_REG_PRE_INCDEC;
2760 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2761 we don't allow PRE_MODIFY for some multi-register
2762 operations. */
2763 switch (m)
2765 default:
2766 addr_mask |= RELOAD_REG_PRE_MODIFY;
2767 break;
2769 case DImode:
2770 if (TARGET_POWERPC64)
2771 addr_mask |= RELOAD_REG_PRE_MODIFY;
2772 break;
2774 case DFmode:
2775 case DDmode:
2776 if (TARGET_DF_INSN)
2777 addr_mask |= RELOAD_REG_PRE_MODIFY;
2778 break;
2783 /* GPR and FPR registers can do REG+OFFSET addressing, except
2784 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form
2785 addressing for scalars to altivec registers. */
2786 if ((addr_mask != 0) && !indexed_only_p
2787 && msize <= 8
2788 && (rc == RELOAD_REG_GPR
2789 || rc == RELOAD_REG_FPR
2790 || (rc == RELOAD_REG_VMX
2791 && TARGET_P9_DFORM
2792 && (m2 == DFmode || m2 == SFmode))))
2793 addr_mask |= RELOAD_REG_OFFSET;
2795 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2796 addressing on 128-bit types. */
2797 if (rc == RELOAD_REG_VMX && msize == 16
2798 && (addr_mask & RELOAD_REG_VALID) != 0)
2799 addr_mask |= RELOAD_REG_AND_M16;
2801 reg_addr[m].addr_mask[rc] = addr_mask;
2802 any_addr_mask |= addr_mask;
2805 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
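/* As a sketch, assuming a 64-bit compile with -mvsx: for DFmode the GPR
   and FPR masks would contain RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET, plus the pre-increment bits when the tests above
   permit them, and RELOAD_REG_ANY always ends up as the union of the
   per-class masks.  */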
2810 /* Initialize the various global tables that are based on register size. */
2811 static void
2812 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2814 ssize_t r, m, c;
2815 int align64;
2816 int align32;
2818 /* Precalculate REGNO_REG_CLASS. */
2819 rs6000_regno_regclass[0] = GENERAL_REGS;
2820 for (r = 1; r < 32; ++r)
2821 rs6000_regno_regclass[r] = BASE_REGS;
2823 for (r = 32; r < 64; ++r)
2824 rs6000_regno_regclass[r] = FLOAT_REGS;
2826 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2827 rs6000_regno_regclass[r] = NO_REGS;
2829 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2830 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2832 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2833 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2834 rs6000_regno_regclass[r] = CR_REGS;
2836 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2837 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2838 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2839 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2840 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2841 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2842 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2843 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2844 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2845 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2846 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2847 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2849   /* Precalculate the mapping from register class to the simpler reload
        register class.  We don't
2850 need all of the register classes that are combinations of different
2851 classes, just the simple ones that have constraint letters. */
2852 for (c = 0; c < N_REG_CLASSES; c++)
2853 reg_class_to_reg_type[c] = NO_REG_TYPE;
2855 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2856 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2857 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2858 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2859 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2860 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2861 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2862 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2863 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2864 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2865 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2866 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2868 if (TARGET_VSX)
2870 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2871 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2873 else
2875 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2876 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2879   /* Precalculate the valid memory formats as well as the vector information;
2880 this must be set up before the rs6000_hard_regno_nregs_internal calls
2881 below. */
2882 gcc_assert ((int)VECTOR_NONE == 0);
2883 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2884 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2886 gcc_assert ((int)CODE_FOR_nothing == 0);
2887 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2889 gcc_assert ((int)NO_REGS == 0);
2890 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2892   /* The VSX hardware allows native alignment for vectors, but we control whether the compiler
2893      believes it can use native alignment or still uses 128-bit alignment.  */
2894 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2896 align64 = 64;
2897 align32 = 32;
2899 else
2901 align64 = 128;
2902 align32 = 128;
2905 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2906 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2907 if (TARGET_FLOAT128)
2909 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2910 rs6000_vector_align[KFmode] = 128;
2912 if (FLOAT128_IEEE_P (TFmode))
2914 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2915 rs6000_vector_align[TFmode] = 128;
2919 /* V2DF mode, VSX only. */
2920 if (TARGET_VSX)
2922 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2923 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2924 rs6000_vector_align[V2DFmode] = align64;
2927 /* V4SF mode, either VSX or Altivec. */
2928 if (TARGET_VSX)
2930 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2931 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2932 rs6000_vector_align[V4SFmode] = align32;
2934 else if (TARGET_ALTIVEC)
2936 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2937 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2938 rs6000_vector_align[V4SFmode] = align32;
2941 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2942 and stores. */
2943 if (TARGET_ALTIVEC)
2945 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2946 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2947 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2948 rs6000_vector_align[V4SImode] = align32;
2949 rs6000_vector_align[V8HImode] = align32;
2950 rs6000_vector_align[V16QImode] = align32;
2952 if (TARGET_VSX)
2954 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2955 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2956 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2958 else
2960 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2961 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2962 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2966 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2967 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2968 if (TARGET_VSX)
2970 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2971 rs6000_vector_unit[V2DImode]
2972 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2973 rs6000_vector_align[V2DImode] = align64;
2975 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2976 rs6000_vector_unit[V1TImode]
2977 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2978 rs6000_vector_align[V1TImode] = 128;
2981 /* DFmode, see if we want to use the VSX unit. Memory is handled
2982 differently, so don't set rs6000_vector_mem. */
2983 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2985 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2986 rs6000_vector_align[DFmode] = 64;
2989 /* SFmode, see if we want to use the VSX unit. */
2990 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2992 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2993 rs6000_vector_align[SFmode] = 32;
2996 /* Allow TImode in VSX register and set the VSX memory macros. */
2997 if (TARGET_VSX && TARGET_VSX_TIMODE)
2999 rs6000_vector_mem[TImode] = VECTOR_VSX;
3000 rs6000_vector_align[TImode] = align64;
3003 /* TODO add SPE and paired floating point vector support. */
3005 /* Register class constraints for the constraints that depend on compile
3006 switches. When the VSX code was added, different constraints were added
3007 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3008 of the VSX registers are used. The register classes for scalar floating
3009      point types are set based on whether we allow that type into the upper
3010 (Altivec) registers. GCC has register classes to target the Altivec
3011 registers for load/store operations, to select using a VSX memory
3012 operation instead of the traditional floating point operation. The
3013 constraints are:
3015 d - Register class to use with traditional DFmode instructions.
3016 f - Register class to use with traditional SFmode instructions.
3017 v - Altivec register.
3018 wa - Any VSX register.
3019 wc - Reserved to represent individual CR bits (used in LLVM).
3020 wd - Preferred register class for V2DFmode.
3021 wf - Preferred register class for V4SFmode.
3022 wg - Float register for power6x move insns.
3023 wh - FP register for direct move instructions.
3024 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3025 wj - FP or VSX register to hold 64-bit integers for direct moves.
3026 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3027 wl - Float register if we can do 32-bit signed int loads.
3028 wm - VSX register for ISA 2.07 direct move operations.
3029 wn - always NO_REGS.
3030 wr - GPR if 64-bit mode is permitted.
3031 ws - Register class to do ISA 2.06 DF operations.
3032 wt - VSX register for TImode in VSX registers.
3033 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3034 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3035 ww - Register class to do SF conversions in with VSX operations.
3036 wx - Float register if we can do 32-bit int stores.
3037 wy - Register class to do ISA 2.07 SF operations.
3038 wz - Float register if we can do 32-bit unsigned int loads. */
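/* For example, an insn alternative written against the "wa" constraint
   accepts any VSX register once the TARGET_VSX block below sets
   rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; without VSX the
   entry keeps its initial NO_REGS value (see the memset and assertion
   above) and such an alternative can never match.  */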
3040 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3041 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3043 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3044 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3046 if (TARGET_VSX)
3048 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3049 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3050 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3051 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
3053 if (TARGET_VSX_TIMODE)
3054 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3056 if (TARGET_UPPER_REGS_DF) /* DFmode */
3058 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3059 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3061 else
3062 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3065 /* Add conditional constraints based on various options, to allow us to
3066 collapse multiple insn patterns. */
3067 if (TARGET_ALTIVEC)
3068 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3070 if (TARGET_MFPGPR) /* DFmode */
3071 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3073 if (TARGET_LFIWAX)
3074 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3076 if (TARGET_DIRECT_MOVE)
3078 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3079 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3080 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3081 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3082 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3083 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3086 if (TARGET_POWERPC64)
3087 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3089 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3091 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3092 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3093 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3095 else if (TARGET_P8_VECTOR)
3097 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3098 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3100 else if (TARGET_VSX)
3101 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3103 if (TARGET_STFIWX)
3104 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3106 if (TARGET_LFIWZX)
3107 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3109 if (TARGET_FLOAT128)
3111 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3112 if (FLOAT128_IEEE_P (TFmode))
3113 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3116 /* Support for new D-form instructions. */
3117 if (TARGET_P9_DFORM)
3118 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3120 /* Support for ISA 3.0 (power9) vectors. */
3121 if (TARGET_P9_VECTOR)
3122 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3124 /* Support for new direct moves (ISA 3.0 + 64bit). */
3125 if (TARGET_DIRECT_MOVE_128)
3126 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3128 /* Set up the reload helper and direct move functions. */
3129 if (TARGET_VSX || TARGET_ALTIVEC)
3131 if (TARGET_64BIT)
3133 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3134 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3135 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3136 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3137 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3138 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3139 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3140 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3141 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3142 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3143 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3144 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3145 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3146 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3147 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3148 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3149 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3150 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3151 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3152 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3154 if (FLOAT128_VECTOR_P (KFmode))
3156 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3157 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3160 if (FLOAT128_VECTOR_P (TFmode))
3162 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3163 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3166 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3167 available. */
3168 if (TARGET_NO_SDMODE_STACK)
3170 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3171 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3174 if (TARGET_VSX_TIMODE)
3176 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3177 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3180 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3182 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3183 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3184 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3185 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3186 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3187 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3188 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3189 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3190 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3192 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3193 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3194 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3195 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3196 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3197 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3198 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3199 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3200 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3202 if (FLOAT128_VECTOR_P (KFmode))
3204 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3205 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3208 if (FLOAT128_VECTOR_P (TFmode))
3210 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3211 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3215 else
3217 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3218 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3219 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3220 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3221 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3222 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3223 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3224 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3225 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3226 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3227 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3228 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3229 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3230 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3231 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3232 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3233 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3234 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3235 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3236 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3238 if (FLOAT128_VECTOR_P (KFmode))
3240 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3241 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3244 if (FLOAT128_IEEE_P (TFmode))
3246 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3247 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3250 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3251 available. */
3252 if (TARGET_NO_SDMODE_STACK)
3254 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3255 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3258 if (TARGET_VSX_TIMODE)
3260 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3261 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3264 if (TARGET_DIRECT_MOVE)
3266 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3267 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3268 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3272 if (TARGET_UPPER_REGS_DF)
3273 reg_addr[DFmode].scalar_in_vmx_p = true;
3275 if (TARGET_UPPER_REGS_SF)
3276 reg_addr[SFmode].scalar_in_vmx_p = true;
3279 /* Setup the fusion operations. */
3280 if (TARGET_P8_FUSION)
3282 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3283 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3284 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3285 if (TARGET_64BIT)
3286 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3289 if (TARGET_P9_FUSION)
3291 struct fuse_insns {
3292 enum machine_mode mode; /* mode of the fused type. */
3293 enum machine_mode pmode; /* pointer mode. */
3294 enum rs6000_reload_reg_type rtype; /* register type. */
3295 enum insn_code load; /* load insn. */
3296 enum insn_code store; /* store insn. */
3299 static const struct fuse_insns addis_insns[] = {
3300 { SFmode, DImode, RELOAD_REG_FPR,
3301 CODE_FOR_fusion_fpr_di_sf_load,
3302 CODE_FOR_fusion_fpr_di_sf_store },
3304 { SFmode, SImode, RELOAD_REG_FPR,
3305 CODE_FOR_fusion_fpr_si_sf_load,
3306 CODE_FOR_fusion_fpr_si_sf_store },
3308 { DFmode, DImode, RELOAD_REG_FPR,
3309 CODE_FOR_fusion_fpr_di_df_load,
3310 CODE_FOR_fusion_fpr_di_df_store },
3312 { DFmode, SImode, RELOAD_REG_FPR,
3313 CODE_FOR_fusion_fpr_si_df_load,
3314 CODE_FOR_fusion_fpr_si_df_store },
3316 { DImode, DImode, RELOAD_REG_FPR,
3317 CODE_FOR_fusion_fpr_di_di_load,
3318 CODE_FOR_fusion_fpr_di_di_store },
3320 { DImode, SImode, RELOAD_REG_FPR,
3321 CODE_FOR_fusion_fpr_si_di_load,
3322 CODE_FOR_fusion_fpr_si_di_store },
3324 { QImode, DImode, RELOAD_REG_GPR,
3325 CODE_FOR_fusion_gpr_di_qi_load,
3326 CODE_FOR_fusion_gpr_di_qi_store },
3328 { QImode, SImode, RELOAD_REG_GPR,
3329 CODE_FOR_fusion_gpr_si_qi_load,
3330 CODE_FOR_fusion_gpr_si_qi_store },
3332 { HImode, DImode, RELOAD_REG_GPR,
3333 CODE_FOR_fusion_gpr_di_hi_load,
3334 CODE_FOR_fusion_gpr_di_hi_store },
3336 { HImode, SImode, RELOAD_REG_GPR,
3337 CODE_FOR_fusion_gpr_si_hi_load,
3338 CODE_FOR_fusion_gpr_si_hi_store },
3340 { SImode, DImode, RELOAD_REG_GPR,
3341 CODE_FOR_fusion_gpr_di_si_load,
3342 CODE_FOR_fusion_gpr_di_si_store },
3344 { SImode, SImode, RELOAD_REG_GPR,
3345 CODE_FOR_fusion_gpr_si_si_load,
3346 CODE_FOR_fusion_gpr_si_si_store },
3348 { SFmode, DImode, RELOAD_REG_GPR,
3349 CODE_FOR_fusion_gpr_di_sf_load,
3350 CODE_FOR_fusion_gpr_di_sf_store },
3352 { SFmode, SImode, RELOAD_REG_GPR,
3353 CODE_FOR_fusion_gpr_si_sf_load,
3354 CODE_FOR_fusion_gpr_si_sf_store },
3356 { DImode, DImode, RELOAD_REG_GPR,
3357 CODE_FOR_fusion_gpr_di_di_load,
3358 CODE_FOR_fusion_gpr_di_di_store },
3360 { DFmode, DImode, RELOAD_REG_GPR,
3361 CODE_FOR_fusion_gpr_di_df_load,
3362 CODE_FOR_fusion_gpr_di_df_store },
3365 enum machine_mode cur_pmode = Pmode;
3366 size_t i;
3368 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3370 enum machine_mode xmode = addis_insns[i].mode;
3371 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3373 if (addis_insns[i].pmode != cur_pmode)
3374 continue;
3376 if (rtype == RELOAD_REG_FPR
3377 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3378 continue;
3380 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3381 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
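/* For illustration (a hedged sketch; the exact operands depend on the insn
   patterns named above), the registered sequences pair an addis that
   materializes the high 16 bits of an offset with the dependent memory
   access, e.g. for a GPR load:

	addis 9,3,0x12		# r9 = r3 + (0x12 << 16)
	lwz   10,0x5678(9)	# dependent load, fusible with the addis

   Power9 can treat such an adjacent pair as a single fused operation, which
   is why the table records separate load and store icodes for each
   (mode, pointer mode, register type) combination.  */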
3385 /* Note which types support fusing a TOC setup with a memory insn. We only
3386 do fused TOCs for medium/large code models. */
3387 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3388 && (TARGET_CMODEL != CMODEL_SMALL))
3390 reg_addr[QImode].fused_toc = true;
3391 reg_addr[HImode].fused_toc = true;
3392 reg_addr[SImode].fused_toc = true;
3393 reg_addr[DImode].fused_toc = true;
3394 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3396 if (TARGET_SINGLE_FLOAT)
3397 reg_addr[SFmode].fused_toc = true;
3398 if (TARGET_DOUBLE_FLOAT)
3399 reg_addr[DFmode].fused_toc = true;
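/* Sketch of the TOC reference this enables (illustrative assembly, assuming
   the usual medium code model relocations):

	addis 9,2,var@toc@ha	# high part of the TOC offset from r2
	ld    3,var@toc@l(9)	# dependent load, fusible with the addis

   The small code model reaches the TOC with a single 16-bit offset and
   never splits it, hence the CMODEL_SMALL exclusion above.  */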
3403 /* Precalculate HARD_REGNO_NREGS. */
3404 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3405 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3406 rs6000_hard_regno_nregs[m][r]
3407 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3409 /* Precalculate HARD_REGNO_MODE_OK. */
3410 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3411 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3412 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3413 rs6000_hard_regno_mode_ok_p[m][r] = true;
3415 /* Precalculate CLASS_MAX_NREGS sizes. */
3416 for (c = 0; c < LIM_REG_CLASSES; ++c)
3418 int reg_size;
3420 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3421 reg_size = UNITS_PER_VSX_WORD;
3423 else if (c == ALTIVEC_REGS)
3424 reg_size = UNITS_PER_ALTIVEC_WORD;
3426 else if (c == FLOAT_REGS)
3427 reg_size = UNITS_PER_FP_WORD;
3429 else
3430 reg_size = UNITS_PER_WORD;
3432 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3434 machine_mode m2 = (machine_mode)m;
3435 int reg_size2 = reg_size;
3437 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3438 in VSX. */
3439 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3440 reg_size2 = UNITS_PER_FP_WORD;
3442 rs6000_class_max_nregs[m][c]
3443 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
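/* Worked example of the rounded-up division above: a 16-byte IBM long
   double (TFmode, FLOAT128_2REG_P) in FLOAT_REGS uses reg_size2 =
   UNITS_PER_FP_WORD = 8, so it needs (16 + 8 - 1) / 8 = 2 registers,
   while an 8-byte DFmode value needs (8 + 8 - 1) / 8 = 1.  */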
3447 if (TARGET_E500_DOUBLE)
3448 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3450 /* Calculate for which modes to automatically generate code that uses the
3451 reciprocal divide and square root estimate instructions. In the future,
3452 possibly generate these instructions even if the user did not specify
3453 -mrecip; on older machines the double precision reciprocal sqrt estimate
3454 is not accurate enough. */
3455 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3456 if (TARGET_FRES)
3457 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3458 if (TARGET_FRE)
3459 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3460 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3461 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3462 if (VECTOR_UNIT_VSX_P (V2DFmode))
3463 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3465 if (TARGET_FRSQRTES)
3466 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3467 if (TARGET_FRSQRTE)
3468 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3469 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3470 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3471 if (VECTOR_UNIT_VSX_P (V2DFmode))
3472 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3474 if (rs6000_recip_control)
3476 if (!flag_finite_math_only)
3477 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3478 if (flag_trapping_math)
3479 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3480 if (!flag_reciprocal_math)
3481 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3482 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3484 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3485 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3486 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3488 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3489 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3490 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3492 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3493 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3494 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3496 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3497 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3498 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3500 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3501 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3502 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3504 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3505 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3506 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3508 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3509 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3510 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3512 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3513 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3514 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
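/* The estimate instructions flagged above only seed the computation; the
   generated code refines the estimate with Newton-Raphson steps.  For 1/a
   with estimate x0, one step computes

	x1 = x0 * (2 - a * x0)

   and for 1/sqrt(a) with estimate x0,

	x1 = x0 * (1.5 - 0.5 * a * x0 * x0)

   (the standard refinement formulas; the exact sequences are emitted by the
   software divide/sqrt expanders elsewhere in this file).  */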
3518 /* Update the addr mask bits in reg_addr to help the secondary reload and
3519 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing
3520 to use. */
3521 rs6000_setup_reg_addr_masks ();
3523 if (global_init_p || TARGET_DEBUG_TARGET)
3525 if (TARGET_DEBUG_REG)
3526 rs6000_debug_reg_global ();
3528 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3529 fprintf (stderr,
3530 "SImode variable mult cost = %d\n"
3531 "SImode constant mult cost = %d\n"
3532 "SImode short constant mult cost = %d\n"
3533 "DImode multipliciation cost = %d\n"
3534 "SImode division cost = %d\n"
3535 "DImode division cost = %d\n"
3536 "Simple fp operation cost = %d\n"
3537 "DFmode multiplication cost = %d\n"
3538 "SFmode division cost = %d\n"
3539 "DFmode division cost = %d\n"
3540 "cache line size = %d\n"
3541 "l1 cache size = %d\n"
3542 "l2 cache size = %d\n"
3543 "simultaneous prefetches = %d\n"
3544 "\n",
3545 rs6000_cost->mulsi,
3546 rs6000_cost->mulsi_const,
3547 rs6000_cost->mulsi_const9,
3548 rs6000_cost->muldi,
3549 rs6000_cost->divsi,
3550 rs6000_cost->divdi,
3551 rs6000_cost->fp,
3552 rs6000_cost->dmul,
3553 rs6000_cost->sdiv,
3554 rs6000_cost->ddiv,
3555 rs6000_cost->cache_line_size,
3556 rs6000_cost->l1_cache_size,
3557 rs6000_cost->l2_cache_size,
3558 rs6000_cost->simultaneous_prefetches);
3562 #if TARGET_MACHO
3563 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3565 static void
3566 darwin_rs6000_override_options (void)
3568 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3569 off. */
3570 rs6000_altivec_abi = 1;
3571 TARGET_ALTIVEC_VRSAVE = 1;
3572 rs6000_current_abi = ABI_DARWIN;
3574 if (DEFAULT_ABI == ABI_DARWIN
3575 && TARGET_64BIT)
3576 darwin_one_byte_bool = 1;
3578 if (TARGET_64BIT && ! TARGET_POWERPC64)
3580 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3581 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3583 if (flag_mkernel)
3585 rs6000_default_long_calls = 1;
3586 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3589 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3590 Altivec. */
3591 if (!flag_mkernel && !flag_apple_kext
3592 && TARGET_64BIT
3593 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3594 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3596 /* Unless the user (not the configurer) has explicitly overridden
3597 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4
3598 unless targeting the kernel. */
3599 if (!flag_mkernel
3600 && !flag_apple_kext
3601 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3602 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3603 && ! global_options_set.x_rs6000_cpu_index)
3605 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3608 #endif
3610 /* If not otherwise specified by a target, make 'long double' equivalent to
3611 'double'. */
3613 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3614 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3615 #endif
3617 /* Return the builtin mask of the various options that affect which builtins
3618 are available. In the past we used target_flags, but we've run out of
3619 bits, and some options like SPE and PAIRED are no longer in
3620 target_flags. */
3622 HOST_WIDE_INT
3623 rs6000_builtin_mask_calculate (void)
3625 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3626 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3627 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3628 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3629 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3630 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3631 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3632 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3633 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3634 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3635 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3636 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3637 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3638 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3639 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3640 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3641 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
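/* Illustrative use of the mask (a sketch, not a verbatim caller): builtin
   initialization tests each builtin's required bits against the computed
   mask and skips the ones that are unavailable, e.g.

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     bool have_altivec_builtins = (mask & RS6000_BTM_ALTIVEC) != 0;

   rs6000_builtin_mask itself is recomputed whenever the option flags
   change, for instance via a target attribute or pragma.  */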
3644 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3645 to clobber the XER[CA] bit because clobbering that bit without telling
3646 the compiler worked just fine with versions of GCC before GCC 5, and
3647 breaking a lot of older code in ways that are hard to track down is
3648 not such a great idea. */
3650 static rtx_insn *
3651 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3652 vec<const char *> &/*constraints*/,
3653 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3655 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3656 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3657 return NULL;
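/* Example of the kind of pre-GCC-5 code this keeps working (illustrative,
   not taken from a real package): an asm that carries a value through
   XER[CA] without declaring the clobber.

     unsigned long lo, carry, a, b, c;
     __asm__ ("addc %0,%2,%3\n\taddze %1,%4"
	      : "=r" (lo), "=r" (carry)
	      : "r" (a), "r" (b), "r" (c));

   addc sets CA and addze reads it, yet the clobber list is empty; pushing
   the implicit CA clobber above keeps such asms correct.  */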
3660 /* Override command line options. Mostly we process the processor type and
3661 sometimes adjust other TARGET_ options. */
3663 static bool
3664 rs6000_option_override_internal (bool global_init_p)
3666 bool ret = true;
3667 bool have_cpu = false;
3669 /* The default cpu requested at configure time, if any. */
3670 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3672 HOST_WIDE_INT set_masks;
3673 int cpu_index;
3674 int tune_index;
3675 struct cl_target_option *main_target_opt
3676 = ((global_init_p || target_option_default_node == NULL)
3677 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3679 /* Print defaults. */
3680 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3681 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3683 /* Remember the explicit arguments. */
3684 if (global_init_p)
3685 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3687 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3688 library functions, so warn about it. The flag may be useful for
3689 performance studies from time to time though, so don't disable it
3690 entirely. */
3691 if (global_options_set.x_rs6000_alignment_flags
3692 && rs6000_alignment_flags == MASK_ALIGN_POWER
3693 && DEFAULT_ABI == ABI_DARWIN
3694 && TARGET_64BIT)
3695 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3696 " it is incompatible with the installed C and C++ libraries");
3698 /* Numerous experiments show that IRA-based loop pressure
3699 calculation works better for RTL loop invariant motion on targets
3700 with enough (>= 32) registers. It is an expensive optimization,
3701 so it is enabled only for peak performance. */
3702 if (optimize >= 3 && global_init_p
3703 && !global_options_set.x_flag_ira_loop_pressure)
3704 flag_ira_loop_pressure = 1;
3706 /* Set the pointer size. */
3707 if (TARGET_64BIT)
3709 rs6000_pmode = (int)DImode;
3710 rs6000_pointer_size = 64;
3712 else
3714 rs6000_pmode = (int)SImode;
3715 rs6000_pointer_size = 32;
3718 /* Some OSs don't support saving the high part of 64-bit registers on context
3719 switch. Other OSs don't support saving Altivec registers. On those OSs,
3720 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3721 if the user wants either, the user must explicitly specify them and we
3722 won't interfere with the user's specification. */
3724 set_masks = POWERPC_MASKS;
3725 #ifdef OS_MISSING_POWERPC64
3726 if (OS_MISSING_POWERPC64)
3727 set_masks &= ~OPTION_MASK_POWERPC64;
3728 #endif
3729 #ifdef OS_MISSING_ALTIVEC
3730 if (OS_MISSING_ALTIVEC)
3731 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3732 #endif
3734 /* Don't override by the processor default if given explicitly. */
3735 set_masks &= ~rs6000_isa_flags_explicit;
3737 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3738 the cpu in a target attribute or pragma, but did not specify a tuning
3739 option, use the cpu for the tuning option rather than the option specified
3740 with -mtune on the command line. Process a '--with-cpu' configuration
3741 request as an implicit --cpu. */
3742 if (rs6000_cpu_index >= 0)
3744 cpu_index = rs6000_cpu_index;
3745 have_cpu = true;
3747 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3749 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3750 have_cpu = true;
3752 else if (implicit_cpu)
3754 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3755 have_cpu = true;
3757 else
3759 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3760 const char *default_cpu = ((!TARGET_POWERPC64)
3761 ? "powerpc"
3762 : ((BYTES_BIG_ENDIAN)
3763 ? "powerpc64"
3764 : "powerpc64le"));
3766 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3767 have_cpu = false;
3770 gcc_assert (cpu_index >= 0);
3772 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3773 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3774 with those from the cpu, except for options that were explicitly set. If
3775 we don't have a cpu, do not override the target bits set in
3776 TARGET_DEFAULT. */
3777 if (have_cpu)
3779 rs6000_isa_flags &= ~set_masks;
3780 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3781 & set_masks);
3783 else
3785 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3786 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3787 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3788 to using rs6000_isa_flags, we need to do the initialization here.
3790 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3791 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3792 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3793 : processor_target_table[cpu_index].target_enable);
3794 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3797 if (rs6000_tune_index >= 0)
3798 tune_index = rs6000_tune_index;
3799 else if (have_cpu)
3801 /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
3802 if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
3803 rs6000_tune_index = tune_index = cpu_index;
3804 else
3806 size_t i;
3807 tune_index = -1;
3808 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3809 if (processor_target_table[i].processor == PROCESSOR_POWER8)
3811 rs6000_tune_index = tune_index = i;
3812 break;
3816 else
3818 size_t i;
3819 enum processor_type tune_proc
3820 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3822 tune_index = -1;
3823 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3824 if (processor_target_table[i].processor == tune_proc)
3826 rs6000_tune_index = tune_index = i;
3827 break;
3831 gcc_assert (tune_index >= 0);
3832 rs6000_cpu = processor_target_table[tune_index].processor;
3834 /* Pick defaults for SPE related control flags. Do this early to make sure
3835 that the TARGET_ macros are representative ASAP. */
3837 int spe_capable_cpu =
3838 (rs6000_cpu == PROCESSOR_PPC8540
3839 || rs6000_cpu == PROCESSOR_PPC8548);
3841 if (!global_options_set.x_rs6000_spe_abi)
3842 rs6000_spe_abi = spe_capable_cpu;
3844 if (!global_options_set.x_rs6000_spe)
3845 rs6000_spe = spe_capable_cpu;
3847 if (!global_options_set.x_rs6000_float_gprs)
3848 rs6000_float_gprs =
3849 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3850 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3851 : 0);
3854 if (global_options_set.x_rs6000_spe_abi
3855 && rs6000_spe_abi
3856 && !TARGET_SPE_ABI)
3857 error ("not configured for SPE ABI");
3859 if (global_options_set.x_rs6000_spe
3860 && rs6000_spe
3861 && !TARGET_SPE)
3862 error ("not configured for SPE instruction set");
3864 if (main_target_opt != NULL
3865 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3866 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3867 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3868 error ("target attribute or pragma changes SPE ABI");
3870 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3871 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3872 || rs6000_cpu == PROCESSOR_PPCE5500)
3874 if (TARGET_ALTIVEC)
3875 error ("AltiVec not supported in this target");
3876 if (TARGET_SPE)
3877 error ("SPE not supported in this target");
3879 if (rs6000_cpu == PROCESSOR_PPCE6500)
3881 if (TARGET_SPE)
3882 error ("SPE not supported in this target");
3885 /* Disable Cell microcode if we are optimizing for the Cell
3886 and not optimizing for size. */
3887 if (rs6000_gen_cell_microcode == -1)
3888 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3889 && !optimize_size);
3891 /* If we are optimizing big endian systems for space and it's OK to
3892 use instructions that would be microcoded on the Cell, use the
3893 load/store multiple and string instructions. */
3894 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3895 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3896 | OPTION_MASK_STRING);
3898 /* Don't allow -mmultiple or -mstring on little endian systems
3899 unless the cpu is a 750, because the hardware doesn't support the
3900 instructions used in little endian mode and they cause an alignment
3901 trap. The 750 does not cause an alignment trap (except when the
3902 target is unaligned). */
3904 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3906 if (TARGET_MULTIPLE)
3908 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3909 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3910 warning (0, "-mmultiple is not supported on little endian systems");
3913 if (TARGET_STRING)
3915 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3916 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3917 warning (0, "-mstring is not supported on little endian systems");
3921 /* If little-endian, default to -mstrict-align on older processors.
3922 Testing for htm matches power8 and later. */
3923 if (!BYTES_BIG_ENDIAN
3924 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3925 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3927 /* -maltivec={le,be} implies -maltivec. */
3928 if (rs6000_altivec_element_order != 0)
3929 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3931 /* Disallow -maltivec=le in big endian mode for now. This is not
3932 known to be useful for anyone. */
3933 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3935 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3936 rs6000_altivec_element_order = 0;
3939 /* Add some warnings for VSX. */
3940 if (TARGET_VSX)
3942 const char *msg = NULL;
3943 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3944 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3946 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3947 msg = N_("-mvsx requires hardware floating point");
3948 else
3950 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3951 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3954 else if (TARGET_PAIRED_FLOAT)
3955 msg = N_("-mvsx and -mpaired are incompatible");
3956 else if (TARGET_AVOID_XFORM > 0)
3957 msg = N_("-mvsx needs indexed addressing");
3958 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3959 & OPTION_MASK_ALTIVEC))
3961 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3962 msg = N_("-mvsx and -mno-altivec are incompatible");
3963 else
3964 msg = N_("-mno-altivec disables vsx");
3967 if (msg)
3969 warning (0, msg);
3970 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3971 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3975 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3976 the -mcpu setting to enable options that conflict. */
3977 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3978 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3979 | OPTION_MASK_ALTIVEC
3980 | OPTION_MASK_VSX)) != 0)
3981 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3982 | OPTION_MASK_DIRECT_MOVE)
3983 & ~rs6000_isa_flags_explicit);
3985 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3986 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3988 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3989 unless the user explicitly used the -mno-<option> to disable the code. */
3990 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM || TARGET_P9_MINMAX)
3991 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3992 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3993 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3994 else if (TARGET_VSX)
3995 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3996 else if (TARGET_POPCNTD)
3997 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3998 else if (TARGET_DFP)
3999 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4000 else if (TARGET_CMPB)
4001 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4002 else if (TARGET_FPRND)
4003 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4004 else if (TARGET_POPCNTB)
4005 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4006 else if (TARGET_ALTIVEC)
4007 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4009 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4011 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4012 error ("-mcrypto requires -maltivec");
4013 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4016 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4018 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4019 error ("-mdirect-move requires -mvsx");
4020 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4023 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4025 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4026 error ("-mpower8-vector requires -maltivec");
4027 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4030 if (TARGET_P8_VECTOR && !TARGET_VSX)
4032 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4033 error ("-mpower8-vector requires -mvsx");
4034 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4037 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4039 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4040 error ("-mvsx-timode requires -mvsx");
4041 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4044 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4046 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4047 error ("-mhard-dfp requires -mhard-float");
4048 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4051 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
4052 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
4053 the individual option. */
4054 if (TARGET_UPPER_REGS > 0)
4056 if (TARGET_VSX
4057 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4059 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4060 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4062 if (TARGET_P8_VECTOR
4063 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4065 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4066 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4069 else if (TARGET_UPPER_REGS == 0)
4071 if (TARGET_VSX
4072 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4074 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4075 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4077 if (TARGET_P8_VECTOR
4078 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4080 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4081 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4085 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4087 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4088 error ("-mupper-regs-df requires -mvsx");
4089 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4092 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4094 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4095 error ("-mupper-regs-sf requires -mpower8-vector");
4096 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4099 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4100 silently turn off quad memory mode. */
4101 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4103 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4104 warning (0, N_("-mquad-memory requires 64-bit mode"));
4106 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4107 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4109 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4110 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4113 /* Non-atomic quad memory load/store are disabled for little endian, since
4114 the words are reversed, but atomic operations can still be done by
4115 swapping the words. */
4116 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4118 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4119 warning (0, N_("-mquad-memory is not available in little endian mode"));
4121 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4124 /* Assume that if the user asked for normal quad memory instructions, they
4125 want the atomic versions as well, unless they explicitly told us not to
4126 use quad word atomic instructions. */
4127 if (TARGET_QUAD_MEMORY
4128 && !TARGET_QUAD_MEMORY_ATOMIC
4129 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4130 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4132 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4133 generating power8 instructions. */
4134 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4135 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4136 & OPTION_MASK_P8_FUSION);
4138 /* Setting additional fusion flags turns on base fusion. */
4139 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4141 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4143 if (TARGET_P8_FUSION_SIGN)
4144 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4146 if (TARGET_TOC_FUSION)
4147 error ("-mtoc-fusion requires -mpower8-fusion");
4149 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4151 else
4152 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4155 /* Power9 fusion is a superset of power8 fusion. */
4156 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4158 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4160 error ("-mpower9-fusion requires -mpower8-fusion");
4161 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4163 else
4164 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4167 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4168 generating power9 instructions. */
4169 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4170 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4171 & OPTION_MASK_P9_FUSION);
4173 /* Power8 does not fuse sign extended loads with the addis. If we are
4174 optimizing at high levels for speed, convert a sign extended load into a
4175 zero extending load and an explicit sign extension. */
4176 if (TARGET_P8_FUSION
4177 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4178 && optimize_function_for_speed_p (cfun)
4179 && optimize >= 3)
4180 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4182 /* TOC fusion requires 64-bit and medium/large code model. */
4183 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4185 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4186 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4187 warning (0, N_("-mtoc-fusion requires 64-bit"));
4190 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4192 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4193 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4194 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4197 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4198 model. */
4199 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4200 && (TARGET_CMODEL != CMODEL_SMALL)
4201 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4202 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4204 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4205 if (TARGET_P9_DFORM && !TARGET_P9_VECTOR)
4207 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4208 error ("-mpower9-dform requires -mpower9-vector");
4209 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4212 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_DF)
4214 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4215 error ("-mpower9-dform requires -mupper-regs-df");
4216 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4219 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_SF)
4221 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4222 error ("-mpower9-dform requires -mupper-regs-sf");
4223 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4226 /* ISA 3.0 vector instructions include ISA 2.07. */
4227 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4229 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4230 error ("-mpower9-vector requires -mpower8-vector");
4231 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4234 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4235 support. If we only have ISA 2.06 support, and the user did not specify
4236 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4237 but we don't enable the full vectorization support. */
4238 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4239 TARGET_ALLOW_MOVMISALIGN = 1;
4241 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4243 if (TARGET_ALLOW_MOVMISALIGN > 0
4244 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4245 error ("-mallow-movmisalign requires -mvsx");
4247 TARGET_ALLOW_MOVMISALIGN = 0;
4250 /* Determine when unaligned vector accesses are permitted, and when
4251 they are preferred over masked Altivec loads. Note that if
4252 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4253 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4254 not true. */
4255 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4257 if (!TARGET_VSX)
4259 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4260 error ("-mefficient-unaligned-vsx requires -mvsx");
4262 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4265 else if (!TARGET_ALLOW_MOVMISALIGN)
4267 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4268 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4270 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4274 /* __float128 requires VSX support. */
4275 if (TARGET_FLOAT128 && !TARGET_VSX)
4277 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4278 error ("-mfloat128 requires VSX support");
4280 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4283 /* IEEE 128-bit floating point hardware instructions imply enabling
4284 __float128. */
4285 if (TARGET_FLOAT128_HW
4286 && (rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
4287 | OPTION_MASK_DIRECT_MOVE
4288 | OPTION_MASK_UPPER_REGS_DF
4289 | OPTION_MASK_UPPER_REGS_SF)) == 0)
4291 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4292 error ("-mfloat128-hardware requires full ISA 3.0 support");
4294 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4297 else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
4298 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
4299 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4301 if (TARGET_FLOAT128_HW
4302 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4303 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4305 /* Print the options after updating the defaults. */
4306 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4307 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4309 /* E500mc does "better" if we inline more aggressively. Respect the
4310 user's opinion, though. */
4311 if (rs6000_block_move_inline_limit == 0
4312 && (rs6000_cpu == PROCESSOR_PPCE500MC
4313 || rs6000_cpu == PROCESSOR_PPCE500MC64
4314 || rs6000_cpu == PROCESSOR_PPCE5500
4315 || rs6000_cpu == PROCESSOR_PPCE6500))
4316 rs6000_block_move_inline_limit = 128;
4318 /* store_one_arg depends on expand_block_move to handle at least the
4319 size of reg_parm_stack_space. */
4320 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4321 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4323 if (global_init_p)
4325 /* If the appropriate debug option is enabled, replace the target hooks
4326 with debug versions that call the real version and then prints
4327 debugging information. */
4328 if (TARGET_DEBUG_COST)
4330 targetm.rtx_costs = rs6000_debug_rtx_costs;
4331 targetm.address_cost = rs6000_debug_address_cost;
4332 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4335 if (TARGET_DEBUG_ADDR)
4337 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4338 targetm.legitimize_address = rs6000_debug_legitimize_address;
4339 rs6000_secondary_reload_class_ptr
4340 = rs6000_debug_secondary_reload_class;
4341 rs6000_secondary_memory_needed_ptr
4342 = rs6000_debug_secondary_memory_needed;
4343 rs6000_cannot_change_mode_class_ptr
4344 = rs6000_debug_cannot_change_mode_class;
4345 rs6000_preferred_reload_class_ptr
4346 = rs6000_debug_preferred_reload_class;
4347 rs6000_legitimize_reload_address_ptr
4348 = rs6000_debug_legitimize_reload_address;
4349 rs6000_mode_dependent_address_ptr
4350 = rs6000_debug_mode_dependent_address;
4353 if (rs6000_veclibabi_name)
4355 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4356 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4357 else
4359 error ("unknown vectorization library ABI type (%s) for "
4360 "-mveclibabi= switch", rs6000_veclibabi_name);
4361 ret = false;
4366 if (!global_options_set.x_rs6000_long_double_type_size)
4368 if (main_target_opt != NULL
4369 && (main_target_opt->x_rs6000_long_double_type_size
4370 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4371 error ("target attribute or pragma changes long double size");
4372 else
4373 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4376 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4377 if (!global_options_set.x_rs6000_ieeequad)
4378 rs6000_ieeequad = 1;
4379 #endif
4381 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4382 target attribute or pragma which automatically enables both options,
4383 unless the altivec ABI was set. This is set by default for 64-bit, but
4384 not for 32-bit. */
4385 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4386 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4387 | OPTION_MASK_FLOAT128)
4388 & ~rs6000_isa_flags_explicit);
4390 /* Enable Altivec ABI for AIX -maltivec. */
4391 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4393 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4394 error ("target attribute or pragma changes AltiVec ABI");
4395 else
4396 rs6000_altivec_abi = 1;
4399 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4400 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4401 be explicitly overridden in either case. */
4402 if (TARGET_ELF)
4404 if (!global_options_set.x_rs6000_altivec_abi
4405 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4407 if (main_target_opt != NULL &&
4408 !main_target_opt->x_rs6000_altivec_abi)
4409 error ("target attribute or pragma changes AltiVec ABI");
4410 else
4411 rs6000_altivec_abi = 1;
4415 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4416 So far, the only darwin64 targets are also MACH-O. */
4417 if (TARGET_MACHO
4418 && DEFAULT_ABI == ABI_DARWIN
4419 && TARGET_64BIT)
4421 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4422 error ("target attribute or pragma changes darwin64 ABI");
4423 else
4425 rs6000_darwin64_abi = 1;
4426 /* Default to natural alignment, for better performance. */
4427 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4431 /* Place FP constants in the constant pool instead of the TOC
4432 if section anchors are enabled. */
4433 if (flag_section_anchors
4434 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4435 TARGET_NO_FP_IN_TOC = 1;
4437 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4438 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4440 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4441 SUBTARGET_OVERRIDE_OPTIONS;
4442 #endif
4443 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4444 SUBSUBTARGET_OVERRIDE_OPTIONS;
4445 #endif
4446 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4447 SUB3TARGET_OVERRIDE_OPTIONS;
4448 #endif
4450 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4451 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4453 /* For the E500 family of cores, reset the single/double FP flags to let us
4454 check that they remain constant across attributes or pragmas. Also,
4455 clear a possible request for string instructions, which are not supported
4456 and which we might have silently enabled above for -Os.
4458 For other families, clear ISEL in case it was set implicitly.
4461 switch (rs6000_cpu)
4463 case PROCESSOR_PPC8540:
4464 case PROCESSOR_PPC8548:
4465 case PROCESSOR_PPCE500MC:
4466 case PROCESSOR_PPCE500MC64:
4467 case PROCESSOR_PPCE5500:
4468 case PROCESSOR_PPCE6500:
4470 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4471 rs6000_double_float = TARGET_E500_DOUBLE;
4473 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4475 break;
4477 default:
4479 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4480 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4482 break;
4485 if (main_target_opt)
4487 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4488 error ("target attribute or pragma changes single precision floating "
4489 "point");
4490 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4491 error ("target attribute or pragma changes double precision floating "
4492 "point");
4495 /* Detect invalid option combinations with E500. */
4496 CHECK_E500_OPTIONS;
4498 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4499 && rs6000_cpu != PROCESSOR_POWER5
4500 && rs6000_cpu != PROCESSOR_POWER6
4501 && rs6000_cpu != PROCESSOR_POWER7
4502 && rs6000_cpu != PROCESSOR_POWER8
4503 && rs6000_cpu != PROCESSOR_POWER9
4504 && rs6000_cpu != PROCESSOR_PPCA2
4505 && rs6000_cpu != PROCESSOR_CELL
4506 && rs6000_cpu != PROCESSOR_PPC476);
4507 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4508 || rs6000_cpu == PROCESSOR_POWER5
4509 || rs6000_cpu == PROCESSOR_POWER7
4510 || rs6000_cpu == PROCESSOR_POWER8
4511 || rs6000_cpu == PROCESSOR_POWER9);
4512 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4513 || rs6000_cpu == PROCESSOR_POWER5
4514 || rs6000_cpu == PROCESSOR_POWER6
4515 || rs6000_cpu == PROCESSOR_POWER7
4516 || rs6000_cpu == PROCESSOR_POWER8
4517 || rs6000_cpu == PROCESSOR_POWER9
4518 || rs6000_cpu == PROCESSOR_PPCE500MC
4519 || rs6000_cpu == PROCESSOR_PPCE500MC64
4520 || rs6000_cpu == PROCESSOR_PPCE5500
4521 || rs6000_cpu == PROCESSOR_PPCE6500);
4523 /* Allow debug switches to override the above settings. These are set to -1
4524 in rs6000.opt to indicate the user hasn't directly set the switch. */
4525 if (TARGET_ALWAYS_HINT >= 0)
4526 rs6000_always_hint = TARGET_ALWAYS_HINT;
4528 if (TARGET_SCHED_GROUPS >= 0)
4529 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4531 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4532 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4534 rs6000_sched_restricted_insns_priority
4535 = (rs6000_sched_groups ? 1 : 0);
4537 /* Handle -msched-costly-dep option. */
4538 rs6000_sched_costly_dep
4539 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4541 if (rs6000_sched_costly_dep_str)
4543 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4544 rs6000_sched_costly_dep = no_dep_costly;
4545 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4546 rs6000_sched_costly_dep = all_deps_costly;
4547 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4548 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4549 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4550 rs6000_sched_costly_dep = store_to_load_dep_costly;
4551 else
4552 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4553 atoi (rs6000_sched_costly_dep_str));
4556 /* Handle -minsert-sched-nops option. */
4557 rs6000_sched_insert_nops
4558 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4560 if (rs6000_sched_insert_nops_str)
4562 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4563 rs6000_sched_insert_nops = sched_finish_none;
4564 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4565 rs6000_sched_insert_nops = sched_finish_pad_groups;
4566 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4567 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4568 else
4569 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4570 atoi (rs6000_sched_insert_nops_str));
4573 if (global_init_p)
4575 #ifdef TARGET_REGNAMES
4576 /* If the user desires alternate register names, copy in the
4577 alternate names now. */
4578 if (TARGET_REGNAMES)
4579 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4580 #endif
4582 /* Set aix_struct_return last, after the ABI is determined.
4583 If -maix-struct-return or -msvr4-struct-return was explicitly
4584 used, don't override with the ABI default. */
4585 if (!global_options_set.x_aix_struct_return)
4586 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4588 #if 0
4589 /* IBM XL compiler defaults to unsigned bitfields. */
4590 if (TARGET_XL_COMPAT)
4591 flag_signed_bitfields = 0;
4592 #endif
4594 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4595 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4597 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4599 /* We can only guarantee the availability of DI pseudo-ops when
4600 assembling for 64-bit targets. */
4601 if (!TARGET_64BIT)
4603 targetm.asm_out.aligned_op.di = NULL;
4604 targetm.asm_out.unaligned_op.di = NULL;
4608 /* Set branch target alignment, if not optimizing for size. */
4609 if (!optimize_size)
4611 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4612 aligned 8-byte to avoid misprediction by the branch predictor. */
4613 if (rs6000_cpu == PROCESSOR_TITAN
4614 || rs6000_cpu == PROCESSOR_CELL)
4616 if (align_functions <= 0)
4617 align_functions = 8;
4618 if (align_jumps <= 0)
4619 align_jumps = 8;
4620 if (align_loops <= 0)
4621 align_loops = 8;
4623 if (rs6000_align_branch_targets)
4625 if (align_functions <= 0)
4626 align_functions = 16;
4627 if (align_jumps <= 0)
4628 align_jumps = 16;
4629 if (align_loops <= 0)
4631 can_override_loop_align = 1;
4632 align_loops = 16;
4635 if (align_jumps_max_skip <= 0)
4636 align_jumps_max_skip = 15;
4637 if (align_loops_max_skip <= 0)
4638 align_loops_max_skip = 15;
4641 /* Arrange to save and restore machine status around nested functions. */
4642 init_machine_status = rs6000_init_machine_status;
4644 /* We should always be splitting complex arguments, but we can't break
4645 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4646 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4647 targetm.calls.split_complex_arg = NULL;
4650 /* Initialize rs6000_cost with the appropriate target costs. */
4651 if (optimize_size)
4652 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4653 else
4654 switch (rs6000_cpu)
4656 case PROCESSOR_RS64A:
4657 rs6000_cost = &rs64a_cost;
4658 break;
4660 case PROCESSOR_MPCCORE:
4661 rs6000_cost = &mpccore_cost;
4662 break;
4664 case PROCESSOR_PPC403:
4665 rs6000_cost = &ppc403_cost;
4666 break;
4668 case PROCESSOR_PPC405:
4669 rs6000_cost = &ppc405_cost;
4670 break;
4672 case PROCESSOR_PPC440:
4673 rs6000_cost = &ppc440_cost;
4674 break;
4676 case PROCESSOR_PPC476:
4677 rs6000_cost = &ppc476_cost;
4678 break;
4680 case PROCESSOR_PPC601:
4681 rs6000_cost = &ppc601_cost;
4682 break;
4684 case PROCESSOR_PPC603:
4685 rs6000_cost = &ppc603_cost;
4686 break;
4688 case PROCESSOR_PPC604:
4689 rs6000_cost = &ppc604_cost;
4690 break;
4692 case PROCESSOR_PPC604e:
4693 rs6000_cost = &ppc604e_cost;
4694 break;
4696 case PROCESSOR_PPC620:
4697 rs6000_cost = &ppc620_cost;
4698 break;
4700 case PROCESSOR_PPC630:
4701 rs6000_cost = &ppc630_cost;
4702 break;
4704 case PROCESSOR_CELL:
4705 rs6000_cost = &ppccell_cost;
4706 break;
4708 case PROCESSOR_PPC750:
4709 case PROCESSOR_PPC7400:
4710 rs6000_cost = &ppc750_cost;
4711 break;
4713 case PROCESSOR_PPC7450:
4714 rs6000_cost = &ppc7450_cost;
4715 break;
4717 case PROCESSOR_PPC8540:
4718 case PROCESSOR_PPC8548:
4719 rs6000_cost = &ppc8540_cost;
4720 break;
4722 case PROCESSOR_PPCE300C2:
4723 case PROCESSOR_PPCE300C3:
4724 rs6000_cost = &ppce300c2c3_cost;
4725 break;
4727 case PROCESSOR_PPCE500MC:
4728 rs6000_cost = &ppce500mc_cost;
4729 break;
4731 case PROCESSOR_PPCE500MC64:
4732 rs6000_cost = &ppce500mc64_cost;
4733 break;
4735 case PROCESSOR_PPCE5500:
4736 rs6000_cost = &ppce5500_cost;
4737 break;
4739 case PROCESSOR_PPCE6500:
4740 rs6000_cost = &ppce6500_cost;
4741 break;
4743 case PROCESSOR_TITAN:
4744 rs6000_cost = &titan_cost;
4745 break;
4747 case PROCESSOR_POWER4:
4748 case PROCESSOR_POWER5:
4749 rs6000_cost = &power4_cost;
4750 break;
4752 case PROCESSOR_POWER6:
4753 rs6000_cost = &power6_cost;
4754 break;
4756 case PROCESSOR_POWER7:
4757 rs6000_cost = &power7_cost;
4758 break;
4760 case PROCESSOR_POWER8:
4761 rs6000_cost = &power8_cost;
4762 break;
4764 case PROCESSOR_POWER9:
4765 rs6000_cost = &power9_cost;
4766 break;
4768 case PROCESSOR_PPCA2:
4769 rs6000_cost = &ppca2_cost;
4770 break;
4772 default:
4773 gcc_unreachable ();
4776 if (global_init_p)
4778 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4779 rs6000_cost->simultaneous_prefetches,
4780 global_options.x_param_values,
4781 global_options_set.x_param_values);
4782 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4783 global_options.x_param_values,
4784 global_options_set.x_param_values);
4785 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4786 rs6000_cost->cache_line_size,
4787 global_options.x_param_values,
4788 global_options_set.x_param_values);
4789 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4790 global_options.x_param_values,
4791 global_options_set.x_param_values);
4793 /* Increase loop peeling limits based on performance analysis. */
4794 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4795 global_options.x_param_values,
4796 global_options_set.x_param_values);
4797 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4798 global_options.x_param_values,
4799 global_options_set.x_param_values);
4801 /* If using typedef char *va_list, signal that
4802 __builtin_va_start (&ap, 0) can be optimized to
4803 ap = __builtin_next_arg (0). */
4804 if (DEFAULT_ABI != ABI_V4)
4805 targetm.expand_builtin_va_start = NULL;
4808 /* Set up single/double float flags.
4809 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4810 then set both flags. */
4811 if (TARGET_HARD_FLOAT && TARGET_FPRS
4812 && rs6000_single_float == 0 && rs6000_double_float == 0)
4813 rs6000_single_float = rs6000_double_float = 1;
4815 /* If not explicitly specified via option, decide whether to generate indexed
4816 load/store instructions. */
4817 if (TARGET_AVOID_XFORM == -1)
4818 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4819 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4820 need indexed accesses and the type used is the scalar type of the element
4821 being loaded or stored. */
4822 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4823 && !TARGET_ALTIVEC);
4825 /* Set the -mrecip options. */
4826 if (rs6000_recip_name)
4828 char *p = ASTRDUP (rs6000_recip_name);
4829 char *q;
4830 unsigned int mask, i;
4831 bool invert;
4833 while ((q = strtok (p, ",")) != NULL)
4835 p = NULL;
4836 if (*q == '!')
4838 invert = true;
4839 q++;
4841 else
4842 invert = false;
4844 if (!strcmp (q, "default"))
4845 mask = ((TARGET_RECIP_PRECISION)
4846 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4847 else
4849 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4850 if (!strcmp (q, recip_options[i].string))
4852 mask = recip_options[i].mask;
4853 break;
4856 if (i == ARRAY_SIZE (recip_options))
4858 error ("unknown option for -mrecip=%s", q);
4859 invert = false;
4860 mask = 0;
4861 ret = false;
4865 if (invert)
4866 rs6000_recip_control &= ~mask;
4867 else
4868 rs6000_recip_control |= mask;
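/* For example (assuming the option names in the recip_options table, which
   include entries such as "divf" and "rsqrtd"),

	-mrecip=rsqrtf,!divd

   enables the single precision rsqrt estimate and explicitly disables the
   double precision divide estimate, while -mrecip=default selects
   RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION based on the cpu.  */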
4872 /* Set the builtin mask of the various options that affect which builtins
4873 are available. In the past we used target_flags, but we've run out
4874 of bits, and some options like SPE and PAIRED are no longer in
4875 target_flags. */
4876 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4877 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4878 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4879 rs6000_builtin_mask);
4881 /* Initialize all of the registers. */
4882 rs6000_init_hard_regno_mode_ok (global_init_p);
4884 /* Save the initial options in case the user uses function specific options. */
4885 if (global_init_p)
4886 target_option_default_node = target_option_current_node
4887 = build_target_option_node (&global_options);
4889 /* If not explicitly specified via option, decide whether to generate the
4890 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
4891 if (TARGET_LINK_STACK == -1)
4892 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4894 return ret;
4897 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4898 define the target cpu type. */
4900 static void
4901 rs6000_option_override (void)
4903 (void) rs6000_option_override_internal (true);
4905 /* Register machine-specific passes. This needs to be done at start-up.
4906 It's convenient to do it here (like i386 does). */
4907 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4909 struct register_pass_info analyze_swaps_info
4910 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4912 register_pass (&analyze_swaps_info);
4916 /* Implement targetm.vectorize.builtin_mask_for_load. */
4917 static tree
4918 rs6000_builtin_mask_for_load (void)
4920 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4921 if ((TARGET_ALTIVEC && !TARGET_VSX)
4922 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4923 return altivec_builtin_mask_for_load;
4924 else
4925 return 0;
4928 /* Implement LOOP_ALIGN. */
4930 rs6000_loop_align (rtx label)
4932 basic_block bb;
4933 int ninsns;
4935 /* Don't override loop alignment if -falign-loops was specified. */
4936 if (!can_override_loop_align)
4937 return align_loops_log;
4939 bb = BLOCK_FOR_INSN (label);
4940 ninsns = num_loop_insns(bb->loop_father);
4942 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4943 if (ninsns > 4 && ninsns <= 8
4944 && (rs6000_cpu == PROCESSOR_POWER4
4945 || rs6000_cpu == PROCESSOR_POWER5
4946 || rs6000_cpu == PROCESSOR_POWER6
4947 || rs6000_cpu == PROCESSOR_POWER7
4948 || rs6000_cpu == PROCESSOR_POWER8
4949 || rs6000_cpu == PROCESSOR_POWER9))
4950 return 5;
4951 else
4952 return align_loops_log;
4955 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4956 static int
4957 rs6000_loop_align_max_skip (rtx_insn *label)
4959 return (1 << rs6000_loop_align (label)) - 1;
4962 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4963 after applying N iterations. This routine does not determine
4964 how many iterations are required to reach the desired alignment. */
4966 static bool
4967 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4969 if (is_packed)
4970 return false;
4972 if (TARGET_32BIT)
4974 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4975 return true;
4977 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4978 return true;
4980 return false;
4982 else
4984 if (TARGET_MACHO)
4985 return false;
4987 /* Assume that all other types are naturally aligned. CHECKME! */
4988 return true;
4992 /* Return true if the vector misalignment factor is supported by the
4993 target. */
4994 static bool
4995 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4996 const_tree type,
4997 int misalignment,
4998 bool is_packed)
5000 if (TARGET_VSX)
5002 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5003 return true;
5005 /* Return false if the movmisalign pattern is not supported for this mode. */
5006 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5007 return false;
5009 if (misalignment == -1)
5011 /* Misalignment factor is unknown at compile time but we know
5012 it's word aligned. */
5013 if (rs6000_vector_alignment_reachable (type, is_packed))
5015 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5017 if (element_size == 64 || element_size == 32)
5018 return true;
5021 return false;
5024 /* VSX supports word-aligned vectors. */
5025 if (misalignment % 4 == 0)
5026 return true;
5028 return false;
5031 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5032 static int
5033 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5034 tree vectype, int misalign)
5036 unsigned elements;
5037 tree elem_type;
5039 switch (type_of_cost)
5041 case scalar_stmt:
5042 case scalar_load:
5043 case scalar_store:
5044 case vector_stmt:
5045 case vector_load:
5046 case vector_store:
5047 case vec_to_scalar:
5048 case scalar_to_vec:
5049 case cond_branch_not_taken:
5050 return 1;
5052 case vec_perm:
5053 if (TARGET_VSX)
5054 return 3;
5055 else
5056 return 1;
5058 case vec_promote_demote:
5059 if (TARGET_VSX)
5060 return 4;
5061 else
5062 return 1;
5064 case cond_branch_taken:
5065 return 3;
5067 case unaligned_load:
5068 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5069 return 1;
5071 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5073 elements = TYPE_VECTOR_SUBPARTS (vectype);
5074 if (elements == 2)
5075 /* Double word aligned. */
5076 return 2;
5078 if (elements == 4)
5080 switch (misalign)
5082 case 8:
5083 /* Double word aligned. */
5084 return 2;
5086 case -1:
5087 /* Unknown misalignment. */
5088 case 4:
5089 case 12:
5090 /* Word aligned. */
5091 return 22;
5093 default:
5094 gcc_unreachable ();
5099 if (TARGET_ALTIVEC)
5100 /* Misaligned loads are not supported. */
5101 gcc_unreachable ();
5103 return 2;
5105 case unaligned_store:
5106 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5107 return 1;
5109 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5111 elements = TYPE_VECTOR_SUBPARTS (vectype);
5112 if (elements == 2)
5113 /* Double word aligned. */
5114 return 2;
5116 if (elements == 4)
5118 switch (misalign)
5120 case 8:
5121 /* Double word aligned. */
5122 return 2;
5124 case -1:
5125 /* Unknown misalignment. */
5126 case 4:
5127 case 12:
5128 /* Word aligned. */
5129 return 23;
5131 default:
5132 gcc_unreachable ();
5137 if (TARGET_ALTIVEC)
5138 /* Misaligned stores are not supported. */
5139 gcc_unreachable ();
5141 return 2;
5143 case vec_construct:
5144 elements = TYPE_VECTOR_SUBPARTS (vectype);
5145 elem_type = TREE_TYPE (vectype);
5146 /* 32-bit vectors loaded into registers are stored as double
5147 precision, so we need n/2 converts in addition to the usual
5148 n/2 merges to construct a vector of short floats from them. */
5149 if (SCALAR_FLOAT_TYPE_P (elem_type)
5150 && TYPE_PRECISION (elem_type) == 32)
5151 return elements + 1;
5152 else
5153 return elements / 2 + 1;
5155 default:
5156 gcc_unreachable ();
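/* Example for the vec_construct case above (illustrative only): building
   a V4SFmode vector from four floats costs 4 + 1 == 5 (two converts plus
   three merges), while a V2DFmode vector costs 2 / 2 + 1 == 2.  */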
5160 /* Implement targetm.vectorize.preferred_simd_mode. */
5162 static machine_mode
5163 rs6000_preferred_simd_mode (machine_mode mode)
5165 if (TARGET_VSX)
5166 switch (mode)
5168 case DFmode:
5169 return V2DFmode;
5170 default:;
5172 if (TARGET_ALTIVEC || TARGET_VSX)
5173 switch (mode)
5175 case SFmode:
5176 return V4SFmode;
5177 case TImode:
5178 return V1TImode;
5179 case DImode:
5180 return V2DImode;
5181 case SImode:
5182 return V4SImode;
5183 case HImode:
5184 return V8HImode;
5185 case QImode:
5186 return V16QImode;
5187 default:;
5189 if (TARGET_SPE)
5190 switch (mode)
5192 case SFmode:
5193 return V2SFmode;
5194 case SImode:
5195 return V2SImode;
5196 default:;
5198 if (TARGET_PAIRED_FLOAT
5199 && mode == SFmode)
5200 return V2SFmode;
5201 return word_mode;
5204 typedef struct _rs6000_cost_data
5206 struct loop *loop_info;
5207 unsigned cost[3];
5208 } rs6000_cost_data;
5210 /* Test for likely overcommitment of vector hardware resources. If a
5211 loop iteration is relatively large, and too large a percentage of
5212 instructions in the loop are vectorized, the cost model may not
5213 adequately reflect delays from unavailable vector resources.
5214 Penalize the loop body cost for this case. */
5216 static void
5217 rs6000_density_test (rs6000_cost_data *data)
5219 const int DENSITY_PCT_THRESHOLD = 85;
5220 const int DENSITY_SIZE_THRESHOLD = 70;
5221 const int DENSITY_PENALTY = 10;
5222 struct loop *loop = data->loop_info;
5223 basic_block *bbs = get_loop_body (loop);
5224 int nbbs = loop->num_nodes;
5225 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5226 int i, density_pct;
5228 for (i = 0; i < nbbs; i++)
5230 basic_block bb = bbs[i];
5231 gimple_stmt_iterator gsi;
5233 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5235 gimple *stmt = gsi_stmt (gsi);
5236 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5239 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5240 not_vec_cost++;
5244 free (bbs);
5245 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5247 if (density_pct > DENSITY_PCT_THRESHOLD
5248 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5250 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5251 if (dump_enabled_p ())
5252 dump_printf_loc (MSG_NOTE, vect_location,
5253 "density %d%%, cost %d exceeds threshold, penalizing "
5254 "loop body cost by %d%%", density_pct,
5255 vec_cost + not_vec_cost, DENSITY_PENALTY);
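/* Numeric example (illustrative only): with vec_cost == 90 and
   not_vec_cost == 10, density_pct is 90 and the loop size is 100, so
   both thresholds (85% and 70) are exceeded and the body cost is raised
   to 90 * 110 / 100 == 99.  */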
5259 /* Implement targetm.vectorize.init_cost. */
5261 static void *
5262 rs6000_init_cost (struct loop *loop_info)
5264 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5265 data->loop_info = loop_info;
5266 data->cost[vect_prologue] = 0;
5267 data->cost[vect_body] = 0;
5268 data->cost[vect_epilogue] = 0;
5269 return data;
5272 /* Implement targetm.vectorize.add_stmt_cost. */
5274 static unsigned
5275 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5276 struct _stmt_vec_info *stmt_info, int misalign,
5277 enum vect_cost_model_location where)
5279 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5280 unsigned retval = 0;
5282 if (flag_vect_cost_model)
5284 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5285 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5286 misalign);
5287 /* Statements in an inner loop relative to the loop being
5288 vectorized are weighted more heavily. The value here is
5289 arbitrary and could potentially be improved with analysis. */
5290 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5291 count *= 50; /* FIXME. */
5293 retval = (unsigned) (count * stmt_cost);
5294 cost_data->cost[where] += retval;
5297 return retval;
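/* Example (illustrative only): a vector_load (base cost 1) counted 4
   times in the body of a loop nested inside the loop being vectorized
   contributes 4 * 50 * 1 == 200 units to cost[vect_body].  */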
5300 /* Implement targetm.vectorize.finish_cost. */
5302 static void
5303 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5304 unsigned *body_cost, unsigned *epilogue_cost)
5306 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5308 if (cost_data->loop_info)
5309 rs6000_density_test (cost_data);
5311 *prologue_cost = cost_data->cost[vect_prologue];
5312 *body_cost = cost_data->cost[vect_body];
5313 *epilogue_cost = cost_data->cost[vect_epilogue];
5316 /* Implement targetm.vectorize.destroy_cost_data. */
5318 static void
5319 rs6000_destroy_cost_data (void *data)
5321 free (data);
5324 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5325 library with vectorized intrinsics. */
5327 static tree
5328 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5329 tree type_in)
5331 char name[32];
5332 const char *suffix = NULL;
5333 tree fntype, new_fndecl, bdecl = NULL_TREE;
5334 int n_args = 1;
5335 const char *bname;
5336 machine_mode el_mode, in_mode;
5337 int n, in_n;
5339 /* Libmass is suitable only for unsafe math, since it does not correctly
5340 support parts of IEEE arithmetic, such as denormals, with the required
5341 precision. Only support it if we have VSX to use the simd d2 or f4
5342 functions. XXX: Add variable-length support. */
5343 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5344 return NULL_TREE;
5346 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5347 n = TYPE_VECTOR_SUBPARTS (type_out);
5348 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5349 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5350 if (el_mode != in_mode
5351 || n != in_n)
5352 return NULL_TREE;
5354 switch (fn)
5356 CASE_CFN_ATAN2:
5357 CASE_CFN_HYPOT:
5358 CASE_CFN_POW:
5359 n_args = 2;
5360 /* fall through */
5362 CASE_CFN_ACOS:
5363 CASE_CFN_ACOSH:
5364 CASE_CFN_ASIN:
5365 CASE_CFN_ASINH:
5366 CASE_CFN_ATAN:
5367 CASE_CFN_ATANH:
5368 CASE_CFN_CBRT:
5369 CASE_CFN_COS:
5370 CASE_CFN_COSH:
5371 CASE_CFN_ERF:
5372 CASE_CFN_ERFC:
5373 CASE_CFN_EXP2:
5374 CASE_CFN_EXP:
5375 CASE_CFN_EXPM1:
5376 CASE_CFN_LGAMMA:
5377 CASE_CFN_LOG10:
5378 CASE_CFN_LOG1P:
5379 CASE_CFN_LOG2:
5380 CASE_CFN_LOG:
5381 CASE_CFN_SIN:
5382 CASE_CFN_SINH:
5383 CASE_CFN_SQRT:
5384 CASE_CFN_TAN:
5385 CASE_CFN_TANH:
5386 if (el_mode == DFmode && n == 2)
5388 bdecl = mathfn_built_in (double_type_node, fn);
5389 suffix = "d2"; /* pow -> powd2 */
5391 else if (el_mode == SFmode && n == 4)
5393 bdecl = mathfn_built_in (float_type_node, fn);
5394 suffix = "4"; /* powf -> powf4 */
5396 else
5397 return NULL_TREE;
5398 if (!bdecl)
5399 return NULL_TREE;
5400 break;
5402 default:
5403 return NULL_TREE;
5406 gcc_assert (suffix != NULL);
5407 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5408 if (!bname)
5409 return NULL_TREE;
5411 strcpy (name, bname + sizeof ("__builtin_") - 1);
5412 strcat (name, suffix);
5414 if (n_args == 1)
5415 fntype = build_function_type_list (type_out, type_in, NULL);
5416 else if (n_args == 2)
5417 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5418 else
5419 gcc_unreachable ();
5421 /* Build a function declaration for the vectorized function. */
5422 new_fndecl = build_decl (BUILTINS_LOCATION,
5423 FUNCTION_DECL, get_identifier (name), fntype);
5424 TREE_PUBLIC (new_fndecl) = 1;
5425 DECL_EXTERNAL (new_fndecl) = 1;
5426 DECL_IS_NOVOPS (new_fndecl) = 1;
5427 TREE_READONLY (new_fndecl) = 1;
5429 return new_fndecl;
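/* Example of the name mangling above (illustrative only): for a V2DFmode
   pow, BDECL names "__builtin_pow"; the "__builtin_" prefix is stripped
   and the "d2" suffix appended, so the declaration built is for "powd2".
   The V4SFmode variant starts from "__builtin_powf" and yields
   "powf4".  */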
5432 /* Returns a function decl for a vectorized version of the builtin function
5433 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5434 if it is not available. */
5436 static tree
5437 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5438 tree type_in)
5440 machine_mode in_mode, out_mode;
5441 int in_n, out_n;
5443 if (TARGET_DEBUG_BUILTIN)
5444 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5445 combined_fn_name (combined_fn (fn)),
5446 GET_MODE_NAME (TYPE_MODE (type_out)),
5447 GET_MODE_NAME (TYPE_MODE (type_in)));
5449 if (TREE_CODE (type_out) != VECTOR_TYPE
5450 || TREE_CODE (type_in) != VECTOR_TYPE
5451 || !TARGET_VECTORIZE_BUILTINS)
5452 return NULL_TREE;
5454 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5455 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5456 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5457 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5459 switch (fn)
5461 CASE_CFN_COPYSIGN:
5462 if (VECTOR_UNIT_VSX_P (V2DFmode)
5463 && out_mode == DFmode && out_n == 2
5464 && in_mode == DFmode && in_n == 2)
5465 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5466 if (VECTOR_UNIT_VSX_P (V4SFmode)
5467 && out_mode == SFmode && out_n == 4
5468 && in_mode == SFmode && in_n == 4)
5469 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5470 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5471 && out_mode == SFmode && out_n == 4
5472 && in_mode == SFmode && in_n == 4)
5473 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5474 break;
5475 CASE_CFN_CEIL:
5476 if (VECTOR_UNIT_VSX_P (V2DFmode)
5477 && out_mode == DFmode && out_n == 2
5478 && in_mode == DFmode && in_n == 2)
5479 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5480 if (VECTOR_UNIT_VSX_P (V4SFmode)
5481 && out_mode == SFmode && out_n == 4
5482 && in_mode == SFmode && in_n == 4)
5483 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5484 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5485 && out_mode == SFmode && out_n == 4
5486 && in_mode == SFmode && in_n == 4)
5487 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5488 break;
5489 CASE_CFN_FLOOR:
5490 if (VECTOR_UNIT_VSX_P (V2DFmode)
5491 && out_mode == DFmode && out_n == 2
5492 && in_mode == DFmode && in_n == 2)
5493 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5494 if (VECTOR_UNIT_VSX_P (V4SFmode)
5495 && out_mode == SFmode && out_n == 4
5496 && in_mode == SFmode && in_n == 4)
5497 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5498 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5499 && out_mode == SFmode && out_n == 4
5500 && in_mode == SFmode && in_n == 4)
5501 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5502 break;
5503 CASE_CFN_FMA:
5504 if (VECTOR_UNIT_VSX_P (V2DFmode)
5505 && out_mode == DFmode && out_n == 2
5506 && in_mode == DFmode && in_n == 2)
5507 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5508 if (VECTOR_UNIT_VSX_P (V4SFmode)
5509 && out_mode == SFmode && out_n == 4
5510 && in_mode == SFmode && in_n == 4)
5511 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5512 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5513 && out_mode == SFmode && out_n == 4
5514 && in_mode == SFmode && in_n == 4)
5515 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5516 break;
5517 CASE_CFN_TRUNC:
5518 if (VECTOR_UNIT_VSX_P (V2DFmode)
5519 && out_mode == DFmode && out_n == 2
5520 && in_mode == DFmode && in_n == 2)
5521 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5522 if (VECTOR_UNIT_VSX_P (V4SFmode)
5523 && out_mode == SFmode && out_n == 4
5524 && in_mode == SFmode && in_n == 4)
5525 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5526 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5527 && out_mode == SFmode && out_n == 4
5528 && in_mode == SFmode && in_n == 4)
5529 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5530 break;
5531 CASE_CFN_NEARBYINT:
5532 if (VECTOR_UNIT_VSX_P (V2DFmode)
5533 && flag_unsafe_math_optimizations
5534 && out_mode == DFmode && out_n == 2
5535 && in_mode == DFmode && in_n == 2)
5536 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5537 if (VECTOR_UNIT_VSX_P (V4SFmode)
5538 && flag_unsafe_math_optimizations
5539 && out_mode == SFmode && out_n == 4
5540 && in_mode == SFmode && in_n == 4)
5541 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5542 break;
5543 CASE_CFN_RINT:
5544 if (VECTOR_UNIT_VSX_P (V2DFmode)
5545 && !flag_trapping_math
5546 && out_mode == DFmode && out_n == 2
5547 && in_mode == DFmode && in_n == 2)
5548 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5549 if (VECTOR_UNIT_VSX_P (V4SFmode)
5550 && !flag_trapping_math
5551 && out_mode == SFmode && out_n == 4
5552 && in_mode == SFmode && in_n == 4)
5553 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5554 break;
5555 default:
5556 break;
5559 /* Generate calls to libmass if appropriate. */
5560 if (rs6000_veclib_handler)
5561 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5563 return NULL_TREE;
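/* Example (illustrative only): a CFN_CEIL call vectorized with V2DFmode
   input and output maps to VSX_BUILTIN_XVRDPIP above, i.e. the xvrdpip
   round-toward-plus-infinity instruction; the V4SFmode variant maps to
   xvrspip, or to vrfip on plain AltiVec.  */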
5566 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5568 static tree
5569 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5570 tree type_in)
5572 machine_mode in_mode, out_mode;
5573 int in_n, out_n;
5575 if (TARGET_DEBUG_BUILTIN)
5576 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5577 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5578 GET_MODE_NAME (TYPE_MODE (type_out)),
5579 GET_MODE_NAME (TYPE_MODE (type_in)));
5581 if (TREE_CODE (type_out) != VECTOR_TYPE
5582 || TREE_CODE (type_in) != VECTOR_TYPE
5583 || !TARGET_VECTORIZE_BUILTINS)
5584 return NULL_TREE;
5586 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5587 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5588 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5589 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5591 enum rs6000_builtins fn
5592 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5593 switch (fn)
5595 case RS6000_BUILTIN_RSQRTF:
5596 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5597 && out_mode == SFmode && out_n == 4
5598 && in_mode == SFmode && in_n == 4)
5599 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5600 break;
5601 case RS6000_BUILTIN_RSQRT:
5602 if (VECTOR_UNIT_VSX_P (V2DFmode)
5603 && out_mode == DFmode && out_n == 2
5604 && in_mode == DFmode && in_n == 2)
5605 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5606 break;
5607 case RS6000_BUILTIN_RECIPF:
5608 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5609 && out_mode == SFmode && out_n == 4
5610 && in_mode == SFmode && in_n == 4)
5611 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5612 break;
5613 case RS6000_BUILTIN_RECIP:
5614 if (VECTOR_UNIT_VSX_P (V2DFmode)
5615 && out_mode == DFmode && out_n == 2
5616 && in_mode == DFmode && in_n == 2)
5617 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5618 break;
5619 default:
5620 break;
5622 return NULL_TREE;
5625 /* Default CPU string for rs6000*_file_start functions. */
5626 static const char *rs6000_default_cpu;
5628 /* Do anything needed at the start of the asm file. */
5630 static void
5631 rs6000_file_start (void)
5633 char buffer[80];
5634 const char *start = buffer;
5635 FILE *file = asm_out_file;
5637 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5639 default_file_start ();
5641 if (flag_verbose_asm)
5643 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5645 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5647 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5648 start = "";
5651 if (global_options_set.x_rs6000_cpu_index)
5653 fprintf (file, "%s -mcpu=%s", start,
5654 processor_target_table[rs6000_cpu_index].name);
5655 start = "";
5658 if (global_options_set.x_rs6000_tune_index)
5660 fprintf (file, "%s -mtune=%s", start,
5661 processor_target_table[rs6000_tune_index].name);
5662 start = "";
5665 if (PPC405_ERRATUM77)
5667 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5668 start = "";
5671 #ifdef USING_ELFOS_H
5672 switch (rs6000_sdata)
5674 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5675 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5676 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5677 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5680 if (rs6000_sdata && g_switch_value)
5682 fprintf (file, "%s -G %d", start,
5683 g_switch_value);
5684 start = "";
5686 #endif
5688 if (*start == '\0')
5689 putc ('\n', file);
5692 #ifdef USING_ELFOS_H
5693 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5694 || !global_options_set.x_rs6000_cpu_index)
5696 fputs ("\t.machine ", asm_out_file);
5697 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5698 fputs ("power9\n", asm_out_file);
5699 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5700 fputs ("power8\n", asm_out_file);
5701 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5702 fputs ("power7\n", asm_out_file);
5703 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5704 fputs ("power6\n", asm_out_file);
5705 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5706 fputs ("power5\n", asm_out_file);
5707 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5708 fputs ("power4\n", asm_out_file);
5709 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5710 fputs ("ppc64\n", asm_out_file);
5711 else
5712 fputs ("ppc\n", asm_out_file);
5714 #endif
5716 if (DEFAULT_ABI == ABI_ELFv2)
5717 fprintf (file, "\t.abiversion 2\n");
5721 /* Return nonzero if this function is known to have a null epilogue. */
5723 int
5724 direct_return (void)
5726 if (reload_completed)
5728 rs6000_stack_t *info = rs6000_stack_info ();
5730 if (info->first_gp_reg_save == 32
5731 && info->first_fp_reg_save == 64
5732 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5733 && ! info->lr_save_p
5734 && ! info->cr_save_p
5735 && info->vrsave_size == 0
5736 && ! info->push_p)
5737 return 1;
5740 return 0;
5743 /* Return the number of instructions it takes to form a constant in an
5744 integer register. */
5746 static int
5747 num_insns_constant_wide (HOST_WIDE_INT value)
5749 /* signed constant loadable with addi */
5750 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5751 return 1;
5753 /* constant loadable with addis */
5754 else if ((value & 0xffff) == 0
5755 && (value >> 31 == -1 || value >> 31 == 0))
5756 return 1;
5758 else if (TARGET_POWERPC64)
5760 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5761 HOST_WIDE_INT high = value >> 31;
5763 if (high == 0 || high == -1)
5764 return 2;
5766 high >>= 1;
5768 if (low == 0)
5769 return num_insns_constant_wide (high) + 1;
5770 else if (high == 0)
5771 return num_insns_constant_wide (low) + 1;
5772 else
5773 return (num_insns_constant_wide (high)
5774 + num_insns_constant_wide (low) + 1);
5777 else
5778 return 2;
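/* Worked examples (illustrative only):
     0x7fff             -> 1 insn  (addi; fits the signed 16-bit range)
     0x12340000         -> 1 insn  (addis; low 16 bits zero, value fits
                                    in 32 bits signed)
     0x12345678         -> 2 insns (addis + ori)
     0x123456789abcdef0 -> 5 insns on a 64-bit target: two insns for each
                           32-bit half plus one insn to combine them.  */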
5781 int
5782 num_insns_constant (rtx op, machine_mode mode)
5784 HOST_WIDE_INT low, high;
5786 switch (GET_CODE (op))
5788 case CONST_INT:
5789 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5790 && rs6000_is_valid_and_mask (op, mode))
5791 return 2;
5792 else
5793 return num_insns_constant_wide (INTVAL (op));
5795 case CONST_WIDE_INT:
5797 int i;
5798 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5799 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5800 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5801 return ins;
5804 case CONST_DOUBLE:
5805 if (mode == SFmode || mode == SDmode)
5807 long l;
5809 if (DECIMAL_FLOAT_MODE_P (mode))
5810 REAL_VALUE_TO_TARGET_DECIMAL32
5811 (*CONST_DOUBLE_REAL_VALUE (op), l);
5812 else
5813 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5814 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5817 long l[2];
5818 if (DECIMAL_FLOAT_MODE_P (mode))
5819 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5820 else
5821 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5822 high = l[WORDS_BIG_ENDIAN == 0];
5823 low = l[WORDS_BIG_ENDIAN != 0];
5825 if (TARGET_32BIT)
5826 return (num_insns_constant_wide (low)
5827 + num_insns_constant_wide (high));
5828 else
5830 if ((high == 0 && low >= 0)
5831 || (high == -1 && low < 0))
5832 return num_insns_constant_wide (low);
5834 else if (rs6000_is_valid_and_mask (op, mode))
5835 return 2;
5837 else if (low == 0)
5838 return num_insns_constant_wide (high) + 1;
5840 else
5841 return (num_insns_constant_wide (high)
5842 + num_insns_constant_wide (low) + 1);
5845 default:
5846 gcc_unreachable ();
5850 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5851 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5852 corresponding element of the vector, but for V4SFmode and V2SFmode,
5853 the corresponding "float" is interpreted as an SImode integer. */
5855 HOST_WIDE_INT
5856 const_vector_elt_as_int (rtx op, unsigned int elt)
5858 rtx tmp;
5860 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5861 gcc_assert (GET_MODE (op) != V2DImode
5862 && GET_MODE (op) != V2DFmode);
5864 tmp = CONST_VECTOR_ELT (op, elt);
5865 if (GET_MODE (op) == V4SFmode
5866 || GET_MODE (op) == V2SFmode)
5867 tmp = gen_lowpart (SImode, tmp);
5868 return INTVAL (tmp);
5871 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5872 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5873 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5874 all items are set to the same value and contain COPIES replicas of the
5875 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5876 operand and the others are set to the value of the operand's msb. */
5878 static bool
5879 vspltis_constant (rtx op, unsigned step, unsigned copies)
5881 machine_mode mode = GET_MODE (op);
5882 machine_mode inner = GET_MODE_INNER (mode);
5884 unsigned i;
5885 unsigned nunits;
5886 unsigned bitsize;
5887 unsigned mask;
5889 HOST_WIDE_INT val;
5890 HOST_WIDE_INT splat_val;
5891 HOST_WIDE_INT msb_val;
5893 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5894 return false;
5896 nunits = GET_MODE_NUNITS (mode);
5897 bitsize = GET_MODE_BITSIZE (inner);
5898 mask = GET_MODE_MASK (inner);
5900 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5901 splat_val = val;
5902 msb_val = val >= 0 ? 0 : -1;
5904 /* Construct the value to be splatted, if possible. If not, return false. */
5905 for (i = 2; i <= copies; i *= 2)
5907 HOST_WIDE_INT small_val;
5908 bitsize /= 2;
5909 small_val = splat_val >> bitsize;
5910 mask >>= bitsize;
5911 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5912 return false;
5913 splat_val = small_val;
5916 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5917 if (EASY_VECTOR_15 (splat_val))
5920 /* Also check if we can splat, and then add the result to itself. Do so if
5921 the value is positive, or if the splat instruction is using OP's mode;
5922 for splat_val < 0, the splat and the add should use the same mode. */
5923 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5924 && (splat_val >= 0 || (step == 1 && copies == 1)))
5927 /* Also check if we are loading up the most significant bit, which can be
5928 done by loading up -1 and shifting the value left by -1. */
5929 else if (EASY_VECTOR_MSB (splat_val, inner))
5932 else
5933 return false;
5935 /* Check if VAL is present in every STEP-th element, and the
5936 other elements are filled with its most significant bit. */
5937 for (i = 1; i < nunits; ++i)
5939 HOST_WIDE_INT desired_val;
5940 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5941 if ((i & (step - 1)) == 0)
5942 desired_val = val;
5943 else
5944 desired_val = msb_val;
5946 if (desired_val != const_vector_elt_as_int (op, elt))
5947 return false;
5950 return true;
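/* Example of the STEP/COPIES folding above (illustrative only): the
   V4SImode constant { 0x00050005, 0x00050005, 0x00050005, 0x00050005 }
   fails the plain vspltisw check (0x00050005 is not in [-16, 15]) but is
   retried with copies == 2: the loop halves bitsize to 16, verifies that
   0x00050005 == (5 << 16) | 5, and reduces splat_val to 5, which
   EASY_VECTOR_15 accepts, so vspltish 5 generates the constant.  */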
5953 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5954 instruction, filling in the bottom elements with 0 or -1.
5956 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5957 for the number of zeroes to shift in, or negative for the number of 0xff
5958 bytes to shift in.
5960 OP is a CONST_VECTOR. */
5962 static int
5963 vspltis_shifted (rtx op)
5965 machine_mode mode = GET_MODE (op);
5966 machine_mode inner = GET_MODE_INNER (mode);
5968 unsigned i, j;
5969 unsigned nunits;
5970 unsigned mask;
5972 HOST_WIDE_INT val;
5974 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5975 return false;
5977 /* We need to create pseudo registers to do the shift, so don't recognize
5978 shift vector constants after reload. */
5979 if (!can_create_pseudo_p ())
5980 return false;
5982 nunits = GET_MODE_NUNITS (mode);
5983 mask = GET_MODE_MASK (inner);
5985 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5987 /* Check if the value can really be the operand of a vspltis[bhw]. */
5988 if (EASY_VECTOR_15 (val))
5991 /* Also check if we are loading up the most significant bit which can be done
5992 by loading up -1 and shifting the value left by -1. */
5993 else if (EASY_VECTOR_MSB (val, inner))
5996 else
5997 return 0;
5999 /* Check if VAL is present in every element until we find elements
6000 that are 0 or all 1 bits. */
6001 for (i = 1; i < nunits; ++i)
6003 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6004 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6006 /* If the value isn't the splat value, check for the remaining elements
6007 being 0/-1. */
6008 if (val != elt_val)
6010 if (elt_val == 0)
6012 for (j = i+1; j < nunits; ++j)
6014 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6015 if (const_vector_elt_as_int (op, elt2) != 0)
6016 return 0;
6019 return (nunits - i) * GET_MODE_SIZE (inner);
6022 else if ((elt_val & mask) == mask)
6024 for (j = i+1; j < nunits; ++j)
6026 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6027 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6028 return 0;
6031 return -((nunits - i) * GET_MODE_SIZE (inner));
6034 else
6035 return 0;
6039 /* If all elements are equal, we don't need to do VSLDOI. */
6040 return 0;
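/* Example (illustrative only): on a big-endian target the V16QImode
   constant { 5, 0, 0, ..., 0 } has the splat value 5 in element 0 and
   zeros in the remaining 15 elements, so the loop above returns
   (16 - 1) * 1 == 15: splat 5 with vspltisb, then shift in 15 zero bytes
   with VSLDOI.  */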
6044 /* Return true if OP is of the given MODE and can be synthesized
6045 with a vspltisb, vspltish or vspltisw. */
6047 bool
6048 easy_altivec_constant (rtx op, machine_mode mode)
6050 unsigned step, copies;
6052 if (mode == VOIDmode)
6053 mode = GET_MODE (op);
6054 else if (mode != GET_MODE (op))
6055 return false;
6057 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6058 constants. */
6059 if (mode == V2DFmode)
6060 return zero_constant (op, mode);
6062 else if (mode == V2DImode)
6064 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6065 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6066 return false;
6068 if (zero_constant (op, mode))
6069 return true;
6071 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6072 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6073 return true;
6075 return false;
6078 /* V1TImode is a special container for TImode. Ignore for now. */
6079 else if (mode == V1TImode)
6080 return false;
6082 /* Start with a vspltisw. */
6083 step = GET_MODE_NUNITS (mode) / 4;
6084 copies = 1;
6086 if (vspltis_constant (op, step, copies))
6087 return true;
6089 /* Then try with a vspltish. */
6090 if (step == 1)
6091 copies <<= 1;
6092 else
6093 step >>= 1;
6095 if (vspltis_constant (op, step, copies))
6096 return true;
6098 /* And finally a vspltisb. */
6099 if (step == 1)
6100 copies <<= 1;
6101 else
6102 step >>= 1;
6104 if (vspltis_constant (op, step, copies))
6105 return true;
6107 if (vspltis_shifted (op) != 0)
6108 return true;
6110 return false;
6113 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6114 result is OP. Abort if it is not possible. */
6116 rtx
6117 gen_easy_altivec_constant (rtx op)
6119 machine_mode mode = GET_MODE (op);
6120 int nunits = GET_MODE_NUNITS (mode);
6121 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6122 unsigned step = nunits / 4;
6123 unsigned copies = 1;
6125 /* Start with a vspltisw. */
6126 if (vspltis_constant (op, step, copies))
6127 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6129 /* Then try with a vspltish. */
6130 if (step == 1)
6131 copies <<= 1;
6132 else
6133 step >>= 1;
6135 if (vspltis_constant (op, step, copies))
6136 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6138 /* And finally a vspltisb. */
6139 if (step == 1)
6140 copies <<= 1;
6141 else
6142 step >>= 1;
6144 if (vspltis_constant (op, step, copies))
6145 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6147 gcc_unreachable ();
6150 const char *
6151 output_vec_const_move (rtx *operands)
6153 int cst, cst2, shift;
6154 machine_mode mode;
6155 rtx dest, vec;
6157 dest = operands[0];
6158 vec = operands[1];
6159 mode = GET_MODE (dest);
6161 if (TARGET_VSX)
6163 if (zero_constant (vec, mode))
6164 return "xxlxor %x0,%x0,%x0";
6166 if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
6167 return "xxlorc %x0,%x0,%x0";
6169 if ((mode == V2DImode || mode == V1TImode)
6170 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
6171 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
6172 return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
6175 if (TARGET_ALTIVEC)
6177 rtx splat_vec;
6178 if (zero_constant (vec, mode))
6179 return "vxor %0,%0,%0";
6181 /* Do we need to construct a value using VSLDOI? */
6182 shift = vspltis_shifted (vec);
6183 if (shift != 0)
6184 return "#";
6186 splat_vec = gen_easy_altivec_constant (vec);
6187 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6188 operands[1] = XEXP (splat_vec, 0);
6189 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6190 return "#";
6192 switch (GET_MODE (splat_vec))
6194 case V4SImode:
6195 return "vspltisw %0,%1";
6197 case V8HImode:
6198 return "vspltish %0,%1";
6200 case V16QImode:
6201 return "vspltisb %0,%1";
6203 default:
6204 gcc_unreachable ();
6208 gcc_assert (TARGET_SPE);
6210 /* Vector constant 0 is handled by a splitter for V2SI, and in the
6211 patterns for V1DI, V4HI, and V2SF.
6213 FIXME: We should probably return # and add post reload
6214 splitters for these, but this way is so easy ;-). */
6215 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6216 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6217 operands[1] = CONST_VECTOR_ELT (vec, 0);
6218 operands[2] = CONST_VECTOR_ELT (vec, 1);
6219 if (cst == cst2)
6220 return "li %0,%1\n\tevmergelo %0,%0,%0";
6221 else if (WORDS_BIG_ENDIAN)
6222 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6223 else
6224 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6227 /* Initialize the PAIRED vector TARGET to VALS. */
6229 void
6230 paired_expand_vector_init (rtx target, rtx vals)
6232 machine_mode mode = GET_MODE (target);
6233 int n_elts = GET_MODE_NUNITS (mode);
6234 int n_var = 0;
6235 rtx x, new_rtx, tmp, constant_op, op1, op2;
6236 int i;
6238 for (i = 0; i < n_elts; ++i)
6240 x = XVECEXP (vals, 0, i);
6241 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6242 ++n_var;
6244 if (n_var == 0)
6246 /* Load from constant pool. */
6247 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6248 return;
6251 if (n_var == 2)
6253 /* The vector is initialized only with non-constants. */
6254 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6255 XVECEXP (vals, 0, 1));
6257 emit_move_insn (target, new_rtx);
6258 return;
6261 /* One field is non-constant and the other one is a constant. Load the
6262 constant from the constant pool and use the ps_merge instruction to
6263 construct the whole vector. */
6264 op1 = XVECEXP (vals, 0, 0);
6265 op2 = XVECEXP (vals, 0, 1);
6267 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6269 tmp = gen_reg_rtx (GET_MODE (constant_op));
6270 emit_move_insn (tmp, constant_op);
6272 if (CONSTANT_P (op1))
6273 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6274 else
6275 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6277 emit_move_insn (target, new_rtx);
6280 void
6281 paired_expand_vector_move (rtx operands[])
6283 rtx op0 = operands[0], op1 = operands[1];
6285 emit_move_insn (op0, op1);
6288 /* Emit vector compare for code RCODE. DEST is the destination, OP1 and
6289 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6290 operands for the relation operation COND. This is a recursive
6291 function. */
6293 static void
6294 paired_emit_vector_compare (enum rtx_code rcode,
6295 rtx dest, rtx op0, rtx op1,
6296 rtx cc_op0, rtx cc_op1)
6298 rtx tmp = gen_reg_rtx (V2SFmode);
6299 rtx tmp1, max, min;
6301 gcc_assert (TARGET_PAIRED_FLOAT);
6302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6304 switch (rcode)
6306 case LT:
6307 case LTU:
6308 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6309 return;
6310 case GE:
6311 case GEU:
6312 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6313 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6314 return;
6315 case LE:
6316 case LEU:
6317 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6318 return;
6319 case GT:
6320 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6321 return;
6322 case EQ:
6323 tmp1 = gen_reg_rtx (V2SFmode);
6324 max = gen_reg_rtx (V2SFmode);
6325 min = gen_reg_rtx (V2SFmode);
6326 gen_reg_rtx (V2SFmode);
6328 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6329 emit_insn (gen_selv2sf4
6330 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6331 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6332 emit_insn (gen_selv2sf4
6333 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6334 emit_insn (gen_subv2sf3 (tmp1, min, max));
6335 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6336 return;
6337 case NE:
6338 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6339 return;
6340 case UNLE:
6341 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6342 return;
6343 case UNLT:
6344 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6345 return;
6346 case UNGE:
6347 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6348 return;
6349 case UNGT:
6350 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6351 return;
6352 default:
6353 gcc_unreachable ();
6356 return;
6359 /* Emit vector conditional expression.
6360 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6361 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6363 int
6364 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6365 rtx cond, rtx cc_op0, rtx cc_op1)
6367 enum rtx_code rcode = GET_CODE (cond);
6369 if (!TARGET_PAIRED_FLOAT)
6370 return 0;
6372 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6374 return 1;
6377 /* Initialize vector TARGET to VALS. */
6379 void
6380 rs6000_expand_vector_init (rtx target, rtx vals)
6382 machine_mode mode = GET_MODE (target);
6383 machine_mode inner_mode = GET_MODE_INNER (mode);
6384 int n_elts = GET_MODE_NUNITS (mode);
6385 int n_var = 0, one_var = -1;
6386 bool all_same = true, all_const_zero = true;
6387 rtx x, mem;
6388 int i;
6390 for (i = 0; i < n_elts; ++i)
6392 x = XVECEXP (vals, 0, i);
6393 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6394 ++n_var, one_var = i;
6395 else if (x != CONST0_RTX (inner_mode))
6396 all_const_zero = false;
6398 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6399 all_same = false;
6402 if (n_var == 0)
6404 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6405 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6406 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6408 /* Zero register. */
6409 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6410 return;
6412 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6414 /* Splat immediate. */
6415 emit_insn (gen_rtx_SET (target, const_vec));
6416 return;
6418 else
6420 /* Load from constant pool. */
6421 emit_move_insn (target, const_vec);
6422 return;
6426 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6427 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6429 rtx op0 = XVECEXP (vals, 0, 0);
6430 rtx op1 = XVECEXP (vals, 0, 1);
6431 if (all_same)
6433 if (!MEM_P (op0) && !REG_P (op0))
6434 op0 = force_reg (inner_mode, op0);
6435 if (mode == V2DFmode)
6436 emit_insn (gen_vsx_splat_v2df (target, op0));
6437 else
6438 emit_insn (gen_vsx_splat_v2di (target, op0));
6440 else
6442 op0 = force_reg (inner_mode, op0);
6443 op1 = force_reg (inner_mode, op1);
6444 if (mode == V2DFmode)
6445 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6446 else
6447 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6449 return;
6452 /* With single-precision floating point on VSX, we know that internally
6453 single precision is actually represented as a double, so either make two
6454 V2DF vectors and convert these vectors to single precision, or do one
6455 conversion and splat the result to the other elements. */
6456 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6458 if (all_same)
6460 rtx freg = gen_reg_rtx (V4SFmode);
6461 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
6462 rtx cvt = ((TARGET_XSCVDPSPN)
6463 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6464 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6466 emit_insn (cvt);
6467 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
6469 else
6471 rtx dbl_even = gen_reg_rtx (V2DFmode);
6472 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6473 rtx flt_even = gen_reg_rtx (V4SFmode);
6474 rtx flt_odd = gen_reg_rtx (V4SFmode);
6475 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6476 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6477 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6478 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6480 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6481 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6482 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6483 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6484 rs6000_expand_extract_even (target, flt_even, flt_odd);
6486 return;
6489 /* Store value to stack temp. Load vector element. Splat. However, splat
6490 of 64-bit items is not supported on AltiVec. */
6491 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6493 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6494 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6495 XVECEXP (vals, 0, 0));
6496 x = gen_rtx_UNSPEC (VOIDmode,
6497 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6498 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6499 gen_rtvec (2,
6500 gen_rtx_SET (target, mem),
6501 x)));
6502 x = gen_rtx_VEC_SELECT (inner_mode, target,
6503 gen_rtx_PARALLEL (VOIDmode,
6504 gen_rtvec (1, const0_rtx)));
6505 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6506 return;
6509 /* One field is non-constant. Load constant then overwrite
6510 varying field. */
6511 if (n_var == 1)
6513 rtx copy = copy_rtx (vals);
6515 /* Load constant part of vector, substitute neighboring value for
6516 varying element. */
6517 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6518 rs6000_expand_vector_init (target, copy);
6520 /* Insert variable. */
6521 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6522 return;
6525 /* Construct the vector in memory one field at a time
6526 and load the whole vector. */
6527 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6528 for (i = 0; i < n_elts; i++)
6529 emit_move_insn (adjust_address_nv (mem, inner_mode,
6530 i * GET_MODE_SIZE (inner_mode)),
6531 XVECEXP (vals, 0, i));
6532 emit_move_insn (target, mem);
6535 /* Set field ELT of TARGET to VAL. */
6537 void
6538 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6540 machine_mode mode = GET_MODE (target);
6541 machine_mode inner_mode = GET_MODE_INNER (mode);
6542 rtx reg = gen_reg_rtx (mode);
6543 rtx mask, mem, x;
6544 int width = GET_MODE_SIZE (inner_mode);
6545 int i;
6547 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6549 rtx (*set_func) (rtx, rtx, rtx, rtx)
6550 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6551 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6552 return;
6555 /* Simplify setting single element vectors like V1TImode. */
6556 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6558 emit_move_insn (target, gen_lowpart (mode, val));
6559 return;
6562 /* Load single variable value. */
6563 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6564 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6565 x = gen_rtx_UNSPEC (VOIDmode,
6566 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6567 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6568 gen_rtvec (2,
6569 gen_rtx_SET (reg, mem),
6570 x)));
6572 /* Linear sequence. */
6573 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6574 for (i = 0; i < 16; ++i)
6575 XVECEXP (mask, 0, i) = GEN_INT (i);
6577 /* Set permute mask to insert element into target. */
6578 for (i = 0; i < width; ++i)
6579 XVECEXP (mask, 0, elt*width + i)
6580 = GEN_INT (i + 0x10);
6581 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6583 if (BYTES_BIG_ENDIAN)
6584 x = gen_rtx_UNSPEC (mode,
6585 gen_rtvec (3, target, reg,
6586 force_reg (V16QImode, x)),
6587 UNSPEC_VPERM);
6588 else
6590 /* Invert selector. We prefer to generate VNAND on P8 so
6591 that future fusion opportunities can kick in, but must
6592 generate VNOR elsewhere. */
6593 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6594 rtx iorx = (TARGET_P8_VECTOR
6595 ? gen_rtx_IOR (V16QImode, notx, notx)
6596 : gen_rtx_AND (V16QImode, notx, notx));
6597 rtx tmp = gen_reg_rtx (V16QImode);
6598 emit_insn (gen_rtx_SET (tmp, iorx));
6600 /* Permute with operands reversed and adjusted selector. */
6601 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6602 UNSPEC_VPERM);
6605 emit_insn (gen_rtx_SET (target, x));
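/* Example of the permute mask built above (illustrative only): inserting
   element 1 of a V4SFmode target (width == 4) yields the selector
   { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }; entries
   16..19 pick the new value out of REG, and every other byte is copied
   from the original TARGET.  */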
6608 /* Extract field ELT from VEC into TARGET. */
6610 void
6611 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6613 machine_mode mode = GET_MODE (vec);
6614 machine_mode inner_mode = GET_MODE_INNER (mode);
6615 rtx mem;
6617 if (VECTOR_MEM_VSX_P (mode))
6619 switch (mode)
6621 default:
6622 break;
6623 case V1TImode:
6624 gcc_assert (elt == 0 && inner_mode == TImode);
6625 emit_move_insn (target, gen_lowpart (TImode, vec));
6626 break;
6627 case V2DFmode:
6628 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6629 return;
6630 case V2DImode:
6631 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6632 return;
6633 case V4SFmode:
6634 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6635 return;
6639 /* Allocate mode-sized buffer. */
6640 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6642 emit_move_insn (mem, vec);
6644 /* Add offset to field within buffer matching vector element. */
6645 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6647 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6650 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6652 bool
6653 invalid_e500_subreg (rtx op, machine_mode mode)
6655 if (TARGET_E500_DOUBLE)
6657 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6658 subreg:TI and reg:TF. Decimal float modes are like integer
6659 modes (only low part of each register used) for this
6660 purpose. */
6661 if (GET_CODE (op) == SUBREG
6662 && (mode == SImode || mode == DImode || mode == TImode
6663 || mode == DDmode || mode == TDmode || mode == PTImode)
6664 && REG_P (SUBREG_REG (op))
6665 && (GET_MODE (SUBREG_REG (op)) == DFmode
6666 || GET_MODE (SUBREG_REG (op)) == TFmode
6667 || GET_MODE (SUBREG_REG (op)) == IFmode
6668 || GET_MODE (SUBREG_REG (op)) == KFmode))
6669 return true;
6671 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6672 reg:TI. */
6673 if (GET_CODE (op) == SUBREG
6674 && (mode == DFmode || mode == TFmode || mode == IFmode
6675 || mode == KFmode)
6676 && REG_P (SUBREG_REG (op))
6677 && (GET_MODE (SUBREG_REG (op)) == DImode
6678 || GET_MODE (SUBREG_REG (op)) == TImode
6679 || GET_MODE (SUBREG_REG (op)) == PTImode
6680 || GET_MODE (SUBREG_REG (op)) == DDmode
6681 || GET_MODE (SUBREG_REG (op)) == TDmode))
6682 return true;
6685 if (TARGET_SPE
6686 && GET_CODE (op) == SUBREG
6687 && mode == SImode
6688 && REG_P (SUBREG_REG (op))
6689 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6690 return true;
6692 return false;
6695 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
6696 selects whether the alignment is ABI-mandated, optional, or
6697 both. */
6699 unsigned int
6700 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6702 if (how != align_opt)
6704 if (TREE_CODE (type) == VECTOR_TYPE)
6706 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6707 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6709 if (align < 64)
6710 align = 64;
6712 else if (align < 128)
6713 align = 128;
6715 else if (TARGET_E500_DOUBLE
6716 && TREE_CODE (type) == REAL_TYPE
6717 && TYPE_MODE (type) == DFmode)
6719 if (align < 64)
6720 align = 64;
6724 if (how != align_abi)
6726 if (TREE_CODE (type) == ARRAY_TYPE
6727 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6729 if (align < BITS_PER_WORD)
6730 align = BITS_PER_WORD;
6734 return align;
6737 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6739 bool
6740 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6742 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6744 if (computed != 128)
6746 static bool warned;
6747 if (!warned && warn_psabi)
6749 warned = true;
6750 inform (input_location,
6751 "the layout of aggregates containing vectors with"
6752 " %d-byte alignment has changed in GCC 5",
6753 computed / BITS_PER_UNIT);
6756 /* In current GCC there is no special case. */
6757 return false;
6760 return false;
6763 /* AIX increases natural record alignment to doubleword if the first
6764 field is an FP double, while the FP fields remain word aligned. */
6766 unsigned int
6767 rs6000_special_round_type_align (tree type, unsigned int computed,
6768 unsigned int specified)
6770 unsigned int align = MAX (computed, specified);
6771 tree field = TYPE_FIELDS (type);
6773 /* Skip all non-field decls. */
6774 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6775 field = DECL_CHAIN (field);
6777 if (field != NULL && field != type)
6779 type = TREE_TYPE (field);
6780 while (TREE_CODE (type) == ARRAY_TYPE)
6781 type = TREE_TYPE (type);
6783 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6784 align = MAX (align, 64);
6787 return align;
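/* Example (illustrative only): for

     struct { double d; int i; };

   the first field has DFmode, so the record alignment is raised to 64
   bits even though the double itself stays word aligned; a record whose
   first field is an int keeps its computed alignment.  */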
6790 /* Darwin increases record alignment to the natural alignment of
6791 the first field. */
6793 unsigned int
6794 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6795 unsigned int specified)
6797 unsigned int align = MAX (computed, specified);
6799 if (TYPE_PACKED (type))
6800 return align;
6802 /* Find the first field, looking down into aggregates. */
6803 do {
6804 tree field = TYPE_FIELDS (type);
6805 /* Skip all non-field decls. */
6806 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6807 field = DECL_CHAIN (field);
6808 if (! field)
6809 break;
6810 /* A packed field does not contribute any extra alignment. */
6811 if (DECL_PACKED (field))
6812 return align;
6813 type = TREE_TYPE (field);
6814 while (TREE_CODE (type) == ARRAY_TYPE)
6815 type = TREE_TYPE (type);
6816 } while (AGGREGATE_TYPE_P (type));
6818 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6819 align = MAX (align, TYPE_ALIGN (type));
6821 return align;
6824 /* Return 1 for an operand in small memory on V.4/eabi. */
6826 int
6827 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6828 machine_mode mode ATTRIBUTE_UNUSED)
6830 #if TARGET_ELF
6831 rtx sym_ref;
6833 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6834 return 0;
6836 if (DEFAULT_ABI != ABI_V4)
6837 return 0;
6839 /* Vector and float memory instructions have a limited offset on the
6840 SPE, so using a vector or float variable directly as an operand is
6841 not useful. */
6842 if (TARGET_SPE
6843 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6844 return 0;
6846 if (GET_CODE (op) == SYMBOL_REF)
6847 sym_ref = op;
6849 else if (GET_CODE (op) != CONST
6850 || GET_CODE (XEXP (op, 0)) != PLUS
6851 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6852 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6853 return 0;
6855 else
6857 rtx sum = XEXP (op, 0);
6858 HOST_WIDE_INT summand;
6860 /* We have to be careful here, because it is the referenced address
6861 that must be within 32k of _SDA_BASE_, not just the symbol. */
6862 summand = INTVAL (XEXP (sum, 1));
6863 if (summand < 0 || summand > g_switch_value)
6864 return 0;
6866 sym_ref = XEXP (sum, 0);
6869 return SYMBOL_REF_SMALL_P (sym_ref);
6870 #else
6871 return 0;
6872 #endif
6875 /* Return true if either operand is a general purpose register. */
6877 bool
6878 gpr_or_gpr_p (rtx op0, rtx op1)
6880 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6881 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6884 /* Return true if this is a move direct operation between GPR registers and
6885 floating point/VSX registers. */
6887 bool
6888 direct_move_p (rtx op0, rtx op1)
6890 int regno0, regno1;
6892 if (!REG_P (op0) || !REG_P (op1))
6893 return false;
6895 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6896 return false;
6898 regno0 = REGNO (op0);
6899 regno1 = REGNO (op1);
6900 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6901 return false;
6903 if (INT_REGNO_P (regno0))
6904 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6906 else if (INT_REGNO_P (regno1))
6908 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6909 return true;
6911 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6912 return true;
6915 return false;
6918 /* Return true if this is a load or store quad operation. This function does
6919 not handle the atomic quad memory instructions. */
6921 bool
6922 quad_load_store_p (rtx op0, rtx op1)
6924 bool ret;
6926 if (!TARGET_QUAD_MEMORY)
6927 ret = false;
6929 else if (REG_P (op0) && MEM_P (op1))
6930 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6931 && quad_memory_operand (op1, GET_MODE (op1))
6932 && !reg_overlap_mentioned_p (op0, op1));
6934 else if (MEM_P (op0) && REG_P (op1))
6935 ret = (quad_memory_operand (op0, GET_MODE (op0))
6936 && quad_int_reg_operand (op1, GET_MODE (op1)));
6938 else
6939 ret = false;
6941 if (TARGET_DEBUG_ADDR)
6943 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6944 ret ? "true" : "false");
6945 debug_rtx (gen_rtx_SET (op0, op1));
6948 return ret;
6951 /* Given an address, return a constant offset term if one exists. */
6953 static rtx
6954 address_offset (rtx op)
6956 if (GET_CODE (op) == PRE_INC
6957 || GET_CODE (op) == PRE_DEC)
6958 op = XEXP (op, 0);
6959 else if (GET_CODE (op) == PRE_MODIFY
6960 || GET_CODE (op) == LO_SUM)
6961 op = XEXP (op, 1);
6963 if (GET_CODE (op) == CONST)
6964 op = XEXP (op, 0);
6966 if (GET_CODE (op) == PLUS)
6967 op = XEXP (op, 1);
6969 if (CONST_INT_P (op))
6970 return op;
6972 return NULL_RTX;
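/* Examples (illustrative only):
     (plus (reg) (const_int 16))                      -> (const_int 16)
     (lo_sum (reg) (const (plus (symbol_ref) (const_int 8))))
                                                      -> (const_int 8)
     (reg)                                            -> NULL_RTX  */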
6975 /* Return true if the MEM operand is a memory operand suitable for use
6976 with a (full width, possibly multiple) gpr load/store. On
6977 powerpc64 this means the offset must be divisible by 4.
6978 Implements 'Y' constraint.
6980 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6981 a constraint function we know the operand has satisfied a suitable
6982 memory predicate. Also accept some odd rtl generated by reload
6983 (see rs6000_legitimize_reload_address for various forms). It is
6984 important that reload rtl be accepted by appropriate constraints
6985 but not by the operand predicate.
6987 Offsetting a lo_sum should not be allowed, except where we know by
6988 alignment that a 32k boundary is not crossed, but see the ???
6989 comment in rs6000_legitimize_reload_address. Note that by
6990 "offsetting" here we mean a further offset to access parts of the
6991 MEM. It's fine to have a lo_sum where the inner address is offset
6992 from a sym, since the same sym+offset will appear in the high part
6993 of the address calculation. */
6995 bool
6996 mem_operand_gpr (rtx op, machine_mode mode)
6998 unsigned HOST_WIDE_INT offset;
6999 int extra;
7000 rtx addr = XEXP (op, 0);
7002 op = address_offset (addr);
7003 if (op == NULL_RTX)
7004 return true;
7006 offset = INTVAL (op);
7007 if (TARGET_POWERPC64 && (offset & 3) != 0)
7008 return false;
7010 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7011 if (extra < 0)
7012 extra = 0;
7014 if (GET_CODE (addr) == LO_SUM)
7015 /* For lo_sum addresses, we must allow any offset except one that
7016 causes a wrap, so test only the low 16 bits. */
7017 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7019 return offset + 0x8000 < 0x10000u - extra;
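/* Worked example of the final range test (an illustrative sketch; the
   helper below is hypothetical and excluded from the build):  */
#if 0
/* Plain-C model of mem_operand_gpr's offset check.  EXTRA is how far the
   access extends past the first word.  */
static int
gpr_offset_ok (long long offset, int mode_size, int word_size)
{
  int extra = mode_size - word_size;
  if (extra < 0)
    extra = 0;
  /* Every word of the access must stay inside the signed 16-bit
     displacement range [-0x8000, 0x7fff].  */
  return (unsigned long long) offset + 0x8000 < 0x10000u - extra;
}
/* gpr_offset_ok (0x7ff0, 16, 8) == 1: a 16-byte access at 0x7ff0 keeps
   its second doubleword (at 0x7ff8) addressable.
   gpr_offset_ok (0x7ffc, 16, 8) == 0: the second doubleword would sit
   at 0x8004, past the end of the displacement range.  */
#endif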
7022 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7024 static bool
7025 reg_offset_addressing_ok_p (machine_mode mode)
7027 switch (mode)
7029 case V16QImode:
7030 case V8HImode:
7031 case V4SFmode:
7032 case V4SImode:
7033 case V2DFmode:
7034 case V2DImode:
7035 case V1TImode:
7036 case TImode:
7037 case TFmode:
7038 case KFmode:
7039 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
7040 TImode is not a vector mode, if we want to use the VSX registers to
7041 move it around, we need to restrict ourselves to reg+reg addressing.
7042 Similarly for IEEE 128-bit floating point that is passed in a single
7043 vector register. */
7044 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7045 return false;
7046 break;
7048 case V4HImode:
7049 case V2SImode:
7050 case V1DImode:
7051 case V2SFmode:
7052 /* Paired vector modes. Only reg+reg addressing is valid. */
7053 if (TARGET_PAIRED_FLOAT)
7054 return false;
7055 break;
7057 case SDmode:
7058 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7059 addressing for the LFIWZX and STFIWX instructions. */
7060 if (TARGET_NO_SDMODE_STACK)
7061 return false;
7062 break;
7064 default:
7065 break;
7068 return true;
7071 static bool
7072 virtual_stack_registers_memory_p (rtx op)
7074 int regnum;
7076 if (GET_CODE (op) == REG)
7077 regnum = REGNO (op);
7079 else if (GET_CODE (op) == PLUS
7080 && GET_CODE (XEXP (op, 0)) == REG
7081 && GET_CODE (XEXP (op, 1)) == CONST_INT)
7082 regnum = REGNO (XEXP (op, 0));
7084 else
7085 return false;
7087 return (regnum >= FIRST_VIRTUAL_REGISTER
7088 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7091 /* Return true if a MODE-sized memory access to OP plus OFFSET
7092 is known not to straddle a 32k boundary. This function is used
7093 to determine whether -mcmodel=medium code can use TOC pointer
7094 relative addressing for OP. This means the alignment of the TOC
7095 pointer must also be taken into account, and unfortunately that is
7096 only 8 bytes. */
7098 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7099 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7100 #endif
7102 static bool
7103 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7104 machine_mode mode)
7106 tree decl;
7107 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7109 if (GET_CODE (op) != SYMBOL_REF)
7110 return false;
7112 dsize = GET_MODE_SIZE (mode);
7113 decl = SYMBOL_REF_DECL (op);
7114 if (!decl)
7116 if (dsize == 0)
7117 return false;
7119 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7120 replacing memory addresses with an anchor plus offset. We
7121 could find the decl by rummaging around in the block->objects
7122 VEC for the given offset but that seems like too much work. */
7123 dalign = BITS_PER_UNIT;
7124 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7125 && SYMBOL_REF_ANCHOR_P (op)
7126 && SYMBOL_REF_BLOCK (op) != NULL)
7128 struct object_block *block = SYMBOL_REF_BLOCK (op);
7130 dalign = block->alignment;
7131 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7133 else if (CONSTANT_POOL_ADDRESS_P (op))
7135 /* It would be nice to have get_pool_align ()... */
7136 machine_mode cmode = get_pool_mode (op);
7138 dalign = GET_MODE_ALIGNMENT (cmode);
7141 else if (DECL_P (decl))
7143 dalign = DECL_ALIGN (decl);
7145 if (dsize == 0)
7147 /* Allow BLKmode when the entire object is known to not
7148 cross a 32k boundary. */
7149 if (!DECL_SIZE_UNIT (decl))
7150 return false;
7152 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7153 return false;
7155 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7156 if (dsize > 32768)
7157 return false;
7159 dalign /= BITS_PER_UNIT;
7160 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7161 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7162 return dalign >= dsize;
7165 else
7166 gcc_unreachable ();
7168 /* Find how many bits of the alignment we know for this access. */
7169 dalign /= BITS_PER_UNIT;
7170 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7171 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7172 mask = dalign - 1;
7173 lsb = offset & -offset;
7174 mask &= lsb - 1;
7175 dalign = mask + 1;
7177 return dalign >= dsize;
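/* Illustrative sketch of the mask arithmetic above (hypothetical helper,
   excluded from the build):  */
#if 0
/* Known alignment of an access at OFFSET from a DALIGN-aligned base;
   DALIGN must be a power of two.  Mirrors the lsb/mask computation in
   offsettable_ok_by_alignment.  */
static unsigned long long
known_alignment (unsigned long long dalign, unsigned long long offset)
{
  unsigned long long mask = dalign - 1;
  unsigned long long lsb = offset & -offset;  /* lowest set bit */
  mask &= lsb - 1;
  return mask + 1;
}
/* known_alignment (8, 0)  == 8  (offset 0 keeps the base alignment)
   known_alignment (8, 4)  == 4  (a 4-byte offset drops it to 4)
   known_alignment (8, 48) == 8  (multiples of 8 preserve it)
   so a dsize of 8 passes "dalign >= dsize" in the first and third
   cases only.  */
#endif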
7180 static bool
7181 constant_pool_expr_p (rtx op)
7183 rtx base, offset;
7185 split_const (op, &base, &offset);
7186 return (GET_CODE (base) == SYMBOL_REF
7187 && CONSTANT_POOL_ADDRESS_P (base)
7188 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7191 static const_rtx tocrel_base, tocrel_offset;
7193 /* Return true if OP is a toc pointer relative address (the output
7194 of create_TOC_reference). If STRICT, do not match high part or
7195 non-split -mcmodel=large/medium toc pointer relative addresses. */
7197 bool
7198 toc_relative_expr_p (const_rtx op, bool strict)
7200 if (!TARGET_TOC)
7201 return false;
7203 if (TARGET_CMODEL != CMODEL_SMALL)
7205 /* Only match the low part. */
7206 if (GET_CODE (op) == LO_SUM
7207 && REG_P (XEXP (op, 0))
7208 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7209 op = XEXP (op, 1);
7210 else if (strict)
7211 return false;
7214 tocrel_base = op;
7215 tocrel_offset = const0_rtx;
7216 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7218 tocrel_base = XEXP (op, 0);
7219 tocrel_offset = XEXP (op, 1);
7222 return (GET_CODE (tocrel_base) == UNSPEC
7223 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
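/* Editorial note, not in the original source: schematically, the forms
   accepted above are

     (unspec [(symbol_ref) ...] UNSPEC_TOCREL)
     (plus (unspec [(symbol_ref) ...] UNSPEC_TOCREL) (const_int N))

   optionally wrapped in (lo_sum (reg) ...) when -mcmodel is not small;
   tocrel_base is left pointing at the unspec and tocrel_offset at the
   constant (or const0_rtx) for use by the callers below.  */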
7226 /* Return true if X is a constant pool address, and also for cmodel=medium
7227 if X is a toc-relative address known to be offsettable within MODE. */
7229 bool
7230 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7231 bool strict)
7233 return (toc_relative_expr_p (x, strict)
7234 && (TARGET_CMODEL != CMODEL_MEDIUM
7235 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7236 || mode == QImode
7237 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7238 INTVAL (tocrel_offset), mode)));
7241 static bool
7242 legitimate_small_data_p (machine_mode mode, rtx x)
7244 return (DEFAULT_ABI == ABI_V4
7245 && !flag_pic && !TARGET_TOC
7246 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7247 && small_data_operand (x, mode));
7250 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
7251 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
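/* Editorial sketch, not in the original source: the macro above accepts
   exactly the 32 offsets 0, 8, 16, ..., 248 -- a 5-bit doubleword index.
   Clearing the 0xf8 bits must leave nothing behind, so any of the low
   three bits or anything at bit 8 and up fails:

     SPE_CONST_OFFSET_OK (248) != 0
     SPE_CONST_OFFSET_OK (4) == 0    (not doubleword aligned)
     SPE_CONST_OFFSET_OK (256) == 0  (out of range)  */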
7253 bool
7254 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7255 bool strict, bool worst_case)
7257 unsigned HOST_WIDE_INT offset;
7258 unsigned int extra;
7260 if (GET_CODE (x) != PLUS)
7261 return false;
7262 if (!REG_P (XEXP (x, 0)))
7263 return false;
7264 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7265 return false;
7266 if (!reg_offset_addressing_ok_p (mode))
7267 return virtual_stack_registers_memory_p (x);
7268 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7269 return true;
7270 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7271 return false;
7273 offset = INTVAL (XEXP (x, 1));
7274 extra = 0;
7275 switch (mode)
7277 case V4HImode:
7278 case V2SImode:
7279 case V1DImode:
7280 case V2SFmode:
7281 /* SPE vector modes. */
7282 return SPE_CONST_OFFSET_OK (offset);
7284 case DFmode:
7285 case DDmode:
7286 case DImode:
7287 /* On e500v2, we may have:
7289 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
7291 which gets addressed with evldd instructions. */
7292 if (TARGET_E500_DOUBLE)
7293 return SPE_CONST_OFFSET_OK (offset);
7295 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7296 addressing. */
7297 if (VECTOR_MEM_VSX_P (mode))
7298 return false;
7300 if (!worst_case)
7301 break;
7302 if (!TARGET_POWERPC64)
7303 extra = 4;
7304 else if (offset & 3)
7305 return false;
7306 break;
7308 case TFmode:
7309 case IFmode:
7310 case KFmode:
7311 if (TARGET_E500_DOUBLE)
7312 return (SPE_CONST_OFFSET_OK (offset)
7313 && SPE_CONST_OFFSET_OK (offset + 8));
7314 /* fall through */
7316 case TDmode:
7317 case TImode:
7318 case PTImode:
7319 extra = 8;
7320 if (!worst_case)
7321 break;
7322 if (!TARGET_POWERPC64)
7323 extra = 12;
7324 else if (offset & 3)
7325 return false;
7326 break;
7328 default:
7329 break;
7332 offset += 0x8000;
7333 return offset < 0x10000 - extra;
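/* Editorial sketch, not in the original source: the biased unsigned
   compare above is the usual idiom for a signed range test.  In plain C
   it is equivalent to

     -0x8000 <= (signed) offset && (signed) offset < 0x8000 - extra

   i.e. the displacement, padded by EXTRA bytes so the last word of a
   multi-word access is still reachable, must fit the signed 16-bit
   D-field of the load/store instruction.  */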
7336 bool
7337 legitimate_indexed_address_p (rtx x, int strict)
7339 rtx op0, op1;
7341 if (GET_CODE (x) != PLUS)
7342 return false;
7344 op0 = XEXP (x, 0);
7345 op1 = XEXP (x, 1);
7347 /* Recognize the rtl generated by reload which we know will later be
7348 replaced with proper base and index regs. */
7349 if (!strict
7350 && reload_in_progress
7351 && (REG_P (op0) || GET_CODE (op0) == PLUS)
7352 && REG_P (op1))
7353 return true;
7355 return (REG_P (op0) && REG_P (op1)
7356 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7357 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7358 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7359 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7362 bool
7363 avoiding_indexed_address_p (machine_mode mode)
7365 /* Avoid indexed addressing for modes that have non-indexed
7366 load/store instruction forms. */
7367 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7370 bool
7371 legitimate_indirect_address_p (rtx x, int strict)
7373 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
7376 bool
7377 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7379 if (!TARGET_MACHO || !flag_pic
7380 || mode != SImode || GET_CODE (x) != MEM)
7381 return false;
7382 x = XEXP (x, 0);
7384 if (GET_CODE (x) != LO_SUM)
7385 return false;
7386 if (GET_CODE (XEXP (x, 0)) != REG)
7387 return false;
7388 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7389 return false;
7390 x = XEXP (x, 1);
7392 return CONSTANT_P (x);
7395 static bool
7396 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7398 if (GET_CODE (x) != LO_SUM)
7399 return false;
7400 if (GET_CODE (XEXP (x, 0)) != REG)
7401 return false;
7402 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7403 return false;
7404 /* Restrict addressing for DI because of our SUBREG hackery. */
7405 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7406 return false;
7407 x = XEXP (x, 1);
7409 if (TARGET_ELF || TARGET_MACHO)
7411 bool large_toc_ok;
7413 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7414 return false;
7415 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
7416 push_reload from the reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
7417 recognizes some LO_SUM addresses as valid although this
7418 function says the opposite.  In most cases LRA can generate
7419 correct code for address reloads through various transformations,
7420 but it cannot manage some LO_SUM cases.  So we need code here,
7421 analogous to that in rs6000_legitimize_reload_address, saying
7422 that some LO_SUM addresses are still valid. */
7423 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7424 && small_toc_ref (x, VOIDmode));
7425 if (TARGET_TOC && ! large_toc_ok)
7426 return false;
7427 if (GET_MODE_NUNITS (mode) != 1)
7428 return false;
7429 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7430 && !(/* ??? Assume floating point reg based on mode? */
7431 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
7432 && (mode == DFmode || mode == DDmode)))
7433 return false;
7435 return CONSTANT_P (x) || large_toc_ok;
7438 return false;
7442 /* Try machine-dependent ways of modifying an illegitimate address
7443 to be legitimate. If we find one, return the new, valid address.
7444 This is used from only one place: `memory_address' in explow.c.
7446 OLDX is the address as it was before break_out_memory_refs was
7447 called. In some cases it is useful to look at this to decide what
7448 needs to be done.
7450 It is always safe for this function to do nothing. It exists to
7451 recognize opportunities to optimize the output.
7453 On RS/6000, first check for the sum of a register with a constant
7454 integer that is out of range. If so, generate code to add the
7455 constant with the low-order 16 bits masked to the register and force
7456 this result into another register (this can be done with `cau').
7457 Then generate an address of REG+(CONST&0xffff), allowing for the
7458 possibility of bit 16 being a one.
7460 Then check for the sum of a register and something not constant, try to
7461 load the other things into a register and return the sum. */
7463 static rtx
7464 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7465 machine_mode mode)
7467 unsigned int extra;
7469 if (!reg_offset_addressing_ok_p (mode))
7471 if (virtual_stack_registers_memory_p (x))
7472 return x;
7474 /* In theory we should not be seeing addresses of the form reg+0,
7475 but just in case it is generated, optimize it away. */
7476 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7477 return force_reg (Pmode, XEXP (x, 0));
7479 /* For TImode with load/store quad, restrict addresses to just a single
7480 pointer, so it works with both GPRs and VSX registers. */
7481 /* Make sure both operands are registers. */
7482 else if (GET_CODE (x) == PLUS
7483 && (mode != TImode || !TARGET_QUAD_MEMORY))
7484 return gen_rtx_PLUS (Pmode,
7485 force_reg (Pmode, XEXP (x, 0)),
7486 force_reg (Pmode, XEXP (x, 1)));
7487 else
7488 return force_reg (Pmode, x);
7490 if (GET_CODE (x) == SYMBOL_REF)
7492 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7493 if (model != 0)
7494 return rs6000_legitimize_tls_address (x, model);
7497 extra = 0;
7498 switch (mode)
7500 case TFmode:
7501 case TDmode:
7502 case TImode:
7503 case PTImode:
7504 case IFmode:
7505 case KFmode:
7506 /* As in legitimate_offset_address_p we do not assume
7507 worst-case. The mode here is just a hint as to the registers
7508 used. A TImode is usually in gprs, but may actually be in
7509 fprs. Leave worst-case scenario for reload to handle via
7510 insn constraints. PTImode is only GPRs. */
7511 extra = 8;
7512 break;
7513 default:
7514 break;
7517 if (GET_CODE (x) == PLUS
7518 && GET_CODE (XEXP (x, 0)) == REG
7519 && GET_CODE (XEXP (x, 1)) == CONST_INT
7520 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7521 >= 0x10000 - extra)
7522 && !(SPE_VECTOR_MODE (mode)
7523 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7525 HOST_WIDE_INT high_int, low_int;
7526 rtx sum;
7527 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7528 if (low_int >= 0x8000 - extra)
7529 low_int = 0;
7530 high_int = INTVAL (XEXP (x, 1)) - low_int;
7531 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7532 GEN_INT (high_int)), 0);
7533 return plus_constant (Pmode, sum, low_int);
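/* Editorial worked example, not in the original source: for
   INTVAL == 0x18000 the low 16 bits are 0x8000, which sign-extends to
   low_int == -0x8000, giving high_int == 0x20000; the result is the
   addis-style sum of the register and 0x20000 plus a displacement of
   -0x8000, with both pieces now in range.  */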
7535 else if (GET_CODE (x) == PLUS
7536 && GET_CODE (XEXP (x, 0)) == REG
7537 && GET_CODE (XEXP (x, 1)) != CONST_INT
7538 && GET_MODE_NUNITS (mode) == 1
7539 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7540 || (/* ??? Assume floating point reg based on mode? */
7541 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7542 && (mode == DFmode || mode == DDmode)))
7543 && !avoiding_indexed_address_p (mode))
7545 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7546 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7548 else if (SPE_VECTOR_MODE (mode)
7549 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7551 if (mode == DImode)
7552 return x;
7553 /* We accept [reg + reg] and [reg + OFFSET]. */
7555 if (GET_CODE (x) == PLUS)
7557 rtx op1 = XEXP (x, 0);
7558 rtx op2 = XEXP (x, 1);
7559 rtx y;
7561 op1 = force_reg (Pmode, op1);
7563 if (GET_CODE (op2) != REG
7564 && (GET_CODE (op2) != CONST_INT
7565 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7566 || (GET_MODE_SIZE (mode) > 8
7567 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7568 op2 = force_reg (Pmode, op2);
7570 /* We can't always do [reg + reg] for these, because [reg +
7571 reg + offset] is not a legitimate addressing mode. */
7572 y = gen_rtx_PLUS (Pmode, op1, op2);
7574 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7575 return force_reg (Pmode, y);
7576 else
7577 return y;
7580 return force_reg (Pmode, x);
7582 else if ((TARGET_ELF
7583 #if TARGET_MACHO
7584 || !MACHO_DYNAMIC_NO_PIC_P
7585 #endif
7587 && TARGET_32BIT
7588 && TARGET_NO_TOC
7589 && ! flag_pic
7590 && GET_CODE (x) != CONST_INT
7591 && GET_CODE (x) != CONST_WIDE_INT
7592 && GET_CODE (x) != CONST_DOUBLE
7593 && CONSTANT_P (x)
7594 && GET_MODE_NUNITS (mode) == 1
7595 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7596 || (/* ??? Assume floating point reg based on mode? */
7597 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7598 && (mode == DFmode || mode == DDmode))))
7600 rtx reg = gen_reg_rtx (Pmode);
7601 if (TARGET_ELF)
7602 emit_insn (gen_elf_high (reg, x));
7603 else
7604 emit_insn (gen_macho_high (reg, x));
7605 return gen_rtx_LO_SUM (Pmode, reg, x);
7607 else if (TARGET_TOC
7608 && GET_CODE (x) == SYMBOL_REF
7609 && constant_pool_expr_p (x)
7610 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7611 return create_TOC_reference (x, NULL_RTX);
7612 else
7613 return x;
7616 /* Debug version of rs6000_legitimize_address. */
7617 static rtx
7618 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7620 rtx ret;
7621 rtx_insn *insns;
7623 start_sequence ();
7624 ret = rs6000_legitimize_address (x, oldx, mode);
7625 insns = get_insns ();
7626 end_sequence ();
7628 if (ret != x)
7630 fprintf (stderr,
7631 "\nrs6000_legitimize_address: mode %s, old code %s, "
7632 "new code %s, modified\n",
7633 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7634 GET_RTX_NAME (GET_CODE (ret)));
7636 fprintf (stderr, "Original address:\n");
7637 debug_rtx (x);
7639 fprintf (stderr, "oldx:\n");
7640 debug_rtx (oldx);
7642 fprintf (stderr, "New address:\n");
7643 debug_rtx (ret);
7645 if (insns)
7647 fprintf (stderr, "Insns added:\n");
7648 debug_rtx_list (insns, 20);
7651 else
7653 fprintf (stderr,
7654 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7655 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7657 debug_rtx (x);
7660 if (insns)
7661 emit_insn (insns);
7663 return ret;
7666 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7667 We need to emit DTP-relative relocations. */
7669 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7670 static void
7671 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7673 switch (size)
7675 case 4:
7676 fputs ("\t.long\t", file);
7677 break;
7678 case 8:
7679 fputs (DOUBLE_INT_ASM_OP, file);
7680 break;
7681 default:
7682 gcc_unreachable ();
7684 output_addr_const (file, x);
7685 if (TARGET_ELF)
7686 fputs ("@dtprel+0x8000", file);
7687 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
7689 switch (SYMBOL_REF_TLS_MODEL (x))
7691 case 0:
7692 break;
7693 case TLS_MODEL_LOCAL_EXEC:
7694 fputs ("@le", file);
7695 break;
7696 case TLS_MODEL_INITIAL_EXEC:
7697 fputs ("@ie", file);
7698 break;
7699 case TLS_MODEL_GLOBAL_DYNAMIC:
7700 case TLS_MODEL_LOCAL_DYNAMIC:
7701 fputs ("@m", file);
7702 break;
7703 default:
7704 gcc_unreachable ();
7709 /* Return true if X is a symbol that refers to real (rather than emulated)
7710 TLS. */
7712 static bool
7713 rs6000_real_tls_symbol_ref_p (rtx x)
7715 return (GET_CODE (x) == SYMBOL_REF
7716 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7719 /* In the name of slightly smaller debug output, and to cater to
7720 general assembler lossage, recognize various UNSPEC sequences
7721 and turn them back into a direct symbol reference. */
7723 static rtx
7724 rs6000_delegitimize_address (rtx orig_x)
7726 rtx x, y, offset;
7728 orig_x = delegitimize_mem_from_attrs (orig_x);
7729 x = orig_x;
7730 if (MEM_P (x))
7731 x = XEXP (x, 0);
7733 y = x;
7734 if (TARGET_CMODEL != CMODEL_SMALL
7735 && GET_CODE (y) == LO_SUM)
7736 y = XEXP (y, 1);
7738 offset = NULL_RTX;
7739 if (GET_CODE (y) == PLUS
7740 && GET_MODE (y) == Pmode
7741 && CONST_INT_P (XEXP (y, 1)))
7743 offset = XEXP (y, 1);
7744 y = XEXP (y, 0);
7747 if (GET_CODE (y) == UNSPEC
7748 && XINT (y, 1) == UNSPEC_TOCREL)
7750 y = XVECEXP (y, 0, 0);
7752 #ifdef HAVE_AS_TLS
7753 /* Do not associate thread-local symbols with the original
7754 constant pool symbol. */
7755 if (TARGET_XCOFF
7756 && GET_CODE (y) == SYMBOL_REF
7757 && CONSTANT_POOL_ADDRESS_P (y)
7758 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7759 return orig_x;
7760 #endif
7762 if (offset != NULL_RTX)
7763 y = gen_rtx_PLUS (Pmode, y, offset);
7764 if (!MEM_P (orig_x))
7765 return y;
7766 else
7767 return replace_equiv_address_nv (orig_x, y);
7770 if (TARGET_MACHO
7771 && GET_CODE (orig_x) == LO_SUM
7772 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7774 y = XEXP (XEXP (orig_x, 1), 0);
7775 if (GET_CODE (y) == UNSPEC
7776 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7777 return XVECEXP (y, 0, 0);
7780 return orig_x;
7783 /* Return true if X shouldn't be emitted into the debug info.
7784 The linker doesn't like .toc section references from
7785 .debug_* sections, so reject .toc section symbols. */
7787 static bool
7788 rs6000_const_not_ok_for_debug_p (rtx x)
7790 if (GET_CODE (x) == SYMBOL_REF
7791 && CONSTANT_POOL_ADDRESS_P (x))
7793 rtx c = get_pool_constant (x);
7794 machine_mode cmode = get_pool_mode (x);
7795 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7796 return true;
7799 return false;
7802 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7804 static GTY(()) rtx rs6000_tls_symbol;
7805 static rtx
7806 rs6000_tls_get_addr (void)
7808 if (!rs6000_tls_symbol)
7809 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7811 return rs6000_tls_symbol;
7814 /* Construct the SYMBOL_REF for TLS GOT references. */
7816 static GTY(()) rtx rs6000_got_symbol;
7817 static rtx
7818 rs6000_got_sym (void)
7820 if (!rs6000_got_symbol)
7822 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7823 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7824 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7827 return rs6000_got_symbol;
7830 /* AIX Thread-Local Address support. */
7832 static rtx
7833 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7835 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7836 const char *name;
7837 char *tlsname;
7839 name = XSTR (addr, 0);
7840 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7841 or the symbol will be in the TLS private data section. */
7842 if (name[strlen (name) - 1] != ']'
7843 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7844 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7846 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7847 strcpy (tlsname, name);
7848 strcat (tlsname,
7849 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7850 tlsaddr = copy_rtx (addr);
7851 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7853 else
7854 tlsaddr = addr;
7856 /* Place addr into TOC constant pool. */
7857 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7859 /* Output the TOC entry and create the MEM referencing the value. */
7860 if (constant_pool_expr_p (XEXP (sym, 0))
7861 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7863 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7864 mem = gen_const_mem (Pmode, tocref);
7865 set_mem_alias_set (mem, get_TOC_alias_set ());
7867 else
7868 return sym;
7870 /* Use global-dynamic for local-dynamic. */
7871 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7872 || model == TLS_MODEL_LOCAL_DYNAMIC)
7874 /* Create new TOC reference for @m symbol. */
7875 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7876 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7877 strcpy (tlsname, "*LCM");
7878 strcat (tlsname, name + 3);
7879 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7880 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7881 tocref = create_TOC_reference (modaddr, NULL_RTX);
7882 rtx modmem = gen_const_mem (Pmode, tocref);
7883 set_mem_alias_set (modmem, get_TOC_alias_set ());
7885 rtx modreg = gen_reg_rtx (Pmode);
7886 emit_insn (gen_rtx_SET (modreg, modmem));
7888 tmpreg = gen_reg_rtx (Pmode);
7889 emit_insn (gen_rtx_SET (tmpreg, mem));
7891 dest = gen_reg_rtx (Pmode);
7892 if (TARGET_32BIT)
7893 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7894 else
7895 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7896 return dest;
7898 /* Obtain TLS pointer: 32-bit call or 64-bit GPR 13. */
7899 else if (TARGET_32BIT)
7901 tlsreg = gen_reg_rtx (SImode);
7902 emit_insn (gen_tls_get_tpointer (tlsreg));
7904 else
7905 tlsreg = gen_rtx_REG (DImode, 13);
7907 /* Load the TOC value into temporary register. */
7908 tmpreg = gen_reg_rtx (Pmode);
7909 emit_insn (gen_rtx_SET (tmpreg, mem));
7910 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7911 gen_rtx_MINUS (Pmode, addr, tlsreg));
7913 /* Add TOC symbol value to TLS pointer. */
7914 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7916 return dest;
7919 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7920 this (thread-local) address. */
7922 static rtx
7923 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7925 rtx dest, insn;
7927 if (TARGET_XCOFF)
7928 return rs6000_legitimize_tls_address_aix (addr, model);
7930 dest = gen_reg_rtx (Pmode);
7931 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7933 rtx tlsreg;
7935 if (TARGET_64BIT)
7937 tlsreg = gen_rtx_REG (Pmode, 13);
7938 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7940 else
7942 tlsreg = gen_rtx_REG (Pmode, 2);
7943 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7945 emit_insn (insn);
7947 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7949 rtx tlsreg, tmp;
7951 tmp = gen_reg_rtx (Pmode);
7952 if (TARGET_64BIT)
7954 tlsreg = gen_rtx_REG (Pmode, 13);
7955 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7957 else
7959 tlsreg = gen_rtx_REG (Pmode, 2);
7960 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7962 emit_insn (insn);
7963 if (TARGET_64BIT)
7964 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7965 else
7966 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7967 emit_insn (insn);
7969 else
7971 rtx r3, got, tga, tmp1, tmp2, call_insn;
7973 /* We currently use relocations like @got@tlsgd for tls, which
7974 means the linker will handle allocation of tls entries, placing
7975 them in the .got section. So use a pointer to the .got section,
7976 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7977 or to secondary GOT sections used by 32-bit -fPIC. */
7978 if (TARGET_64BIT)
7979 got = gen_rtx_REG (Pmode, 2);
7980 else
7982 if (flag_pic == 1)
7983 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7984 else
7986 rtx gsym = rs6000_got_sym ();
7987 got = gen_reg_rtx (Pmode);
7988 if (flag_pic == 0)
7989 rs6000_emit_move (got, gsym, Pmode);
7990 else
7992 rtx mem, lab, last;
7994 tmp1 = gen_reg_rtx (Pmode);
7995 tmp2 = gen_reg_rtx (Pmode);
7996 mem = gen_const_mem (Pmode, tmp1);
7997 lab = gen_label_rtx ();
7998 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7999 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8000 if (TARGET_LINK_STACK)
8001 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8002 emit_move_insn (tmp2, mem);
8003 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8004 set_unique_reg_note (last, REG_EQUAL, gsym);
8009 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8011 tga = rs6000_tls_get_addr ();
8012 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8013 1, const0_rtx, Pmode);
8015 r3 = gen_rtx_REG (Pmode, 3);
8016 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8018 if (TARGET_64BIT)
8019 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8020 else
8021 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8023 else if (DEFAULT_ABI == ABI_V4)
8024 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8025 else
8026 gcc_unreachable ();
8027 call_insn = last_call_insn ();
8028 PATTERN (call_insn) = insn;
8029 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8030 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8031 pic_offset_table_rtx);
8033 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8035 tga = rs6000_tls_get_addr ();
8036 tmp1 = gen_reg_rtx (Pmode);
8037 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8038 1, const0_rtx, Pmode);
8040 r3 = gen_rtx_REG (Pmode, 3);
8041 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8043 if (TARGET_64BIT)
8044 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8045 else
8046 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8048 else if (DEFAULT_ABI == ABI_V4)
8049 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8050 else
8051 gcc_unreachable ();
8052 call_insn = last_call_insn ();
8053 PATTERN (call_insn) = insn;
8054 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8055 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8056 pic_offset_table_rtx);
8058 if (rs6000_tls_size == 16)
8060 if (TARGET_64BIT)
8061 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8062 else
8063 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8065 else if (rs6000_tls_size == 32)
8067 tmp2 = gen_reg_rtx (Pmode);
8068 if (TARGET_64BIT)
8069 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8070 else
8071 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8072 emit_insn (insn);
8073 if (TARGET_64BIT)
8074 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8075 else
8076 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8078 else
8080 tmp2 = gen_reg_rtx (Pmode);
8081 if (TARGET_64BIT)
8082 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8083 else
8084 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8085 emit_insn (insn);
8086 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8088 emit_insn (insn);
8090 else
8092 /* IE, or 64-bit offset LE. */
8093 tmp2 = gen_reg_rtx (Pmode);
8094 if (TARGET_64BIT)
8095 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8096 else
8097 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8098 emit_insn (insn);
8099 if (TARGET_64BIT)
8100 insn = gen_tls_tls_64 (dest, tmp2, addr);
8101 else
8102 insn = gen_tls_tls_32 (dest, tmp2, addr);
8103 emit_insn (insn);
8107 return dest;
8110 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8112 static bool
8113 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8115 if (GET_CODE (x) == HIGH
8116 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8117 return true;
8119 /* A TLS symbol in the TOC cannot contain a sum. */
8120 if (GET_CODE (x) == CONST
8121 && GET_CODE (XEXP (x, 0)) == PLUS
8122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8123 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8124 return true;
8126 /* Do not place an ELF TLS symbol in the constant pool. */
8127 return TARGET_ELF && tls_referenced_p (x);
8130 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8131 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8132 can be addressed relative to the toc pointer. */
8134 static bool
8135 use_toc_relative_ref (rtx sym, machine_mode mode)
8137 return ((constant_pool_expr_p (sym)
8138 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8139 get_pool_mode (sym)))
8140 || (TARGET_CMODEL == CMODEL_MEDIUM
8141 && SYMBOL_REF_LOCAL_P (sym)
8142 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8145 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8146 replace the input X, or the original X if no replacement is called for.
8147 The output parameter *WIN is 1 if the calling macro should goto WIN,
8148 0 if it should not.
8150 For RS/6000, we wish to handle large displacements off a base
8151 register by splitting the addend across an addis and the mem insn.
8152 This cuts the number of extra insns needed from 3 to 1.
8154 On Darwin, we use this to generate code for floating point constants.
8155 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8156 The Darwin code is inside #if TARGET_MACHO because only then are the
8157 machopic_* functions defined. */
8158 static rtx
8159 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8160 int opnum, int type,
8161 int ind_levels ATTRIBUTE_UNUSED, int *win)
8163 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8165 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
8166 DFmode/DImode MEM. */
8167 if (reg_offset_p
8168 && opnum == 1
8169 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8170 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
8171 reg_offset_p = false;
8173 /* We must recognize output that we have already generated ourselves. */
8174 if (GET_CODE (x) == PLUS
8175 && GET_CODE (XEXP (x, 0)) == PLUS
8176 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8177 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8178 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8180 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8181 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8182 opnum, (enum reload_type) type);
8183 *win = 1;
8184 return x;
8187 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8188 if (GET_CODE (x) == LO_SUM
8189 && GET_CODE (XEXP (x, 0)) == HIGH)
8191 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8192 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8193 opnum, (enum reload_type) type);
8194 *win = 1;
8195 return x;
8198 #if TARGET_MACHO
8199 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8200 && GET_CODE (x) == LO_SUM
8201 && GET_CODE (XEXP (x, 0)) == PLUS
8202 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8203 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8204 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8205 && machopic_operand_p (XEXP (x, 1)))
8207 /* Result of previous invocation of this function on Darwin
8208 floating point constant. */
8209 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8210 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8211 opnum, (enum reload_type) type);
8212 *win = 1;
8213 return x;
8215 #endif
8217 if (TARGET_CMODEL != CMODEL_SMALL
8218 && reg_offset_p
8219 && small_toc_ref (x, VOIDmode))
8221 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8222 x = gen_rtx_LO_SUM (Pmode, hi, x);
8223 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8224 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8225 opnum, (enum reload_type) type);
8226 *win = 1;
8227 return x;
8230 if (GET_CODE (x) == PLUS
8231 && GET_CODE (XEXP (x, 0)) == REG
8232 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8233 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8234 && GET_CODE (XEXP (x, 1)) == CONST_INT
8235 && reg_offset_p
8236 && !SPE_VECTOR_MODE (mode)
8237 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8238 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8240 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8241 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8242 HOST_WIDE_INT high
8243 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8245 /* Check for 32-bit overflow. */
8246 if (high + low != val)
8248 *win = 0;
8249 return x;
8252 /* Reload the high part into a base reg; leave the low part
8253 in the mem directly. */
8255 x = gen_rtx_PLUS (GET_MODE (x),
8256 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8257 GEN_INT (high)),
8258 GEN_INT (low));
8260 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8261 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8262 opnum, (enum reload_type) type);
8263 *win = 1;
8264 return x;
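/* Editorial worked example, not in the original source: this split also
   sign-extends the high part to 32 bits, and the HIGH + LOW == VAL
   check rejects displacements an addis/D-field pair cannot reach.  For
   val == 0x12345678: low == 0x5678, high == 0x12340000, and the sum
   checks out.  For val == 0x7fffffff: low == -1, the high part
   sign-extends to -0x80000000, and -0x80000001 != 0x7fffffff, so
   *win stays 0.  */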
8267 if (GET_CODE (x) == SYMBOL_REF
8268 && reg_offset_p
8269 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
8270 && !SPE_VECTOR_MODE (mode)
8271 #if TARGET_MACHO
8272 && DEFAULT_ABI == ABI_DARWIN
8273 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
8274 && machopic_symbol_defined_p (x)
8275 #else
8276 && DEFAULT_ABI == ABI_V4
8277 && !flag_pic
8278 #endif
8279 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
8280 The same goes for DImode without 64-bit gprs and DFmode and DDmode
8281 without fprs.
8282 ??? Assume floating point reg based on mode? This assumption is
8283 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
8284 where reload ends up doing a DFmode load of a constant from
8285 mem using two gprs. Unfortunately, at this point reload
8286 hasn't yet selected regs so poking around in reload data
8287 won't help and even if we could figure out the regs reliably,
8288 we'd still want to allow this transformation when the mem is
8289 naturally aligned. Since we say the address is good here, we
8290 can't disable offsets from LO_SUMs in mem_operand_gpr.
8291 FIXME: Allow offset from lo_sum for other modes too, when
8292 mem is sufficiently aligned.
8294 Also disallow this if the type can go in VMX/Altivec registers, since
8295 those registers do not have d-form (reg+offset) address modes. */
8296 && !reg_addr[mode].scalar_in_vmx_p
8297 && mode != TFmode
8298 && mode != TDmode
8299 && mode != IFmode
8300 && mode != KFmode
8301 && (mode != TImode || !TARGET_VSX_TIMODE)
8302 && mode != PTImode
8303 && (mode != DImode || TARGET_POWERPC64)
8304 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
8305 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
8307 #if TARGET_MACHO
8308 if (flag_pic)
8310 rtx offset = machopic_gen_offset (x);
8311 x = gen_rtx_LO_SUM (GET_MODE (x),
8312 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
8313 gen_rtx_HIGH (Pmode, offset)), offset);
8315 else
8316 #endif
8317 x = gen_rtx_LO_SUM (GET_MODE (x),
8318 gen_rtx_HIGH (Pmode, x), x);
8320 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8321 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8322 opnum, (enum reload_type) type);
8323 *win = 1;
8324 return x;
8327 /* Reload an offset address wrapped by an AND that represents the
8328 masking of the lower bits. Strip the outer AND and let reload
8329 convert the offset address into an indirect address. For VSX,
8330 force reload to create the address with an AND in a separate
8331 register, because we can't guarantee an altivec register will
8332 be used. */
8333 if (VECTOR_MEM_ALTIVEC_P (mode)
8334 && GET_CODE (x) == AND
8335 && GET_CODE (XEXP (x, 0)) == PLUS
8336 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8337 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8338 && GET_CODE (XEXP (x, 1)) == CONST_INT
8339 && INTVAL (XEXP (x, 1)) == -16)
8341 x = XEXP (x, 0);
8342 *win = 1;
8343 return x;
8346 if (TARGET_TOC
8347 && reg_offset_p
8348 && GET_CODE (x) == SYMBOL_REF
8349 && use_toc_relative_ref (x, mode))
8351 x = create_TOC_reference (x, NULL_RTX);
8352 if (TARGET_CMODEL != CMODEL_SMALL)
8353 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8354 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8355 opnum, (enum reload_type) type);
8356 *win = 1;
8357 return x;
8359 *win = 0;
8360 return x;
8363 /* Debug version of rs6000_legitimize_reload_address. */
8364 static rtx
8365 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
8366 int opnum, int type,
8367 int ind_levels, int *win)
8369 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
8370 ind_levels, win);
8371 fprintf (stderr,
8372 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
8373 "type = %d, ind_levels = %d, win = %d, original addr:\n",
8374 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
8375 debug_rtx (x);
8377 if (x == ret)
8378 fprintf (stderr, "Same address returned\n");
8379 else if (!ret)
8380 fprintf (stderr, "NULL returned\n");
8381 else
8383 fprintf (stderr, "New address:\n");
8384 debug_rtx (ret);
8387 return ret;
8390 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8391 that is a valid memory address for an instruction.
8392 The MODE argument is the machine mode for the MEM expression
8393 that wants to use this address.
8395 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8396 refers to a constant pool entry of an address (or the sum of it
8397 plus a constant), a short (16-bit signed) constant plus a register,
8398 the sum of two registers, or a register indirect, possibly with an
8399 auto-increment. For DFmode, DDmode and DImode with a constant plus
8400 register, we must ensure that both words are addressable, or on
8401 PowerPC64 that the offset is word aligned.
8403 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8404 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8405 because adjacent memory cells are accessed by adding word-sized offsets
8406 during assembly output. */
8407 static bool
8408 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8410 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8412 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
8413 if (VECTOR_MEM_ALTIVEC_P (mode)
8414 && GET_CODE (x) == AND
8415 && GET_CODE (XEXP (x, 1)) == CONST_INT
8416 && INTVAL (XEXP (x, 1)) == -16)
8417 x = XEXP (x, 0);
8419 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8420 return 0;
8421 if (legitimate_indirect_address_p (x, reg_ok_strict))
8422 return 1;
8423 if (TARGET_UPDATE
8424 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8425 && mode_supports_pre_incdec_p (mode)
8426 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8427 return 1;
8428 if (virtual_stack_registers_memory_p (x))
8429 return 1;
8430 if (reg_offset_p && legitimate_small_data_p (mode, x))
8431 return 1;
8432 if (reg_offset_p
8433 && legitimate_constant_pool_address_p (x, mode,
8434 reg_ok_strict || lra_in_progress))
8435 return 1;
8436 if (reg_offset_p && reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
8437 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
8438 return 1;
8439 /* For TImode, if we have load/store quad and TImode in VSX registers, only
8440 allow register indirect addresses. This will allow the values to go in
8441 either GPRs or VSX registers without reloading. The vector types would
8442 tend to go into VSX registers, so we allow REG+REG, while TImode seems
8443 somewhat split, in that some uses are GPR based, and some VSX based. */
8444 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
8445 return 0;
8446 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8447 if (! reg_ok_strict
8448 && reg_offset_p
8449 && GET_CODE (x) == PLUS
8450 && GET_CODE (XEXP (x, 0)) == REG
8451 && (XEXP (x, 0) == virtual_stack_vars_rtx
8452 || XEXP (x, 0) == arg_pointer_rtx)
8453 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8454 return 1;
8455 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8456 return 1;
8457 if (!FLOAT128_2REG_P (mode)
8458 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8459 || TARGET_POWERPC64
8460 || (mode != DFmode && mode != DDmode)
8461 || (TARGET_E500_DOUBLE && mode != DDmode))
8462 && (TARGET_POWERPC64 || mode != DImode)
8463 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8464 && mode != PTImode
8465 && !avoiding_indexed_address_p (mode)
8466 && legitimate_indexed_address_p (x, reg_ok_strict))
8467 return 1;
8468 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8469 && mode_supports_pre_modify_p (mode)
8470 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8471 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8472 reg_ok_strict, false)
8473 || (!avoiding_indexed_address_p (mode)
8474 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8475 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8476 return 1;
8477 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8478 return 1;
8479 return 0;
8482 /* Debug version of rs6000_legitimate_address_p. */
8483 static bool
8484 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8485 bool reg_ok_strict)
8487 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8488 fprintf (stderr,
8489 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8490 "strict = %d, reload = %s, code = %s\n",
8491 ret ? "true" : "false",
8492 GET_MODE_NAME (mode),
8493 reg_ok_strict,
8494 (reload_completed
8495 ? "after"
8496 : (reload_in_progress ? "progress" : "before")),
8497 GET_RTX_NAME (GET_CODE (x)));
8498 debug_rtx (x);
8500 return ret;
8503 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8505 static bool
8506 rs6000_mode_dependent_address_p (const_rtx addr,
8507 addr_space_t as ATTRIBUTE_UNUSED)
8509 return rs6000_mode_dependent_address_ptr (addr);
8512 /* Go to LABEL if ADDR (a legitimate address expression)
8513 has an effect that depends on the machine mode it is used for.
8515 On the RS/6000 this is true of all integral offsets (since AltiVec
8516 and VSX modes don't allow them) and of pre-increment or decrement addresses.
8518 ??? Except that due to conceptual problems in offsettable_address_p
8519 we can't really report the problems of integral offsets. So leave
8520 this assuming that the adjustable offset must be valid for the
8521 sub-words of a TFmode operand, which is what we had before. */
8523 static bool
8524 rs6000_mode_dependent_address (const_rtx addr)
8526 switch (GET_CODE (addr))
8528 case PLUS:
8529 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8530 is considered a legitimate address before reload, so there
8531 are no offset restrictions in that case. Note that this
8532 condition is safe in strict mode because any address involving
8533 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8534 been rejected as illegitimate. */
8535 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8536 && XEXP (addr, 0) != arg_pointer_rtx
8537 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8539 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8540 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8542 break;
8544 case LO_SUM:
8545 /* Anything in the constant pool is sufficiently aligned that
8546 all bytes have the same high part address. */
8547 return !legitimate_constant_pool_address_p (addr, QImode, false);
8549 /* Auto-increment cases are now treated generically in recog.c. */
8550 case PRE_MODIFY:
8551 return TARGET_UPDATE;
8553 /* AND is only allowed in Altivec loads. */
8554 case AND:
8555 return true;
8557 default:
8558 break;
8561 return false;
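/* Editorial note, not in the original source: in the PLUS case above,
   e.g. a 32-bit offset of 32760 (0x7ff8) is flagged as mode-dependent:
   32760 + 0x8000 == 0xfff8 >= 0x10000 - 12, since a TFmode access
   would need displacements up to 32760 + 12, past the D-field limit.  */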
8564 /* Debug version of rs6000_mode_dependent_address. */
8565 static bool
8566 rs6000_debug_mode_dependent_address (const_rtx addr)
8568 bool ret = rs6000_mode_dependent_address (addr);
8570 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8571 ret ? "true" : "false");
8572 debug_rtx (addr);
8574 return ret;
8577 /* Implement FIND_BASE_TERM. */
8579 rtx
8580 rs6000_find_base_term (rtx op)
8582 rtx base;
8584 base = op;
8585 if (GET_CODE (base) == CONST)
8586 base = XEXP (base, 0);
8587 if (GET_CODE (base) == PLUS)
8588 base = XEXP (base, 0);
8589 if (GET_CODE (base) == UNSPEC)
8590 switch (XINT (base, 1))
8592 case UNSPEC_TOCREL:
8593 case UNSPEC_MACHOPIC_OFFSET:
8594 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8595 for aliasing purposes. */
8596 return XVECEXP (base, 0, 0);
8599 return op;
8602 /* More elaborate version of recog's offsettable_memref_p predicate
8603 that works around the ??? note of rs6000_mode_dependent_address.
8604 In particular it accepts
8606 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8608 in 32-bit mode, which the recog predicate rejects. */
8610 static bool
8611 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8613 bool worst_case;
8615 if (!MEM_P (op))
8616 return false;
8618 /* First mimic offsettable_memref_p. */
8619 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8620 return true;
8622 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8623 the latter predicate knows nothing about the mode of the memory
8624 reference and, therefore, assumes that it is the largest supported
8625 mode (TFmode). As a consequence, legitimate offsettable memory
8626 references are rejected. rs6000_legitimate_offset_address_p contains
8627 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8628 at least with a little bit of help here given that we know the
8629 actual registers used. */
8630 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8631 || GET_MODE_SIZE (reg_mode) == 4);
8632 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8633 true, worst_case);
8636 /* Change register usage conditional on target flags. */
8637 static void
8638 rs6000_conditional_register_usage (void)
8640 int i;
8642 if (TARGET_DEBUG_TARGET)
8643 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8645 /* Set MQ register fixed (already call_used) so that it will not be
8646 allocated. */
8647 fixed_regs[64] = 1;
8649 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8650 if (TARGET_64BIT)
8651 fixed_regs[13] = call_used_regs[13]
8652 = call_really_used_regs[13] = 1;
8654 /* Conditionally disable FPRs. */
8655 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8656 for (i = 32; i < 64; i++)
8657 fixed_regs[i] = call_used_regs[i]
8658 = call_really_used_regs[i] = 1;
8660 /* The TOC register is not killed across calls in a way that is
8661 visible to the compiler. */
8662 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8663 call_really_used_regs[2] = 0;
8665 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
8666 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8668 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
8669 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8670 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8671 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8673 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
8674 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8675 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8676 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8678 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8679 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8680 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8682 if (TARGET_SPE)
8684 global_regs[SPEFSCR_REGNO] = 1;
8685 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8686 registers in prologues and epilogues. We no longer use r14
8687 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8688 pool for link-compatibility with older versions of GCC. Once
8689 "old" code has died out, we can return r14 to the allocation
8690 pool. */
8691 fixed_regs[14]
8692 = call_used_regs[14]
8693 = call_really_used_regs[14] = 1;
8696 if (!TARGET_ALTIVEC && !TARGET_VSX)
8698 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8699 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8700 call_really_used_regs[VRSAVE_REGNO] = 1;
8703 if (TARGET_ALTIVEC || TARGET_VSX)
8704 global_regs[VSCR_REGNO] = 1;
8706 if (TARGET_ALTIVEC_ABI)
8708 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8709 call_used_regs[i] = call_really_used_regs[i] = 1;
8711 /* AIX reserves VR20:31 in non-extended ABI mode. */
8712 if (TARGET_XCOFF)
8713 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8714 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8719 /* Output insns to set DEST equal to the constant SOURCE as a series of
8720 lis, ori and shl instructions and return TRUE. */
8722 bool
8723 rs6000_emit_set_const (rtx dest, rtx source)
8725 machine_mode mode = GET_MODE (dest);
8726 rtx temp, set;
8727 rtx_insn *insn;
8728 HOST_WIDE_INT c;
8730 gcc_checking_assert (CONST_INT_P (source));
8731 c = INTVAL (source);
8732 switch (mode)
8734 case QImode:
8735 case HImode:
8736 emit_insn (gen_rtx_SET (dest, source));
8737 return true;
8739 case SImode:
8740 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8742 emit_insn (gen_rtx_SET (copy_rtx (temp),
8743 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8744 emit_insn (gen_rtx_SET (dest,
8745 gen_rtx_IOR (SImode, copy_rtx (temp),
8746 GEN_INT (c & 0xffff))));
8747 break;
8749 case DImode:
8750 if (!TARGET_POWERPC64)
8752 rtx hi, lo;
8754 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8755 DImode);
8756 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8757 DImode);
8758 emit_move_insn (hi, GEN_INT (c >> 32));
8759 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8760 emit_move_insn (lo, GEN_INT (c));
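/* Editorial note, not in the original source: the XOR/subtract above is
   the usual sign-extension idiom -- it reproduces bits 0-31 of C with
   bit 31 propagated through the upper half, so GEN_INT sees a properly
   sign-extended HOST_WIDE_INT for the low word.  */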
8762 else
8763 rs6000_emit_set_long_const (dest, c);
8764 break;
8766 default:
8767 gcc_unreachable ();
8770 insn = get_last_insn ();
8771 set = single_set (insn);
8772 if (! CONSTANT_P (SET_SRC (set)))
8773 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8775 return true;
8778 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8779 Output insns to set DEST equal to the constant C as a series of
8780 lis, ori and shl instructions. */
8782 static void
8783 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8785 rtx temp;
8786 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8788 ud1 = c & 0xffff;
8789 c = c >> 16;
8790 ud2 = c & 0xffff;
8791 c = c >> 16;
8792 ud3 = c & 0xffff;
8793 c = c >> 16;
8794 ud4 = c & 0xffff;
8796 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8797 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8798 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8800 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8801 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8803 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8805 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8806 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8807 if (ud1 != 0)
8808 emit_move_insn (dest,
8809 gen_rtx_IOR (DImode, copy_rtx (temp),
8810 GEN_INT (ud1)));
8812 else if (ud3 == 0 && ud4 == 0)
8814 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8816 gcc_assert (ud2 & 0x8000);
8817 emit_move_insn (copy_rtx (temp),
8818 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8819 if (ud1 != 0)
8820 emit_move_insn (copy_rtx (temp),
8821 gen_rtx_IOR (DImode, copy_rtx (temp),
8822 GEN_INT (ud1)));
8823 emit_move_insn (dest,
8824 gen_rtx_ZERO_EXTEND (DImode,
8825 gen_lowpart (SImode,
8826 copy_rtx (temp))));
8828 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8829 || (ud4 == 0 && ! (ud3 & 0x8000)))
8831 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8833 emit_move_insn (copy_rtx (temp),
8834 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8835 if (ud2 != 0)
8836 emit_move_insn (copy_rtx (temp),
8837 gen_rtx_IOR (DImode, copy_rtx (temp),
8838 GEN_INT (ud2)));
8839 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8840 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8841 GEN_INT (16)));
8842 if (ud1 != 0)
8843 emit_move_insn (dest,
8844 gen_rtx_IOR (DImode, copy_rtx (temp),
8845 GEN_INT (ud1)));
8847 else
8849 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8851 emit_move_insn (copy_rtx (temp),
8852 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8853 if (ud3 != 0)
8854 emit_move_insn (copy_rtx (temp),
8855 gen_rtx_IOR (DImode, copy_rtx (temp),
8856 GEN_INT (ud3)));
8858 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8859 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8860 GEN_INT (32)));
8861 if (ud2 != 0)
8862 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8863 gen_rtx_IOR (DImode, copy_rtx (temp),
8864 GEN_INT (ud2 << 16)));
8865 if (ud1 != 0)
8866 emit_move_insn (dest,
8867 gen_rtx_IOR (DImode, copy_rtx (temp),
8868 GEN_INT (ud1)));
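/* Editorial sketch, not in the original source: the function above
   works on the constant as four 16-bit chunks, UD4..UD1 from most to
   least significant, and emits the shortest lis/ori/sldi/oris/ori
   sequence whose sign extensions reconstruct the value.  E.g.
   0xdeadbeefcafe1234 splits into ud4 0xdead, ud3 0xbeef, ud2 0xcafe,
   ud1 0x1234 and needs the full five-insn final arm.  */

static void ATTRIBUTE_UNUSED
split_ud_sketch (unsigned HOST_WIDE_INT c, unsigned int ud[4])
{
  ud[0] = c & 0xffff;           /* ud1 */
  ud[1] = (c >> 16) & 0xffff;   /* ud2 */
  ud[2] = (c >> 32) & 0xffff;   /* ud3 */
  ud[3] = (c >> 48) & 0xffff;   /* ud4 */
}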
8872 /* Helper for the following. Get rid of [r+r] memory refs
8873 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8875 static void
8876 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8878 if (reload_in_progress)
8879 return;
8881 if (GET_CODE (operands[0]) == MEM
8882 && GET_CODE (XEXP (operands[0], 0)) != REG
8883 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8884 GET_MODE (operands[0]), false))
8885 operands[0]
8886 = replace_equiv_address (operands[0],
8887 copy_addr_to_reg (XEXP (operands[0], 0)));
8889 if (GET_CODE (operands[1]) == MEM
8890 && GET_CODE (XEXP (operands[1], 0)) != REG
8891 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8892 GET_MODE (operands[1]), false))
8893 operands[1]
8894 = replace_equiv_address (operands[1],
8895 copy_addr_to_reg (XEXP (operands[1], 0)));
8898 /* Generate a vector of constants to permute MODE for a little-endian
8899 storage operation by swapping the two halves of a vector. */
8900 static rtvec
8901 rs6000_const_vec (machine_mode mode)
8903 int i, subparts;
8904 rtvec v;
8906 switch (mode)
8908 case V1TImode:
8909 subparts = 1;
8910 break;
8911 case V2DFmode:
8912 case V2DImode:
8913 subparts = 2;
8914 break;
8915 case V4SFmode:
8916 case V4SImode:
8917 subparts = 4;
8918 break;
8919 case V8HImode:
8920 subparts = 8;
8921 break;
8922 case V16QImode:
8923 subparts = 16;
8924 break;
8925 default:
8926 gcc_unreachable();
8929 v = rtvec_alloc (subparts);
8931 for (i = 0; i < subparts / 2; ++i)
8932 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8933 for (i = subparts / 2; i < subparts; ++i)
8934 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8936 return v;
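/* Editorial sketch, not in the original source: the selector built
   above rotates each lane index by half the vector, e.g. {2, 3, 0, 1}
   for V4SImode, so the VEC_SELECT swaps the two 64-bit halves of the
   vector.  Equivalently:

     idx[i] = (i + subparts / 2) % subparts;  */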
8939 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8940 for a VSX load or store operation. */
8941 rtx
8942 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8944 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
8945 128-bit integers if they are allowed in VSX registers. */
8946 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
8947 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
8948 else
8950 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8951 return gen_rtx_VEC_SELECT (mode, source, par);
8955 /* Emit a little-endian load from vector memory location SOURCE to VSX
8956 register DEST in mode MODE. The load is done with two permuting
8957 insn's that represent an lxvd2x and xxpermdi. */
8958 void
8959 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8961 rtx tmp, permute_mem, permute_reg;
8963 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
8964 V1TImode). */
8965 if (mode == TImode || mode == V1TImode)
8967 mode = V2DImode;
8968 dest = gen_lowpart (V2DImode, dest);
8969 source = adjust_address (source, V2DImode, 0);
8972 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8973 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8974 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8975 emit_insn (gen_rtx_SET (tmp, permute_mem));
8976 emit_insn (gen_rtx_SET (dest, permute_reg));
8979 /* Emit a little-endian store to vector memory location DEST from VSX
8980 register SOURCE in mode MODE. The store is done with two permuting
8981 insn's that represent an xxpermdi and an stxvd2x. */
8982 void
8983 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8985 rtx tmp, permute_src, permute_tmp;
8987 /* This should never be called during or after reload, because it does
8988 not re-permute the source register. It is intended only for use
8989 during expand. */
8990 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8992 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8993 V1TImode). */
8994 if (mode == TImode || mode == V1TImode)
8996 mode = V2DImode;
8997 dest = adjust_address (dest, V2DImode, 0);
8998 source = gen_lowpart (V2DImode, source);
9001 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9002 permute_src = rs6000_gen_le_vsx_permute (source, mode);
9003 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
9004 emit_insn (gen_rtx_SET (tmp, permute_src));
9005 emit_insn (gen_rtx_SET (dest, permute_tmp));
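/* Editorial sketch, not part of the original source: on little-endian
   targets lxvd2x and stxvd2x transfer the two doublewords swapped, and
   the companion xxpermdi swaps them back, so each pair of permutes in
   the load and store sequences above composes to the identity and the
   register ends up in true little-endian element order:  */

static void ATTRIBUTE_UNUSED
double_swap_sketch (unsigned HOST_WIDE_INT v[2])
{
  for (int pass = 0; pass < 2; pass++)
    {
      unsigned HOST_WIDE_INT t = v[0];
      v[0] = v[1];
      v[1] = t;
    }
  /* V is unchanged: swapping the halves twice is the identity.  */
}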
9008 /* Emit a sequence representing a little-endian VSX load or store,
9009 moving data from SOURCE to DEST in mode MODE. This is done
9010 separately from rs6000_emit_move to ensure it is called only
9011 during expand. LE VSX loads and stores introduced later are
9012 handled with a split. The expand-time RTL generation allows
9013 us to optimize away redundant pairs of register-permutes. */
9014 void
9015 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9017 gcc_assert (!BYTES_BIG_ENDIAN
9018 && VECTOR_MEM_VSX_P (mode)
9019 && !TARGET_P9_VECTOR
9020 && !gpr_or_gpr_p (dest, source)
9021 && (MEM_P (source) ^ MEM_P (dest)));
9023 if (MEM_P (source))
9025 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
9026 rs6000_emit_le_vsx_load (dest, source, mode);
9028 else
9030 if (!REG_P (source))
9031 source = force_reg (mode, source);
9032 rs6000_emit_le_vsx_store (dest, source, mode);
9036 /* Emit a move from SOURCE to DEST in mode MODE. */
9037 void
9038 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9040 rtx operands[2];
9041 operands[0] = dest;
9042 operands[1] = source;
9044 if (TARGET_DEBUG_ADDR)
9046 fprintf (stderr,
9047 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
9048 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9049 GET_MODE_NAME (mode),
9050 reload_in_progress,
9051 reload_completed,
9052 can_create_pseudo_p ());
9053 debug_rtx (dest);
9054 fprintf (stderr, "source:\n");
9055 debug_rtx (source);
9058 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
9059 if (CONST_WIDE_INT_P (operands[1])
9060 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9062 /* This should be fixed with the introduction of CONST_WIDE_INT. */
9063 gcc_unreachable ();
9066 /* Check if GCC is setting up a block move that will end up using FP
9067 registers as temporaries. We must make sure this is acceptable. */
9068 if (GET_CODE (operands[0]) == MEM
9069 && GET_CODE (operands[1]) == MEM
9070 && mode == DImode
9071 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
9072 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
9073 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
9074 ? 32 : MEM_ALIGN (operands[0])))
9075 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
9076 ? 32
9077 : MEM_ALIGN (operands[1]))))
9078 && ! MEM_VOLATILE_P (operands [0])
9079 && ! MEM_VOLATILE_P (operands [1]))
9081 emit_move_insn (adjust_address (operands[0], SImode, 0),
9082 adjust_address (operands[1], SImode, 0));
9083 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9084 adjust_address (copy_rtx (operands[1]), SImode, 4));
9085 return;
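/* Editorial example: copying an 8-byte field that is only 4-byte
   aligned, as in the hypothetical

       struct s { int i; long long x; } __attribute__ ((packed, aligned (4)));

   can make SLOW_UNALIGNED_ACCESS true for DImode while 4-byte SImode
   accesses remain cheap; rather than let GCC stage the copy through an
   FP register, the split above emits two SImode moves at offsets 0
   and 4.  */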
9088 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9089 && !gpc_reg_operand (operands[1], mode))
9090 operands[1] = force_reg (mode, operands[1]);
9092 /* Recognize the case where operand[1] is a reference to thread-local
9093 data and load its address to a register. */
9094 if (tls_referenced_p (operands[1]))
9096 enum tls_model model;
9097 rtx tmp = operands[1];
9098 rtx addend = NULL;
9100 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9102 addend = XEXP (XEXP (tmp, 0), 1);
9103 tmp = XEXP (XEXP (tmp, 0), 0);
9106 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9107 model = SYMBOL_REF_TLS_MODEL (tmp);
9108 gcc_assert (model != 0);
9110 tmp = rs6000_legitimize_tls_address (tmp, model);
9111 if (addend)
9113 tmp = gen_rtx_PLUS (mode, tmp, addend);
9114 tmp = force_operand (tmp, operands[0]);
9116 operands[1] = tmp;
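/* Editorial example: for "__thread int t;" the source is a SYMBOL_REF
   whose SYMBOL_REF_TLS_MODEL is set (say TLS_MODEL_INITIAL_EXEC), and
   rs6000_legitimize_tls_address rewrites it into an address computed
   off the thread pointer (r13 on 64-bit, r2 on 32-bit SVR4); an offset
   reference like "t + 4" arrives as a CONST PLUS whose addend is
   re-applied afterwards.  */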
9119 /* Handle the case where reload calls us with an invalid address. */
9120 if (reload_in_progress && mode == Pmode
9121 && (! general_operand (operands[1], mode)
9122 || ! nonimmediate_operand (operands[0], mode)))
9123 goto emit_set;
9125 /* 128-bit constant floating-point values on Darwin should really be loaded
9126 as two parts. However, this premature splitting is a problem when DFmode
9127 values can go into Altivec registers. */
9128 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9129 && GET_CODE (operands[1]) == CONST_DOUBLE)
9131 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9132 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9133 DFmode);
9134 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9135 GET_MODE_SIZE (DFmode)),
9136 simplify_gen_subreg (DFmode, operands[1], mode,
9137 GET_MODE_SIZE (DFmode)),
9138 DFmode);
9139 return;
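/* Editorial example: an IBM extended ("double-double") TFmode constant
   such as 1.0L is a pair of DFmode values, so the recursion above
   replaces one 128-bit set with two DFmode sets at byte offsets 0 and
   GET_MODE_SIZE (DFmode) within the operands.  */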
9142 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9143 cfun->machine->sdmode_stack_slot =
9144 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9147 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9148 p1:SD) if p1 is not of floating point class and p0 is spilled as
9149 we can have no analogous movsd_store for this. */
9150 if (lra_in_progress && mode == DDmode
9151 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9152 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9153 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9154 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9156 enum reg_class cl;
9157 int regno = REGNO (SUBREG_REG (operands[1]));
9159 if (regno >= FIRST_PSEUDO_REGISTER)
9161 cl = reg_preferred_class (regno);
9162 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9164 if (regno >= 0 && ! FP_REGNO_P (regno))
9166 mode = SDmode;
9167 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9168 operands[1] = SUBREG_REG (operands[1]);
9171 if (lra_in_progress
9172 && mode == SDmode
9173 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9174 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9175 && (REG_P (operands[1])
9176 || (GET_CODE (operands[1]) == SUBREG
9177 && REG_P (SUBREG_REG (operands[1])))))
9179 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9180 ? SUBREG_REG (operands[1]) : operands[1]);
9181 enum reg_class cl;
9183 if (regno >= FIRST_PSEUDO_REGISTER)
9185 cl = reg_preferred_class (regno);
9186 gcc_assert (cl != NO_REGS);
9187 regno = ira_class_hard_regs[cl][0];
9189 if (FP_REGNO_P (regno))
9191 if (GET_MODE (operands[0]) != DDmode)
9192 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9193 emit_insn (gen_movsd_store (operands[0], operands[1]));
9195 else if (INT_REGNO_P (regno))
9196 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9197 else
9198 gcc_unreachable();
9199 return;
9201 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9202 p:DD)) if p0 is not of floating point class and p1 is spilled as
9203 we can have no analogous movsd_load for this. */
9204 if (lra_in_progress && mode == DDmode
9205 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
9206 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9207 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9208 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9210 enum reg_class cl;
9211 int regno = REGNO (SUBREG_REG (operands[0]));
9213 if (regno >= FIRST_PSEUDO_REGISTER)
9215 cl = reg_preferred_class (regno);
9216 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9218 if (regno >= 0 && ! FP_REGNO_P (regno))
9220 mode = SDmode;
9221 operands[0] = SUBREG_REG (operands[0]);
9222 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9225 if (lra_in_progress
9226 && mode == SDmode
9227 && (REG_P (operands[0])
9228 || (GET_CODE (operands[0]) == SUBREG
9229 && REG_P (SUBREG_REG (operands[0]))))
9230 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9231 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9233 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
9234 ? SUBREG_REG (operands[0]) : operands[0]);
9235 enum reg_class cl;
9237 if (regno >= FIRST_PSEUDO_REGISTER)
9239 cl = reg_preferred_class (regno);
9240 gcc_assert (cl != NO_REGS);
9241 regno = ira_class_hard_regs[cl][0];
9243 if (FP_REGNO_P (regno))
9245 if (GET_MODE (operands[1]) != DDmode)
9246 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9247 emit_insn (gen_movsd_load (operands[0], operands[1]));
9249 else if (INT_REGNO_P (regno))
9250 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9251 else
9252 gcc_unreachable();
9253 return;
9256 if (reload_in_progress
9257 && mode == SDmode
9258 && cfun->machine->sdmode_stack_slot != NULL_RTX
9259 && MEM_P (operands[0])
9260 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
9261 && REG_P (operands[1]))
9263 if (FP_REGNO_P (REGNO (operands[1])))
9265 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
9266 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9267 emit_insn (gen_movsd_store (mem, operands[1]));
9269 else if (INT_REGNO_P (REGNO (operands[1])))
9271 rtx mem = operands[0];
9272 if (BYTES_BIG_ENDIAN)
9273 mem = adjust_address_nv (mem, mode, 4);
9274 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9275 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
9277 else
9278 gcc_unreachable();
9279 return;
9281 if (reload_in_progress
9282 && mode == SDmode
9283 && REG_P (operands[0])
9284 && MEM_P (operands[1])
9285 && cfun->machine->sdmode_stack_slot != NULL_RTX
9286 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
9288 if (FP_REGNO_P (REGNO (operands[0])))
9290 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
9291 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9292 emit_insn (gen_movsd_load (operands[0], mem));
9294 else if (INT_REGNO_P (REGNO (operands[0])))
9296 rtx mem = operands[1];
9297 if (BYTES_BIG_ENDIAN)
9298 mem = adjust_address_nv (mem, mode, 4);
9299 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9300 emit_insn (gen_movsd_hardfloat (operands[0], mem));
9302 else
9303 gcc_unreachable();
9304 return;
9307 /* FIXME: In the long term, this switch statement should go away
9308 and be replaced by a sequence of tests based on things like
9309 mode == Pmode. */
9310 switch (mode)
9312 case HImode:
9313 case QImode:
9314 if (CONSTANT_P (operands[1])
9315 && GET_CODE (operands[1]) != CONST_INT)
9316 operands[1] = force_const_mem (mode, operands[1]);
9317 break;
9319 case TFmode:
9320 case TDmode:
9321 case IFmode:
9322 case KFmode:
9323 if (FLOAT128_2REG_P (mode))
9324 rs6000_eliminate_indexed_memrefs (operands);
9325 /* fall through */
9327 case DFmode:
9328 case DDmode:
9329 case SFmode:
9330 case SDmode:
9331 if (CONSTANT_P (operands[1])
9332 && ! easy_fp_constant (operands[1], mode))
9333 operands[1] = force_const_mem (mode, operands[1]);
9334 break;
9336 case V16QImode:
9337 case V8HImode:
9338 case V4SFmode:
9339 case V4SImode:
9340 case V4HImode:
9341 case V2SFmode:
9342 case V2SImode:
9343 case V1DImode:
9344 case V2DFmode:
9345 case V2DImode:
9346 case V1TImode:
9347 if (CONSTANT_P (operands[1])
9348 && !easy_vector_constant (operands[1], mode))
9349 operands[1] = force_const_mem (mode, operands[1]);
9350 break;
9352 case SImode:
9353 case DImode:
9354 /* Use default pattern for address of ELF small data */
9355 if (TARGET_ELF
9356 && mode == Pmode
9357 && DEFAULT_ABI == ABI_V4
9358 && (GET_CODE (operands[1]) == SYMBOL_REF
9359 || GET_CODE (operands[1]) == CONST)
9360 && small_data_operand (operands[1], mode))
9362 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9363 return;
9366 if (DEFAULT_ABI == ABI_V4
9367 && mode == Pmode && mode == SImode
9368 && flag_pic == 1 && got_operand (operands[1], mode))
9370 emit_insn (gen_movsi_got (operands[0], operands[1]));
9371 return;
9374 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9375 && TARGET_NO_TOC
9376 && ! flag_pic
9377 && mode == Pmode
9378 && CONSTANT_P (operands[1])
9379 && GET_CODE (operands[1]) != HIGH
9380 && GET_CODE (operands[1]) != CONST_INT)
9382 rtx target = (!can_create_pseudo_p ()
9383 ? operands[0]
9384 : gen_reg_rtx (mode));
9386 /* If this is a function address on -mcall-aixdesc,
9387 convert it to the address of the descriptor. */
9388 if (DEFAULT_ABI == ABI_AIX
9389 && GET_CODE (operands[1]) == SYMBOL_REF
9390 && XSTR (operands[1], 0)[0] == '.')
9392 const char *name = XSTR (operands[1], 0);
9393 rtx new_ref;
9394 while (*name == '.')
9395 name++;
9396 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9397 CONSTANT_POOL_ADDRESS_P (new_ref)
9398 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9399 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9400 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9401 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9402 operands[1] = new_ref;
9405 if (DEFAULT_ABI == ABI_DARWIN)
9407 #if TARGET_MACHO
9408 if (MACHO_DYNAMIC_NO_PIC_P)
9410 /* Take care of any required data indirection. */
9411 operands[1] = rs6000_machopic_legitimize_pic_address (
9412 operands[1], mode, operands[0]);
9413 if (operands[0] != operands[1])
9414 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9415 return;
9417 #endif
9418 emit_insn (gen_macho_high (target, operands[1]));
9419 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9420 return;
9423 emit_insn (gen_elf_high (target, operands[1]));
9424 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9425 return;
9428 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9429 and we have put it in the TOC, we just need to make a TOC-relative
9430 reference to it. */
9431 if (TARGET_TOC
9432 && GET_CODE (operands[1]) == SYMBOL_REF
9433 && use_toc_relative_ref (operands[1], mode))
9434 operands[1] = create_TOC_reference (operands[1], operands[0]);
9435 else if (mode == Pmode
9436 && CONSTANT_P (operands[1])
9437 && GET_CODE (operands[1]) != HIGH
9438 && ((GET_CODE (operands[1]) != CONST_INT
9439 && ! easy_fp_constant (operands[1], mode))
9440 || (GET_CODE (operands[1]) == CONST_INT
9441 && (num_insns_constant (operands[1], mode)
9442 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9443 || (GET_CODE (operands[0]) == REG
9444 && FP_REGNO_P (REGNO (operands[0]))))
9445 && !toc_relative_expr_p (operands[1], false)
9446 && (TARGET_CMODEL == CMODEL_SMALL
9447 || can_create_pseudo_p ()
9448 || (REG_P (operands[0])
9449 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9452 #if TARGET_MACHO
9453 /* Darwin uses a special PIC legitimizer. */
9454 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9456 operands[1] =
9457 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9458 operands[0]);
9459 if (operands[0] != operands[1])
9460 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9461 return;
9463 #endif
9465 /* If we are to limit the number of things we put in the TOC and
9466 this is a symbol plus a constant we can add in one insn,
9467 just put the symbol in the TOC and add the constant. Don't do
9468 this if reload is in progress. */
9469 if (GET_CODE (operands[1]) == CONST
9470 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
9471 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9472 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9473 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9474 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
9475 && ! side_effects_p (operands[0]))
9477 rtx sym =
9478 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9479 rtx other = XEXP (XEXP (operands[1], 0), 1);
9481 sym = force_reg (mode, sym);
9482 emit_insn (gen_add3_insn (operands[0], sym, other));
9483 return;
9486 operands[1] = force_const_mem (mode, operands[1]);
9488 if (TARGET_TOC
9489 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9490 && constant_pool_expr_p (XEXP (operands[1], 0))
9491 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
9492 get_pool_constant (XEXP (operands[1], 0)),
9493 get_pool_mode (XEXP (operands[1], 0))))
9495 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9496 operands[0]);
9497 operands[1] = gen_const_mem (mode, tocref);
9498 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9501 break;
9503 case TImode:
9504 if (!VECTOR_MEM_VSX_P (TImode))
9505 rs6000_eliminate_indexed_memrefs (operands);
9506 break;
9508 case PTImode:
9509 rs6000_eliminate_indexed_memrefs (operands);
9510 break;
9512 default:
9513 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9516 /* Above, we may have called force_const_mem which may have returned
9517 an invalid address. If we can, fix this up; otherwise, reload will
9518 have to deal with it. */
9519 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9520 operands[1] = validize_mem (operands[1]);
9522 emit_set:
9523 emit_insn (gen_rtx_SET (operands[0], operands[1]));
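/* Editorial example for the Pmode constant path above: with
   -mcmodel=small on a 64-bit target, a constant such as
   0x1234567890abcdef needs five instructions (lis/ori/sldi/oris/ori),
   exceeding the two-insn budget, so it is forced into the constant
   pool and, given TARGET_TOC, reloaded through a TOC-relative
   reference instead.  */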
9526 /* Return true if a structure, union or array containing FIELD should be
9527 accessed using `BLKMODE'.
9529 For the SPE, simd types are V2SI, and gcc can be tempted to put the
9530 entire thing in a DI and use subregs to access the internals.
9531 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9532 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9533 best thing to do is set structs to BLKmode and avoid Severe Tire
9534 Damage.
9536 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9537 fit into 1, whereas DI still needs two. */
9539 static bool
9540 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9541 {
9542 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9543 || (TARGET_E500_DOUBLE && mode == DFmode));
9544 }
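/* Editorial example: with TARGET_SPE, the hypothetical

       typedef int v2si __attribute__ ((vector_size (8)));
       struct s { v2si v; };

   is forced to BLKmode so the struct is not shoehorned into a DImode
   pseudo; a single GPR can hold a V2SI but not a DI.  */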
9546 /* Nonzero if we can use a floating-point register to pass this arg. */
9547 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9548 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9549 && (CUM)->fregno <= FP_ARG_MAX_REG \
9550 && TARGET_HARD_FLOAT && TARGET_FPRS)
9552 /* Nonzero if we can use an AltiVec register to pass this arg. */
9553 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9554 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9555 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9556 && TARGET_ALTIVEC_ABI \
9557 && (NAMED))
9559 /* Walk down the type tree of TYPE counting consecutive base elements.
9560 If *MODEP is VOIDmode, then set it to the first valid floating point
9561 or vector type. If a non-floating point or vector type is found, or
9562 if a floating point or vector type that doesn't match a non-VOIDmode
9563 *MODEP is found, then return -1, otherwise return the count in the
9564 sub-tree. */
9566 static int
9567 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9569 machine_mode mode;
9570 HOST_WIDE_INT size;
9572 switch (TREE_CODE (type))
9574 case REAL_TYPE:
9575 mode = TYPE_MODE (type);
9576 if (!SCALAR_FLOAT_MODE_P (mode))
9577 return -1;
9579 if (*modep == VOIDmode)
9580 *modep = mode;
9582 if (*modep == mode)
9583 return 1;
9585 break;
9587 case COMPLEX_TYPE:
9588 mode = TYPE_MODE (TREE_TYPE (type));
9589 if (!SCALAR_FLOAT_MODE_P (mode))
9590 return -1;
9592 if (*modep == VOIDmode)
9593 *modep = mode;
9595 if (*modep == mode)
9596 return 2;
9598 break;
9600 case VECTOR_TYPE:
9601 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9602 return -1;
9604 /* Use V4SImode as representative of all 128-bit vector types. */
9605 size = int_size_in_bytes (type);
9606 switch (size)
9608 case 16:
9609 mode = V4SImode;
9610 break;
9611 default:
9612 return -1;
9615 if (*modep == VOIDmode)
9616 *modep = mode;
9618 /* Vector modes are considered to be opaque: two vectors are
9619 equivalent for the purposes of being homogeneous aggregates
9620 if they are the same size. */
9621 if (*modep == mode)
9622 return 1;
9624 break;
9626 case ARRAY_TYPE:
9628 int count;
9629 tree index = TYPE_DOMAIN (type);
9631 /* Can't handle incomplete types nor sizes that are not
9632 fixed. */
9633 if (!COMPLETE_TYPE_P (type)
9634 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9635 return -1;
9637 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9638 if (count == -1
9639 || !index
9640 || !TYPE_MAX_VALUE (index)
9641 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9642 || !TYPE_MIN_VALUE (index)
9643 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9644 || count < 0)
9645 return -1;
9647 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9648 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9650 /* There must be no padding. */
9651 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9652 return -1;
9654 return count;
9657 case RECORD_TYPE:
9659 int count = 0;
9660 int sub_count;
9661 tree field;
9663 /* Can't handle incomplete types nor sizes that are not
9664 fixed. */
9665 if (!COMPLETE_TYPE_P (type)
9666 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9667 return -1;
9669 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9671 if (TREE_CODE (field) != FIELD_DECL)
9672 continue;
9674 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9675 if (sub_count < 0)
9676 return -1;
9677 count += sub_count;
9680 /* There must be no padding. */
9681 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9682 return -1;
9684 return count;
9687 case UNION_TYPE:
9688 case QUAL_UNION_TYPE:
9690 /* These aren't very interesting except in a degenerate case. */
9691 int count = 0;
9692 int sub_count;
9693 tree field;
9695 /* Can't handle incomplete types nor sizes that are not
9696 fixed. */
9697 if (!COMPLETE_TYPE_P (type)
9698 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9699 return -1;
9701 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9703 if (TREE_CODE (field) != FIELD_DECL)
9704 continue;
9706 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9707 if (sub_count < 0)
9708 return -1;
9709 count = count > sub_count ? count : sub_count;
9712 /* There must be no padding. */
9713 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9714 return -1;
9716 return count;
9719 default:
9720 break;
9723 return -1;
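/* Editorial worked example: for the hypothetical

       struct hfa { double a; _Complex double b; double c[2]; };

   the walk returns 1 (a) + 2 (b) + 2 (c) = 5 with *MODEP = DFmode,
   and the no-padding checks hold since TYPE_SIZE is exactly
   5 * 64 bits.  */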
9726 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9727 float or vector aggregate that shall be passed in FP/vector registers
9728 according to the ELFv2 ABI, return the homogeneous element mode in
9729 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9731 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9733 static bool
9734 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9735 machine_mode *elt_mode,
9736 int *n_elts)
9738 /* Note that we do not accept complex types at the top level as
9739 homogeneous aggregates; these types are handled via the
9740 targetm.calls.split_complex_arg mechanism. Complex types
9741 can be elements of homogeneous aggregates, however. */
9742 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9744 machine_mode field_mode = VOIDmode;
9745 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9747 if (field_count > 0)
9749 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9750 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9752 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9753 up to AGGR_ARG_NUM_REG registers. */
9754 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9756 if (elt_mode)
9757 *elt_mode = field_mode;
9758 if (n_elts)
9759 *n_elts = field_count;
9760 return true;
9765 if (elt_mode)
9766 *elt_mode = mode;
9767 if (n_elts)
9768 *n_elts = 1;
9769 return false;
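/* Editorial example: under ELFv2 the five-double struct sketched above
   is a homogeneous aggregate -- field_mode DFmode, n_regs 1 per
   element, 5 <= AGGR_ARG_NUM_REG -- so it is passed in five
   consecutive FPRs; nine doubles would fail the register-count check
   and fall back to GPR/stack passing.  */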
9772 /* Return a nonzero value to say to return the function value in
9773 memory, just as large structures are always returned. TYPE will be
9774 the data type of the value, and FNTYPE will be the type of the
9775 function doing the returning, or @code{NULL} for libcalls.
9777 The AIX ABI for the RS/6000 specifies that all structures are
9778 returned in memory. The Darwin ABI does the same.
9780 For the Darwin 64 Bit ABI, a function result can be returned in
9781 registers or in memory, depending on the size of the return data
9782 type. If it is returned in registers, the value occupies the same
9783 registers as it would if it were the first and only function
9784 argument. Otherwise, the function places its result in memory at
9785 the location pointed to by GPR3.
9787 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9788 but a draft put them in memory, and GCC used to implement the draft
9789 instead of the final standard. Therefore, aix_struct_return
9790 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9791 compatibility can change DRAFT_V4_STRUCT_RET to override the
9792 default, and -m switches get the final word. See
9793 rs6000_option_override_internal for more details.
9795 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9796 long double support is enabled. These values are returned in memory.
9798 int_size_in_bytes returns -1 for variable size objects, which go in
9799 memory always. The cast to unsigned makes -1 > 8. */
9801 static bool
9802 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9804 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9805 if (TARGET_MACHO
9806 && rs6000_darwin64_abi
9807 && TREE_CODE (type) == RECORD_TYPE
9808 && int_size_in_bytes (type) > 0)
9810 CUMULATIVE_ARGS valcum;
9811 rtx valret;
9813 valcum.words = 0;
9814 valcum.fregno = FP_ARG_MIN_REG;
9815 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9816 /* Do a trial code generation as if this were going to be passed
9817 as an argument; if any part goes in memory, we return NULL. */
9818 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9819 if (valret)
9820 return false;
9821 /* Otherwise fall through to more conventional ABI rules. */
9824 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
9825 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9826 NULL, NULL))
9827 return false;
9829 /* The ELFv2 ABI returns aggregates up to 16B in registers */
9830 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9831 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9832 return false;
9834 if (AGGREGATE_TYPE_P (type)
9835 && (aix_struct_return
9836 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9837 return true;
9839 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9840 modes only exist for GCC vector types if -maltivec. */
9841 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9842 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9843 return false;
9845 /* Return synthetic vectors in memory. */
9846 if (TREE_CODE (type) == VECTOR_TYPE
9847 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9849 static bool warned_for_return_big_vectors = false;
9850 if (!warned_for_return_big_vectors)
9852 warning (0, "GCC vector returned by reference: "
9853 "non-standard ABI extension with no compatibility guarantee");
9854 warned_for_return_big_vectors = true;
9856 return true;
9859 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
9860 && FLOAT128_IEEE_P (TYPE_MODE (type)))
9861 return true;
9863 return false;
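/* Editorial examples: under ELFv2, "struct { double a, b; }" is a
   homogeneous aggregate and is returned in FPRs, and
   "struct { char c[16]; }" fits the 16-byte window and is returned in
   GPRs, while "struct { char c[17]; }" goes to memory; under the AIX
   rules (aix_struct_return) all three would be returned in memory.  */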
9866 /* Specify whether values returned in registers should be at the most
9867 significant end of a register. We want aggregates returned by
9868 value to match the way aggregates are passed to functions. */
9870 static bool
9871 rs6000_return_in_msb (const_tree valtype)
9872 {
9873 return (DEFAULT_ABI == ABI_ELFv2
9874 && BYTES_BIG_ENDIAN
9875 && AGGREGATE_TYPE_P (valtype)
9876 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9877 }
9879 #ifdef HAVE_AS_GNU_ATTRIBUTE
9880 /* Return TRUE if a call to function FNDECL may be one that
9881 potentially affects the function calling ABI of the object file. */
9883 static bool
9884 call_ABI_of_interest (tree fndecl)
9885 {
9886 if (symtab->state == EXPANSION)
9887 {
9888 struct cgraph_node *c_node;
9889 
9890 /* Libcalls are always interesting. */
9891 if (fndecl == NULL_TREE)
9892 return true;
9893 
9894 /* Any call to an external function is interesting. */
9895 if (DECL_EXTERNAL (fndecl))
9896 return true;
9897 
9898 /* Interesting functions that we are emitting in this object file. */
9899 c_node = cgraph_node::get (fndecl);
9900 c_node = c_node->ultimate_alias_target ();
9901 return !c_node->only_called_directly_p ();
9902 }
9903 return false;
9904 }
9905 #endif
9907 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9908 for a call to a function whose data type is FNTYPE.
9909 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9911 For incoming args we set the number of arguments in the prototype large
9912 so we never return a PARALLEL. */
9914 void
9915 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9916 rtx libname ATTRIBUTE_UNUSED, int incoming,
9917 int libcall, int n_named_args,
9918 tree fndecl ATTRIBUTE_UNUSED,
9919 machine_mode return_mode ATTRIBUTE_UNUSED)
9921 static CUMULATIVE_ARGS zero_cumulative;
9923 *cum = zero_cumulative;
9924 cum->words = 0;
9925 cum->fregno = FP_ARG_MIN_REG;
9926 cum->vregno = ALTIVEC_ARG_MIN_REG;
9927 cum->prototype = (fntype && prototype_p (fntype));
9928 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9929 ? CALL_LIBCALL : CALL_NORMAL);
9930 cum->sysv_gregno = GP_ARG_MIN_REG;
9931 cum->stdarg = stdarg_p (fntype);
9932 cum->libcall = libcall;
9934 cum->nargs_prototype = 0;
9935 if (incoming || cum->prototype)
9936 cum->nargs_prototype = n_named_args;
9938 /* Check for a longcall attribute. */
9939 if ((!fntype && rs6000_default_long_calls)
9940 || (fntype
9941 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9942 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9943 cum->call_cookie |= CALL_LONG;
9945 if (TARGET_DEBUG_ARG)
9947 fprintf (stderr, "\ninit_cumulative_args:");
9948 if (fntype)
9950 tree ret_type = TREE_TYPE (fntype);
9951 fprintf (stderr, " ret code = %s,",
9952 get_tree_code_name (TREE_CODE (ret_type)));
9955 if (cum->call_cookie & CALL_LONG)
9956 fprintf (stderr, " longcall,");
9958 fprintf (stderr, " proto = %d, nargs = %d\n",
9959 cum->prototype, cum->nargs_prototype);
9962 #ifdef HAVE_AS_GNU_ATTRIBUTE
9963 if (DEFAULT_ABI == ABI_V4)
9965 cum->escapes = call_ABI_of_interest (fndecl);
9966 if (cum->escapes)
9968 tree return_type;
9970 if (fntype)
9972 return_type = TREE_TYPE (fntype);
9973 return_mode = TYPE_MODE (return_type);
9975 else
9976 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9978 if (return_type != NULL)
9980 if (TREE_CODE (return_type) == RECORD_TYPE
9981 && TYPE_TRANSPARENT_AGGR (return_type))
9983 return_type = TREE_TYPE (first_field (return_type));
9984 return_mode = TYPE_MODE (return_type);
9986 if (AGGREGATE_TYPE_P (return_type)
9987 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9988 <= 8))
9989 rs6000_returns_struct = true;
9991 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
9992 rs6000_passes_float = true;
9993 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9994 || SPE_VECTOR_MODE (return_mode))
9995 rs6000_passes_vector = true;
9998 #endif
10000 if (fntype
10001 && !TARGET_ALTIVEC
10002 && TARGET_ALTIVEC_ABI
10003 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10005 error ("cannot return value in vector register because"
10006 " altivec instructions are disabled, use -maltivec"
10007 " to enable them");
10011 /* The mode the ABI uses for a word. This is not the same as word_mode
10012 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10014 static machine_mode
10015 rs6000_abi_word_mode (void)
10016 {
10017 return TARGET_32BIT ? SImode : DImode;
10018 }
10020 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10021 static char *
10022 rs6000_offload_options (void)
10023 {
10024 if (TARGET_64BIT)
10025 return xstrdup ("-foffload-abi=lp64");
10026 else
10027 return xstrdup ("-foffload-abi=ilp32");
10028 }
10030 /* On rs6000, function arguments are promoted, as are function return
10031 values. */
10033 static machine_mode
10034 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10035 machine_mode mode,
10036 int *punsignedp ATTRIBUTE_UNUSED,
10037 const_tree, int)
10038 {
10039 PROMOTE_MODE (mode, *punsignedp, type);
10040 
10041 return mode;
10042 }
10044 /* Return true if TYPE must be passed on the stack and not in registers. */
10046 static bool
10047 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10048 {
10049 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10050 return must_pass_in_stack_var_size (mode, type);
10051 else
10052 return must_pass_in_stack_var_size_or_pad (mode, type);
10053 }
10055 /* If defined, a C expression which determines whether, and in which
10056 direction, to pad out an argument with extra space. The value
10057 should be of type `enum direction': either `upward' to pad above
10058 the argument, `downward' to pad below, or `none' to inhibit
10059 padding.
10061 For the AIX ABI structs are always stored left shifted in their
10062 argument slot. */
10064 enum direction
10065 function_arg_padding (machine_mode mode, const_tree type)
10067 #ifndef AGGREGATE_PADDING_FIXED
10068 #define AGGREGATE_PADDING_FIXED 0
10069 #endif
10070 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10071 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10072 #endif
10074 if (!AGGREGATE_PADDING_FIXED)
10076 /* GCC used to pass structures of the same size as integer types as
10077 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
10078 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10079 passed padded downward, except that -mstrict-align further
10080 muddied the water in that multi-component structures of 2 and 4
10081 bytes in size were passed padded upward.
10083 The following arranges for best compatibility with previous
10084 versions of gcc, but removes the -mstrict-align dependency. */
10085 if (BYTES_BIG_ENDIAN)
10087 HOST_WIDE_INT size = 0;
10089 if (mode == BLKmode)
10091 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10092 size = int_size_in_bytes (type);
10094 else
10095 size = GET_MODE_SIZE (mode);
10097 if (size == 1 || size == 2 || size == 4)
10098 return downward;
10100 return upward;
10103 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10105 if (type != 0 && AGGREGATE_TYPE_P (type))
10106 return upward;
10109 /* Fall back to the default. */
10110 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
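/* Editorial example: on big-endian AIX, "struct { short s; }"
   (size 2) pads downward -- right-justified in its slot, as an
   integer of the same size would be -- while 3-byte and 8-byte
   aggregates pad upward.  */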
10113 /* If defined, a C expression that gives the alignment boundary, in bits,
10114 of an argument with the specified mode and type. If it is not defined,
10115 PARM_BOUNDARY is used for all arguments.
10117 V.4 wants long longs and doubles to be double word aligned. Just
10118 testing the mode size is a boneheaded way to do this as it means
10119 that other types such as complex int are also double word aligned.
10120 However, we're stuck with this because changing the ABI might break
10121 existing library interfaces.
10123 Doubleword align SPE vectors.
10124 Quadword align Altivec/VSX vectors.
10125 Quadword align large synthetic vector types. */
10127 static unsigned int
10128 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10130 machine_mode elt_mode;
10131 int n_elts;
10133 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10135 if (DEFAULT_ABI == ABI_V4
10136 && (GET_MODE_SIZE (mode) == 8
10137 || (TARGET_HARD_FLOAT
10138 && TARGET_FPRS
10139 && FLOAT128_2REG_P (mode))))
10140 return 64;
10141 else if (FLOAT128_VECTOR_P (mode))
10142 return 128;
10143 else if (SPE_VECTOR_MODE (mode)
10144 || (type && TREE_CODE (type) == VECTOR_TYPE
10145 && int_size_in_bytes (type) >= 8
10146 && int_size_in_bytes (type) < 16))
10147 return 64;
10148 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10149 || (type && TREE_CODE (type) == VECTOR_TYPE
10150 && int_size_in_bytes (type) >= 16))
10151 return 128;
10153 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10154 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10155 -mcompat-align-parm is used. */
10156 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10157 || DEFAULT_ABI == ABI_ELFv2)
10158 && type && TYPE_ALIGN (type) > 64)
10160 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10161 or homogeneous float/vector aggregates here. We already handled
10162 vector aggregates above, but still need to check for float here. */
10163 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10164 && !SCALAR_FLOAT_MODE_P (elt_mode));
10166 /* We used to check for BLKmode instead of the above aggregate type
10167 check. Warn when this results in any difference to the ABI. */
10168 if (aggregate_p != (mode == BLKmode))
10170 static bool warned;
10171 if (!warned && warn_psabi)
10173 warned = true;
10174 inform (input_location,
10175 "the ABI of passing aggregates with %d-byte alignment"
10176 " has changed in GCC 5",
10177 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10181 if (aggregate_p)
10182 return 128;
10185 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10186 implement the "aggregate type" check as a BLKmode check here; this
10187 means certain aggregate types are in fact not aligned. */
10188 if (TARGET_MACHO && rs6000_darwin64_abi
10189 && mode == BLKmode
10190 && type && TYPE_ALIGN (type) > 64)
10191 return 128;
10193 return PARM_BOUNDARY;
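/* Editorial examples: on ABI_V4, "long long" (GET_MODE_SIZE == 8)
   aligns to 64; any AltiVec/VSX vector aligns to 128; and under ELFv2
   a hypothetical "struct { int x; } __attribute__ ((aligned (32)))"
   has TYPE_ALIGN > 64 and counts as an aggregate, so it aligns to 128
   (this is a case where the GCC 5 psABI note above may fire, since
   the type's mode is not BLKmode).  */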
10196 /* The offset in words to the start of the parameter save area. */
10198 static unsigned int
10199 rs6000_parm_offset (void)
10200 {
10201 return (DEFAULT_ABI == ABI_V4 ? 2
10202 : DEFAULT_ABI == ABI_ELFv2 ? 4
10203 : 6);
10204 }
10206 /* For a function parm of MODE and TYPE, return the starting word in
10207 the parameter area. NWORDS of the parameter area are already used. */
10209 static unsigned int
10210 rs6000_parm_start (machine_mode mode, const_tree type,
10211 unsigned int nwords)
10212 {
10213 unsigned int align;
10214 
10215 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10216 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10217 }
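/* Editorial worked example: under ELFv2 on a 64-bit target
   (rs6000_parm_offset () == 4, PARM_BOUNDARY == 64), a 16-byte-aligned
   argument with NWORDS == 3 gives align == 128/64 - 1 == 1 and
   start == 3 + (-(4 + 3) & 1) == 4, bumping the argument to the next
   even doubleword of the save area.  */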
10219 /* Compute the size (in words) of a function argument. */
10221 static unsigned long
10222 rs6000_arg_size (machine_mode mode, const_tree type)
10223 {
10224 unsigned long size;
10225 
10226 if (mode != BLKmode)
10227 size = GET_MODE_SIZE (mode);
10228 else
10229 size = int_size_in_bytes (type);
10230 
10231 if (TARGET_32BIT)
10232 return (size + 3) >> 2;
10233 else
10234 return (size + 7) >> 3;
10235 }
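/* Editorial examples: a 10-byte BLKmode struct occupies
   (10 + 7) >> 3 == 2 doublewords on a 64-bit target and
   (10 + 3) >> 2 == 3 words on 32-bit; DFmode is 1 doubleword or
   2 words respectively.  */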
10237 /* Use this to flush pending int fields. */
10239 static void
10240 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10241 HOST_WIDE_INT bitpos, int final)
10243 unsigned int startbit, endbit;
10244 int intregs, intoffset;
10245 machine_mode mode;
10247 /* Handle the situations where a float is taking up the first half
10248 of the GPR, and the other half is empty (typically due to
10249 alignment restrictions). We can detect this by an 8-byte-aligned
10250 int field, or by seeing that this is the final flush for this
10251 argument. Count the word and continue on. */
10252 if (cum->floats_in_gpr == 1
10253 && (cum->intoffset % 64 == 0
10254 || (cum->intoffset == -1 && final)))
10256 cum->words++;
10257 cum->floats_in_gpr = 0;
10260 if (cum->intoffset == -1)
10261 return;
10263 intoffset = cum->intoffset;
10264 cum->intoffset = -1;
10265 cum->floats_in_gpr = 0;
10267 if (intoffset % BITS_PER_WORD != 0)
10269 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10270 MODE_INT, 0);
10271 if (mode == BLKmode)
10273 /* We couldn't find an appropriate mode, which happens,
10274 e.g., in packed structs when there are 3 bytes to load.
10275 Back intoffset back to the beginning of the word in this
10276 case. */
10277 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10281 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10282 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10283 intregs = (endbit - startbit) / BITS_PER_WORD;
10284 cum->words += intregs;
10285 /* words should be unsigned. */
10286 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10288 int pad = (endbit/BITS_PER_WORD) - cum->words;
10289 cum->words += pad;
10293 /* The darwin64 ABI calls for us to recurse down through structs,
10294 looking for elements passed in registers. Unfortunately, we have
10295 to track int register count here also because of misalignments
10296 in powerpc alignment mode. */
10298 static void
10299 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10300 const_tree type,
10301 HOST_WIDE_INT startbitpos)
10303 tree f;
10305 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10306 if (TREE_CODE (f) == FIELD_DECL)
10308 HOST_WIDE_INT bitpos = startbitpos;
10309 tree ftype = TREE_TYPE (f);
10310 machine_mode mode;
10311 if (ftype == error_mark_node)
10312 continue;
10313 mode = TYPE_MODE (ftype);
10315 if (DECL_SIZE (f) != 0
10316 && tree_fits_uhwi_p (bit_position (f)))
10317 bitpos += int_bit_position (f);
10319 /* ??? FIXME: else assume zero offset. */
10321 if (TREE_CODE (ftype) == RECORD_TYPE)
10322 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10323 else if (USE_FP_FOR_ARG_P (cum, mode))
10325 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10326 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10327 cum->fregno += n_fpregs;
10328 /* Single-precision floats present a special problem for
10329 us, because they are smaller than an 8-byte GPR, and so
10330 the structure-packing rules combined with the standard
10331 varargs behavior mean that we want to pack float/float
10332 and float/int combinations into a single register's
10333 space. This is complicated by the arg advance flushing,
10334 which works on arbitrarily large groups of int-type
10335 fields. */
10336 if (mode == SFmode)
10338 if (cum->floats_in_gpr == 1)
10340 /* Two floats in a word; count the word and reset
10341 the float count. */
10342 cum->words++;
10343 cum->floats_in_gpr = 0;
10345 else if (bitpos % 64 == 0)
10347 /* A float at the beginning of an 8-byte word;
10348 count it and put off adjusting cum->words until
10349 we see if an arg advance flush is going to do it
10350 for us. */
10351 cum->floats_in_gpr++;
10353 else
10355 /* The float is at the end of a word, preceded
10356 by integer fields, so the arg advance flush
10357 just above has already set cum->words and
10358 everything is taken care of. */
10361 else
10362 cum->words += n_fpregs;
10364 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10366 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10367 cum->vregno++;
10368 cum->words += 2;
10370 else if (cum->intoffset == -1)
10371 cum->intoffset = bitpos;
10375 /* Check for an item that needs to be considered specially under the darwin 64
10376 bit ABI. These are record types where the mode is BLK or the structure is
10377 8 bytes in size. */
10378 static int
10379 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10380 {
10381 return rs6000_darwin64_abi
10382 && ((mode == BLKmode
10383 && TREE_CODE (type) == RECORD_TYPE
10384 && int_size_in_bytes (type) > 0)
10385 || (type && TREE_CODE (type) == RECORD_TYPE
10386 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10387 }
10389 /* Update the data in CUM to advance over an argument
10390 of mode MODE and data type TYPE.
10391 (TYPE is null for libcalls where that information may not be available.)
10393 Note that for args passed by reference, function_arg will be called
10394 with MODE and TYPE set to that of the pointer to the arg, not the arg
10395 itself. */
10397 static void
10398 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10399 const_tree type, bool named, int depth)
10401 machine_mode elt_mode;
10402 int n_elts;
10404 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10406 /* Only tick off an argument if we're not recursing. */
10407 if (depth == 0)
10408 cum->nargs_prototype--;
10410 #ifdef HAVE_AS_GNU_ATTRIBUTE
10411 if (DEFAULT_ABI == ABI_V4
10412 && cum->escapes)
10414 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
10415 rs6000_passes_float = true;
10416 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10417 rs6000_passes_vector = true;
10418 else if (SPE_VECTOR_MODE (mode)
10419 && !cum->stdarg
10420 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10421 rs6000_passes_vector = true;
10423 #endif
10425 if (TARGET_ALTIVEC_ABI
10426 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10427 || (type && TREE_CODE (type) == VECTOR_TYPE
10428 && int_size_in_bytes (type) == 16)))
10430 bool stack = false;
10432 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10434 cum->vregno += n_elts;
10436 if (!TARGET_ALTIVEC)
10437 error ("cannot pass argument in vector register because"
10438 " altivec instructions are disabled, use -maltivec"
10439 " to enable them");
10441 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10442 even if it is going to be passed in a vector register.
10443 Darwin does the same for variable-argument functions. */
10444 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10445 && TARGET_64BIT)
10446 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10447 stack = true;
10449 else
10450 stack = true;
10452 if (stack)
10454 int align;
10456 /* Vector parameters must be 16-byte aligned. In 32-bit
10457 mode this means we need to take into account the offset
10458 to the parameter save area. In 64-bit mode, they just
10459 have to start on an even word, since the parameter save
10460 area is 16-byte aligned. */
10461 if (TARGET_32BIT)
10462 align = -(rs6000_parm_offset () + cum->words) & 3;
10463 else
10464 align = cum->words & 1;
10465 cum->words += align + rs6000_arg_size (mode, type);
10467 if (TARGET_DEBUG_ARG)
10469 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10470 cum->words, align);
10471 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10472 cum->nargs_prototype, cum->prototype,
10473 GET_MODE_NAME (mode));
10477 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
10478 && !cum->stdarg
10479 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10480 cum->sysv_gregno++;
10482 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10484 int size = int_size_in_bytes (type);
10485 /* Variable sized types have size == -1 and are
10486 treated as if consisting entirely of ints.
10487 Pad to 16 byte boundary if needed. */
10488 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10489 && (cum->words % 2) != 0)
10490 cum->words++;
10491 /* For varargs, we can just go up by the size of the struct. */
10492 if (!named)
10493 cum->words += (size + 7) / 8;
10494 else
10496 /* It is tempting to say int register count just goes up by
10497 sizeof(type)/8, but this is wrong in a case such as
10498 { int; double; int; } [powerpc alignment]. We have to
10499 grovel through the fields for these too. */
10500 cum->intoffset = 0;
10501 cum->floats_in_gpr = 0;
10502 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10503 rs6000_darwin64_record_arg_advance_flush (cum,
10504 size * BITS_PER_UNIT, 1);
10506 if (TARGET_DEBUG_ARG)
10508 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10509 cum->words, TYPE_ALIGN (type), size);
10510 fprintf (stderr,
10511 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10512 cum->nargs_prototype, cum->prototype,
10513 GET_MODE_NAME (mode));
10516 else if (DEFAULT_ABI == ABI_V4)
10518 if (TARGET_HARD_FLOAT && TARGET_FPRS
10519 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10520 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10521 || FLOAT128_2REG_P (mode)
10522 || DECIMAL_FLOAT_MODE_P (mode)))
10524 /* _Decimal128 must use an even/odd register pair. This assumes
10525 that the register number is odd when fregno is odd. */
10526 if (mode == TDmode && (cum->fregno % 2) == 1)
10527 cum->fregno++;
10529 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10530 <= FP_ARG_V4_MAX_REG)
10531 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10532 else
10534 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10535 if (mode == DFmode || FLOAT128_IBM_P (mode)
10536 || mode == DDmode || mode == TDmode)
10537 cum->words += cum->words & 1;
10538 cum->words += rs6000_arg_size (mode, type);
10541 else
10543 int n_words = rs6000_arg_size (mode, type);
10544 int gregno = cum->sysv_gregno;
10546 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10547 (r7,r8) or (r9,r10). As does any other 2 word item such
10548 as complex int due to a historical mistake. */
10549 if (n_words == 2)
10550 gregno += (1 - gregno) & 1;
10552 /* Multi-reg args are not split between registers and stack. */
10553 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10555 /* Long long and SPE vectors are aligned on the stack.
10556 So are other 2 word items such as complex int due to
10557 a historical mistake. */
10558 if (n_words == 2)
10559 cum->words += cum->words & 1;
10560 cum->words += n_words;
10563 /* Note: we continue to accumulate gregno even after we have started
10564 spilling to the stack; this lets expand_builtin_saveregs see that
10565 spilling has started. */
10566 cum->sysv_gregno = gregno + n_words;
10569 if (TARGET_DEBUG_ARG)
10571 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10572 cum->words, cum->fregno);
10573 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10574 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10575 fprintf (stderr, "mode = %4s, named = %d\n",
10576 GET_MODE_NAME (mode), named);
10579 else
10581 int n_words = rs6000_arg_size (mode, type);
10582 int start_words = cum->words;
10583 int align_words = rs6000_parm_start (mode, type, start_words);
10585 cum->words = align_words + n_words;
10587 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
10589 /* _Decimal128 must be passed in an even/odd float register pair.
10590 This assumes that the register number is odd when fregno is
10591 odd. */
10592 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10593 cum->fregno++;
10594 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10597 if (TARGET_DEBUG_ARG)
10599 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10600 cum->words, cum->fregno);
10601 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10602 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10603 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10604 named, align_words - start_words, depth);
10609 static void
10610 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10611 const_tree type, bool named)
10612 {
10613 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10614 0);
10615 }
10617 static rtx
10618 spe_build_register_parallel (machine_mode mode, int gregno)
10620 rtx r1, r3, r5, r7;
10622 switch (mode)
10624 case DFmode:
10625 r1 = gen_rtx_REG (DImode, gregno);
10626 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10627 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10629 case DCmode:
10630 case TFmode:
10631 r1 = gen_rtx_REG (DImode, gregno);
10632 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10633 r3 = gen_rtx_REG (DImode, gregno + 2);
10634 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10635 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10637 case TCmode:
10638 r1 = gen_rtx_REG (DImode, gregno);
10639 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10640 r3 = gen_rtx_REG (DImode, gregno + 2);
10641 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10642 r5 = gen_rtx_REG (DImode, gregno + 4);
10643 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10644 r7 = gen_rtx_REG (DImode, gregno + 6);
10645 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10646 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10648 default:
10649 gcc_unreachable ();
10653 /* Determine where to put a SIMD argument on the SPE. */
10654 static rtx
10655 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10656 const_tree type)
10658 int gregno = cum->sysv_gregno;
10660 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10661 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10662 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10663 || mode == DCmode || mode == TCmode))
10665 int n_words = rs6000_arg_size (mode, type);
10667 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10668 if (mode == DFmode)
10669 gregno += (1 - gregno) & 1;
10671 /* Multi-reg args are not split between registers and stack. */
10672 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10673 return NULL_RTX;
10675 return spe_build_register_parallel (mode, gregno);
10677 if (cum->stdarg)
10679 int n_words = rs6000_arg_size (mode, type);
10681 /* SPE vectors are put in odd registers. */
10682 if (n_words == 2 && (gregno & 1) == 0)
10683 gregno += 1;
10685 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10687 rtx r1, r2;
10688 machine_mode m = SImode;
10690 r1 = gen_rtx_REG (m, gregno);
10691 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10692 r2 = gen_rtx_REG (m, gregno + 1);
10693 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10694 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10696 else
10697 return NULL_RTX;
10699 else
10701 if (gregno <= GP_ARG_MAX_REG)
10702 return gen_rtx_REG (mode, gregno);
10703 else
10704 return NULL_RTX;
10708 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10709 structure between cum->intoffset and bitpos to integer registers. */
10711 static void
10712 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10713 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10715 machine_mode mode;
10716 unsigned int regno;
10717 unsigned int startbit, endbit;
10718 int this_regno, intregs, intoffset;
10719 rtx reg;
10721 if (cum->intoffset == -1)
10722 return;
10724 intoffset = cum->intoffset;
10725 cum->intoffset = -1;
10727 /* If this is the trailing part of a word, try to only load that
10728 much into the register. Otherwise load the whole register. Note
10729 that in the latter case we may pick up unwanted bits. It's not a
10730 problem at the moment, but we may wish to revisit this. */
10732 if (intoffset % BITS_PER_WORD != 0)
10734 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10735 MODE_INT, 0);
10736 if (mode == BLKmode)
10738 /* We couldn't find an appropriate mode, which happens,
10739 e.g., in packed structs when there are 3 bytes to load.
10740 Back intoffset back to the beginning of the word in this
10741 case. */
10742 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10743 mode = word_mode;
10746 else
10747 mode = word_mode;
10749 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10750 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10751 intregs = (endbit - startbit) / BITS_PER_WORD;
10752 this_regno = cum->words + intoffset / BITS_PER_WORD;
10754 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10755 cum->use_stack = 1;
10757 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10758 if (intregs <= 0)
10759 return;
10761 intoffset /= BITS_PER_UNIT;
10762 do
10763 {
10764 regno = GP_ARG_MIN_REG + this_regno;
10765 reg = gen_rtx_REG (mode, regno);
10766 rvec[(*k)++] =
10767 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10768 
10769 this_regno += 1;
10770 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10771 mode = word_mode;
10772 intregs -= 1;
10773 }
10774 while (intregs > 0);
10777 /* Recursive workhorse for the following. */
10779 static void
10780 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10781 HOST_WIDE_INT startbitpos, rtx rvec[],
10782 int *k)
10784 tree f;
10786 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10787 if (TREE_CODE (f) == FIELD_DECL)
10789 HOST_WIDE_INT bitpos = startbitpos;
10790 tree ftype = TREE_TYPE (f);
10791 machine_mode mode;
10792 if (ftype == error_mark_node)
10793 continue;
10794 mode = TYPE_MODE (ftype);
10796 if (DECL_SIZE (f) != 0
10797 && tree_fits_uhwi_p (bit_position (f)))
10798 bitpos += int_bit_position (f);
10800 /* ??? FIXME: else assume zero offset. */
10802 if (TREE_CODE (ftype) == RECORD_TYPE)
10803 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10804 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10806 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10807 #if 0
10808 switch (mode)
10810 case SCmode: mode = SFmode; break;
10811 case DCmode: mode = DFmode; break;
10812 case TCmode: mode = TFmode; break;
10813 default: break;
10815 #endif
10816 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10817 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10819 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10820 && (mode == TFmode || mode == TDmode));
10821 /* Long double or _Decimal128 split over regs and memory. */
10822 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10823 cum->use_stack=1;
10825 rvec[(*k)++]
10826 = gen_rtx_EXPR_LIST (VOIDmode,
10827 gen_rtx_REG (mode, cum->fregno++),
10828 GEN_INT (bitpos / BITS_PER_UNIT));
10829 if (FLOAT128_2REG_P (mode))
10830 cum->fregno++;
10832 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10834 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10835 rvec[(*k)++]
10836 = gen_rtx_EXPR_LIST (VOIDmode,
10837 gen_rtx_REG (mode, cum->vregno++),
10838 GEN_INT (bitpos / BITS_PER_UNIT));
10840 else if (cum->intoffset == -1)
10841 cum->intoffset = bitpos;
10845 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10846 the register(s) to be used for each field and subfield of a struct
10847 being passed by value, along with the offset of where the
10848 register's value may be found in the block. FP fields go in FP
10849 register, vector fields go in vector registers, and everything
10850 else goes in int registers, packed as in memory.
10852 This code is also used for function return values. RETVAL indicates
10853 whether this is the case.
10855 Much of this is taken from the SPARC V9 port, which has a similar
10856 calling convention. */
10858 static rtx
10859 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10860 bool named, bool retval)
10862 rtx rvec[FIRST_PSEUDO_REGISTER];
10863 int k = 1, kbase = 1;
10864 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10865 /* This is a copy; modifications are not visible to our caller. */
10866 CUMULATIVE_ARGS copy_cum = *orig_cum;
10867 CUMULATIVE_ARGS *cum = &copy_cum;
10869 /* Pad to 16 byte boundary if needed. */
10870 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10871 && (cum->words % 2) != 0)
10872 cum->words++;
10874 cum->intoffset = 0;
10875 cum->use_stack = 0;
10876 cum->named = named;
10878 /* Put entries into rvec[] for individual FP and vector fields, and
10879 for the chunks of memory that go in int regs. Note we start at
10880 element 1; 0 is reserved for an indication of using memory, and
10881 may or may not be filled in below. */
10882 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10883 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10885 /* If any part of the struct went on the stack put all of it there.
10886 This hack is because the generic code for
10887 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10888 parts of the struct are not at the beginning. */
10889 if (cum->use_stack)
10891 if (retval)
10892 return NULL_RTX; /* doesn't go in registers at all */
10893 kbase = 0;
10894 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10896 if (k > 1 || cum->use_stack)
10897 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10898 else
10899 return NULL_RTX;
10902 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10904 static rtx
10905 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10906 int align_words)
10908 int n_units;
10909 int i, k;
10910 rtx rvec[GP_ARG_NUM_REG + 1];
10912 if (align_words >= GP_ARG_NUM_REG)
10913 return NULL_RTX;
10915 n_units = rs6000_arg_size (mode, type);
10917 /* Optimize the simple case where the arg fits in one gpr, except in
10918 the case of BLKmode due to assign_parms assuming that registers are
10919 BITS_PER_WORD wide. */
10920 if (n_units == 0
10921 || (n_units == 1 && mode != BLKmode))
10922 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10924 k = 0;
10925 if (align_words + n_units > GP_ARG_NUM_REG)
10926 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10927 using a magic NULL_RTX component.
10928 This is not strictly correct. Only some of the arg belongs in
10929 memory, not all of it. However, the normal scheme using
10930 function_arg_partial_nregs can result in unusual subregs, eg.
10931 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10932 store the whole arg to memory is often more efficient than code
10933 to store pieces, and we know that space is available in the right
10934 place for the whole arg. */
10935 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10937 i = 0;
10938 do
10940 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10941 rtx off = GEN_INT (i++ * 4);
10942 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10944 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10946 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
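/* A worked example (values are hypothetical): an 8-byte argument
   arriving at align_words == 7 has n_units == 2 and straddles the
   last GPR and memory, so the code above emits
     (parallel [(expr_list (nil) (const_int 0))
                (expr_list (reg:SI r10) (const_int 0))])
   i.e. the magic NULL_RTX element followed by the one SImode piece
   that still fits in r10.  */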
10949 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10950 but must also be copied into the parameter save area starting at
10951 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10952 to the GPRs and/or memory. Return the number of elements used. */
10954 static int
10955 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10956 int align_words, rtx *rvec)
10958 int k = 0;
10960 if (align_words < GP_ARG_NUM_REG)
10962 int n_words = rs6000_arg_size (mode, type);
10964 if (align_words + n_words > GP_ARG_NUM_REG
10965 || mode == BLKmode
10966 || (TARGET_32BIT && TARGET_POWERPC64))
10968 /* If this is partially on the stack, then we only
10969 include the portion actually in registers here. */
10970 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10971 int i = 0;
10973 if (align_words + n_words > GP_ARG_NUM_REG)
10975 /* Not all of the arg fits in gprs. Say that it goes in memory
10976 too, using a magic NULL_RTX component. Also see comment in
10977 rs6000_mixed_function_arg for why the normal
10978 function_arg_partial_nregs scheme doesn't work in this case. */
10979 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10982 do
10984 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10985 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10986 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10988 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10990 else
10992 /* The whole arg fits in gprs. */
10993 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10994 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10997 else
10999 /* It's entirely in memory. */
11000 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11003 return k;
11006 /* RVEC is a vector of K components of an argument of mode MODE.
11007 Construct the final function_arg return value from it. */
11009 static rtx
11010 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11012 gcc_assert (k >= 1);
11014 /* Avoid returning a PARALLEL in the trivial cases. */
11015 if (k == 1)
11017 if (XEXP (rvec[0], 0) == NULL_RTX)
11018 return NULL_RTX;
11020 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11021 return XEXP (rvec[0], 0);
11024 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11027 /* Determine where to put an argument to a function.
11028 Value is zero to push the argument on the stack,
11029 or a hard register in which to store the argument.
11031 MODE is the argument's machine mode.
11032 TYPE is the data type of the argument (as a tree).
11033 This is null for libcalls where that information may
11034 not be available.
11035 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11036 the preceding args and about the function being called. It is
11037 not modified in this routine.
11038 NAMED is nonzero if this argument is a named parameter
11039 (otherwise it is an extra parameter matching an ellipsis).
11041 On RS/6000 the first eight words of non-FP are normally in registers
11042 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11043 Under V.4, the first 8 FP args are in registers.
11045 If this is floating-point and no prototype is specified, we use
11046 both an FP and integer register (or possibly FP reg and stack). Library
11047 functions (when CALL_LIBCALL is set) always have the proper types for args,
11048 so we can pass the FP value just in one register. emit_library_function
11049 doesn't support PARALLEL anyway.
11051 Note that for args passed by reference, function_arg will be called
11052 with MODE and TYPE set to that of the pointer to the arg, not the arg
11053 itself. */
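/* As a schematic example: under the AIX ABI, a prototyped call passing
   a single named double yields (reg:DF f1), where f1 stands for
   FP_ARG_MIN_REG; the value fits in one FP register, so no PARALLEL
   is built.  */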
11055 static rtx
11056 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11057 const_tree type, bool named)
11059 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11060 enum rs6000_abi abi = DEFAULT_ABI;
11061 machine_mode elt_mode;
11062 int n_elts;
11064 /* Return a marker to indicate whether CR1 needs to set or clear the
11065 bit that V.4 uses to say fp args were passed in registers.
11066 Assume that we don't need the marker for software floating point,
11067 or compiler generated library calls. */
11068 if (mode == VOIDmode)
11070 if (abi == ABI_V4
11071 && (cum->call_cookie & CALL_LIBCALL) == 0
11072 && (cum->stdarg
11073 || (cum->nargs_prototype < 0
11074 && (cum->prototype || TARGET_NO_PROTOTYPE))))
11076 /* For the SPE, we need to crxor CR6 always. */
11077 if (TARGET_SPE_ABI)
11078 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
11079 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
11080 return GEN_INT (cum->call_cookie
11081 | ((cum->fregno == FP_ARG_MIN_REG)
11082 ? CALL_V4_SET_FP_ARGS
11083 : CALL_V4_CLEAR_FP_ARGS));
11086 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11089 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11091 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11093 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11094 if (rslt != NULL_RTX)
11095 return rslt;
11096 /* Else fall through to usual handling. */
11099 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11101 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11102 rtx r, off;
11103 int i, k = 0;
11105 /* Do we also need to pass this argument in the parameter save area?
11106 Library support functions for IEEE 128-bit are assumed to not need the
11107 value passed both in GPRs and in vector registers. */
11108 if (TARGET_64BIT && !cum->prototype
11109 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11111 int align_words = ROUND_UP (cum->words, 2);
11112 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11115 /* Describe where this argument goes in the vector registers. */
11116 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11118 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11119 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11120 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11123 return rs6000_finish_function_arg (mode, rvec, k);
11125 else if (TARGET_ALTIVEC_ABI
11126 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11127 || (type && TREE_CODE (type) == VECTOR_TYPE
11128 && int_size_in_bytes (type) == 16)))
11130 if (named || abi == ABI_V4)
11131 return NULL_RTX;
11132 else
11134 /* Vector parameters to varargs functions under AIX or Darwin
11135 get passed in memory and possibly also in GPRs. */
11136 int align, align_words, n_words;
11137 machine_mode part_mode;
11139 /* Vector parameters must be 16-byte aligned. In 32-bit
11140 mode this means we need to take into account the offset
11141 to the parameter save area. In 64-bit mode, they just
11142 have to start on an even word, since the parameter save
11143 area is 16-byte aligned. */
11144 if (TARGET_32BIT)
11145 align = -(rs6000_parm_offset () + cum->words) & 3;
11146 else
11147 align = cum->words & 1;
11148 align_words = cum->words + align;
11150 /* Out of registers? Memory, then. */
11151 if (align_words >= GP_ARG_NUM_REG)
11152 return NULL_RTX;
11154 if (TARGET_32BIT && TARGET_POWERPC64)
11155 return rs6000_mixed_function_arg (mode, type, align_words);
11157 /* The vector value goes in GPRs. Only the part of the
11158 value in GPRs is reported here. */
11159 part_mode = mode;
11160 n_words = rs6000_arg_size (mode, type);
11161 if (align_words + n_words > GP_ARG_NUM_REG)
11162 /* Fortunately, there are only two possibilities, the value
11163 is either wholly in GPRs or half in GPRs and half not. */
11164 part_mode = DImode;
11166 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11169 else if (TARGET_SPE_ABI && TARGET_SPE
11170 && (SPE_VECTOR_MODE (mode)
11171 || (TARGET_E500_DOUBLE && (mode == DFmode
11172 || mode == DCmode
11173 || mode == TFmode
11174 || mode == TCmode))))
11175 return rs6000_spe_function_arg (cum, mode, type);
11177 else if (abi == ABI_V4)
11179 if (TARGET_HARD_FLOAT && TARGET_FPRS
11180 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
11181 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
11182 || FLOAT128_2REG_P (mode)
11183 || DECIMAL_FLOAT_MODE_P (mode)))
11185 /* _Decimal128 must use an even/odd register pair. This assumes
11186 that the register number is odd when fregno is odd. */
11187 if (mode == TDmode && (cum->fregno % 2) == 1)
11188 cum->fregno++;
11190 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11191 <= FP_ARG_V4_MAX_REG)
11192 return gen_rtx_REG (mode, cum->fregno);
11193 else
11194 return NULL_RTX;
11196 else
11198 int n_words = rs6000_arg_size (mode, type);
11199 int gregno = cum->sysv_gregno;
11201 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11202 (r7,r8) or (r9,r10). So is any other 2-word item, such
11203 as complex int, due to a historical mistake.
11204 if (n_words == 2)
11205 gregno += (1 - gregno) & 1;
11207 /* Multi-reg args are not split between registers and stack. */
11208 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11209 return NULL_RTX;
11211 if (TARGET_32BIT && TARGET_POWERPC64)
11212 return rs6000_mixed_function_arg (mode, type,
11213 gregno - GP_ARG_MIN_REG);
11214 return gen_rtx_REG (mode, gregno);
11217 else
11219 int align_words = rs6000_parm_start (mode, type, cum->words);
11221 /* _Decimal128 must be passed in an even/odd float register pair.
11222 This assumes that the register number is odd when fregno is odd. */
11223 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11224 cum->fregno++;
11226 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11228 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11229 rtx r, off;
11230 int i, k = 0;
11231 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11232 int fpr_words;
11234 /* Do we also need to pass this argument in the parameter
11235 save area? */
11236 if (type && (cum->nargs_prototype <= 0
11237 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11238 && TARGET_XL_COMPAT
11239 && align_words >= GP_ARG_NUM_REG)))
11240 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11242 /* Describe where this argument goes in the fprs. */
11243 for (i = 0; i < n_elts
11244 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11246 /* Check if the argument is split over registers and memory.
11247 This can only ever happen for long double or _Decimal128;
11248 complex types are handled via split_complex_arg. */
11249 machine_mode fmode = elt_mode;
11250 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11252 gcc_assert (FLOAT128_2REG_P (fmode));
11253 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11256 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11257 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11258 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11261 /* If there were not enough FPRs to hold the argument, the rest
11262 usually goes into memory. However, if the current position
11263 is still within the register parameter area, a portion may
11264 actually have to go into GPRs.
11266 Note that it may happen that the portion of the argument
11267 passed in the first "half" of the first GPR was already
11268 passed in the last FPR as well.
11270 For unnamed arguments, we already set up GPRs to cover the
11271 whole argument in rs6000_psave_function_arg, so there is
11272 nothing further to do at this point. */
11273 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11274 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11275 && cum->nargs_prototype > 0)
11277 static bool warned;
11279 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11280 int n_words = rs6000_arg_size (mode, type);
11282 align_words += fpr_words;
11283 n_words -= fpr_words;
11285 do
11287 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11288 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11289 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11291 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11293 if (!warned && warn_psabi)
11295 warned = true;
11296 inform (input_location,
11297 "the ABI of passing homogeneous float aggregates"
11298 " has changed in GCC 5");
11302 return rs6000_finish_function_arg (mode, rvec, k);
11304 else if (align_words < GP_ARG_NUM_REG)
11306 if (TARGET_32BIT && TARGET_POWERPC64)
11307 return rs6000_mixed_function_arg (mode, type, align_words);
11309 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11311 else
11312 return NULL_RTX;
11316 /* For an arg passed partly in registers and partly in memory, this is
11317 the number of bytes passed in registers. For args passed entirely in
11318 registers or entirely in memory, zero. When an arg is described by a
11319 PARALLEL, perhaps using more than one register type, this function
11320 returns the number of bytes used by the first element of the PARALLEL. */
11322 static int
11323 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11324 tree type, bool named)
11326 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11327 bool passed_in_gprs = true;
11328 int ret = 0;
11329 int align_words;
11330 machine_mode elt_mode;
11331 int n_elts;
11333 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11335 if (DEFAULT_ABI == ABI_V4)
11336 return 0;
11338 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11340 /* If we are passing this arg in the fixed parameter save area (gprs or
11341 memory) as well as VRs, we do not use the partial bytes mechanism;
11342 instead, rs6000_function_arg will return a PARALLEL including a memory
11343 element as necessary. Library support functions for IEEE 128-bit are
11344 assumed to not need the value passed both in GPRs and in vector
11345 registers. */
11346 if (TARGET_64BIT && !cum->prototype
11347 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11348 return 0;
11350 /* Otherwise, we pass in VRs only. Check for partial copies. */
11351 passed_in_gprs = false;
11352 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11353 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11356 /* In this complicated case we just disable the partial_nregs code. */
11357 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11358 return 0;
11360 align_words = rs6000_parm_start (mode, type, cum->words);
11362 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11364 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11366 /* If we are passing this arg in the fixed parameter save area
11367 (gprs or memory) as well as FPRs, we do not use the partial
11368 bytes mechanism; instead, rs6000_function_arg will return a
11369 PARALLEL including a memory element as necessary. */
11370 if (type
11371 && (cum->nargs_prototype <= 0
11372 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11373 && TARGET_XL_COMPAT
11374 && align_words >= GP_ARG_NUM_REG)))
11375 return 0;
11377 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11378 passed_in_gprs = false;
11379 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11381 /* Compute number of bytes / words passed in FPRs. If there
11382 is still space available in the register parameter area
11383 *after* that amount, a part of the argument will be passed
11384 in GPRs. In that case, the total amount passed in any
11385 registers is equal to the amount that would have been passed
11386 in GPRs if everything were passed there, so we fall back to
11387 the GPR code below to compute the appropriate value. */
11388 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11389 * MIN (8, GET_MODE_SIZE (elt_mode)));
11390 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11392 if (align_words + fpr_words < GP_ARG_NUM_REG)
11393 passed_in_gprs = true;
11394 else
11395 ret = fpr;
11399 if (passed_in_gprs
11400 && align_words < GP_ARG_NUM_REG
11401 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11402 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11404 if (ret != 0 && TARGET_DEBUG_ARG)
11405 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11407 return ret;
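/* A worked example (values are hypothetical): on a 64-bit AIX target,
   a 32-byte BLKmode struct whose first word lands at align_words == 6
   has only two of its four words left in GPRs, so this returns
   (GP_ARG_NUM_REG - align_words) * 8 == 16 bytes in registers.  */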
11410 /* A C expression that indicates when an argument must be passed by
11411 reference. If nonzero for an argument, a copy of that argument is
11412 made in memory and a pointer to the argument is passed instead of
11413 the argument itself. The pointer is passed in whatever way is
11414 appropriate for passing a pointer to that type.
11416 Under V.4, aggregates and long double are passed by reference.
11418 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11419 reference unless the AltiVec vector extension ABI is in force.
11421 As an extension to all ABIs, variable sized types are passed by
11422 reference. */
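/* For example: under V.4,  struct point { int x, y; }  is passed by
   reference because it is an aggregate, and on every ABI a
   variable-length array parameter is passed by reference because
   int_size_in_bytes returns a negative value for it.  */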
11424 static bool
11425 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11426 machine_mode mode, const_tree type,
11427 bool named ATTRIBUTE_UNUSED)
11429 if (!type)
11430 return 0;
11432 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11433 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11435 if (TARGET_DEBUG_ARG)
11436 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11437 return 1;
11440 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11442 if (TARGET_DEBUG_ARG)
11443 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11444 return 1;
11447 if (int_size_in_bytes (type) < 0)
11449 if (TARGET_DEBUG_ARG)
11450 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11451 return 1;
11454 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11455 modes only exist for GCC vector types if -maltivec. */
11456 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11458 if (TARGET_DEBUG_ARG)
11459 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11460 return 1;
11463 /* Pass synthetic vectors in memory. */
11464 if (TREE_CODE (type) == VECTOR_TYPE
11465 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11467 static bool warned_for_pass_big_vectors = false;
11468 if (TARGET_DEBUG_ARG)
11469 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11470 if (!warned_for_pass_big_vectors)
11472 warning (0, "GCC vector passed by reference: "
11473 "non-standard ABI extension with no compatibility guarantee");
11474 warned_for_pass_big_vectors = true;
11476 return 1;
11479 return 0;
11482 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11483 already processed. Return true if the parameter must be passed
11484 (fully or partially) on the stack. */
11486 static bool
11487 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11489 machine_mode mode;
11490 int unsignedp;
11491 rtx entry_parm;
11493 /* Catch errors. */
11494 if (type == NULL || type == error_mark_node)
11495 return true;
11497 /* Handle types with no storage requirement. */
11498 if (TYPE_MODE (type) == VOIDmode)
11499 return false;
11501 /* Handle complex types. */
11502 if (TREE_CODE (type) == COMPLEX_TYPE)
11503 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11504 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11506 /* Handle transparent aggregates. */
11507 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11508 && TYPE_TRANSPARENT_AGGR (type))
11509 type = TREE_TYPE (first_field (type));
11511 /* See if this arg was passed by invisible reference. */
11512 if (pass_by_reference (get_cumulative_args (args_so_far),
11513 TYPE_MODE (type), type, true))
11514 type = build_pointer_type (type);
11516 /* Find mode as it is passed by the ABI. */
11517 unsignedp = TYPE_UNSIGNED (type);
11518 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11520 /* If we must pass in stack, we need a stack. */
11521 if (rs6000_must_pass_in_stack (mode, type))
11522 return true;
11524 /* If there is no incoming register, we need a stack. */
11525 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11526 if (entry_parm == NULL)
11527 return true;
11529 /* Likewise if we need to pass both in registers and on the stack. */
11530 if (GET_CODE (entry_parm) == PARALLEL
11531 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11532 return true;
11534 /* Also true if we're partially in registers and partially not. */
11535 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11536 return true;
11538 /* Update info on where next arg arrives in registers. */
11539 rs6000_function_arg_advance (args_so_far, mode, type, true);
11540 return false;
11543 /* Return true if FUN has no prototype, has a variable argument
11544 list, or passes any parameter in memory. */
11546 static bool
11547 rs6000_function_parms_need_stack (tree fun, bool incoming)
11549 tree fntype, result;
11550 CUMULATIVE_ARGS args_so_far_v;
11551 cumulative_args_t args_so_far;
11553 if (!fun)
11554 /* Must be a libcall, all of which only use reg parms. */
11555 return false;
11557 fntype = fun;
11558 if (!TYPE_P (fun))
11559 fntype = TREE_TYPE (fun);
11561 /* Varargs functions need the parameter save area. */
11562 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11563 return true;
11565 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11566 args_so_far = pack_cumulative_args (&args_so_far_v);
11568 /* When incoming, we will have been passed the function decl.
11569 It is necessary to use the decl to handle K&R style functions,
11570 where TYPE_ARG_TYPES may not be available. */
11571 if (incoming)
11573 gcc_assert (DECL_P (fun));
11574 result = DECL_RESULT (fun);
11576 else
11577 result = TREE_TYPE (fntype);
11579 if (result && aggregate_value_p (result, fntype))
11581 if (!TYPE_P (result))
11582 result = TREE_TYPE (result);
11583 result = build_pointer_type (result);
11584 rs6000_parm_needs_stack (args_so_far, result);
11587 if (incoming)
11589 tree parm;
11591 for (parm = DECL_ARGUMENTS (fun);
11592 parm && parm != void_list_node;
11593 parm = TREE_CHAIN (parm))
11594 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11595 return true;
11597 else
11599 function_args_iterator args_iter;
11600 tree arg_type;
11602 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11603 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11604 return true;
11607 return false;
11610 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11611 usually a constant depending on the ABI. However, in the ELFv2 ABI
11612 the register parameter area is optional when calling a function that
11613 has a prototype in scope, has no variable argument list, and passes
11614 all parameters in registers. */
11616 int
11617 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11619 int reg_parm_stack_space;
11621 switch (DEFAULT_ABI)
11623 default:
11624 reg_parm_stack_space = 0;
11625 break;
11627 case ABI_AIX:
11628 case ABI_DARWIN:
11629 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11630 break;
11632 case ABI_ELFv2:
11633 /* ??? Recomputing this every time is a bit expensive. Is there
11634 a place to cache this information? */
11635 if (rs6000_function_parms_need_stack (fun, incoming))
11636 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11637 else
11638 reg_parm_stack_space = 0;
11639 break;
11642 return reg_parm_stack_space;
11645 static void
11646 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11648 int i;
11649 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11651 if (nregs == 0)
11652 return;
11654 for (i = 0; i < nregs; i++)
11656 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11657 if (reload_completed)
11659 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11660 tem = NULL_RTX;
11661 else
11662 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11663 i * GET_MODE_SIZE (reg_mode));
11665 else
11666 tem = replace_equiv_address (tem, XEXP (tem, 0));
11668 gcc_assert (tem);
11670 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11674 /* Perform any actions needed for a function that is receiving a
11675 variable number of arguments.
11677 CUM is as above.
11679 MODE and TYPE are the mode and type of the current parameter.
11681 PRETEND_SIZE is a variable that should be set to the amount of stack
11682 that must be pushed by the prolog to pretend that our caller pushed it.
11685 Normally, this macro will push all remaining incoming registers on the
11686 stack and set PRETEND_SIZE to the length of the registers pushed. */
11688 static void
11689 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11690 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11691 int no_rtl)
11693 CUMULATIVE_ARGS next_cum;
11694 int reg_size = TARGET_32BIT ? 4 : 8;
11695 rtx save_area = NULL_RTX, mem;
11696 int first_reg_offset;
11697 alias_set_type set;
11699 /* Skip the last named argument. */
11700 next_cum = *get_cumulative_args (cum);
11701 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11703 if (DEFAULT_ABI == ABI_V4)
11705 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11707 if (! no_rtl)
11709 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11710 HOST_WIDE_INT offset = 0;
11712 /* Try to optimize the size of the varargs save area.
11713 The ABI requires that ap.reg_save_area is doubleword
11714 aligned, but we don't need to allocate space for all
11715 the bytes, only those to which we actually will save
11716 anything. */
11717 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11718 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11719 if (TARGET_HARD_FLOAT && TARGET_FPRS
11720 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11721 && cfun->va_list_fpr_size)
11723 if (gpr_reg_num)
11724 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11725 * UNITS_PER_FP_WORD;
11726 if (cfun->va_list_fpr_size
11727 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11728 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11729 else
11730 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11731 * UNITS_PER_FP_WORD;
11733 if (gpr_reg_num)
11735 offset = -((first_reg_offset * reg_size) & ~7);
11736 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11738 gpr_reg_num = cfun->va_list_gpr_size;
11739 if (reg_size == 4 && (first_reg_offset & 1))
11740 gpr_reg_num++;
11742 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11744 else if (fpr_size)
11745 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11746 * UNITS_PER_FP_WORD
11747 - (int) (GP_ARG_NUM_REG * reg_size);
11749 if (gpr_size + fpr_size)
11751 rtx reg_save_area
11752 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11753 gcc_assert (GET_CODE (reg_save_area) == MEM);
11754 reg_save_area = XEXP (reg_save_area, 0);
11755 if (GET_CODE (reg_save_area) == PLUS)
11757 gcc_assert (XEXP (reg_save_area, 0)
11758 == virtual_stack_vars_rtx);
11759 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11760 offset += INTVAL (XEXP (reg_save_area, 1));
11762 else
11763 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11766 cfun->machine->varargs_save_offset = offset;
11767 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11770 else
11772 first_reg_offset = next_cum.words;
11773 save_area = crtl->args.internal_arg_pointer;
11775 if (targetm.calls.must_pass_in_stack (mode, type))
11776 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11779 set = get_varargs_alias_set ();
11780 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11781 && cfun->va_list_gpr_size)
11783 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11785 if (va_list_gpr_counter_field)
11786 /* V4 va_list_gpr_size counts number of registers needed. */
11787 n_gpr = cfun->va_list_gpr_size;
11788 else
11789 /* char * va_list instead counts number of bytes needed. */
11790 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11792 if (nregs > n_gpr)
11793 nregs = n_gpr;
11795 mem = gen_rtx_MEM (BLKmode,
11796 plus_constant (Pmode, save_area,
11797 first_reg_offset * reg_size));
11798 MEM_NOTRAP_P (mem) = 1;
11799 set_mem_alias_set (mem, set);
11800 set_mem_align (mem, BITS_PER_WORD);
11802 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11803 nregs);
11806 /* Save FP registers if needed. */
11807 if (DEFAULT_ABI == ABI_V4
11808 && TARGET_HARD_FLOAT && TARGET_FPRS
11809 && ! no_rtl
11810 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11811 && cfun->va_list_fpr_size)
11813 int fregno = next_cum.fregno, nregs;
11814 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11815 rtx lab = gen_label_rtx ();
11816 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11817 * UNITS_PER_FP_WORD);
11819 emit_jump_insn
11820 (gen_rtx_SET (pc_rtx,
11821 gen_rtx_IF_THEN_ELSE (VOIDmode,
11822 gen_rtx_NE (VOIDmode, cr1,
11823 const0_rtx),
11824 gen_rtx_LABEL_REF (VOIDmode, lab),
11825 pc_rtx)));
11827 for (nregs = 0;
11828 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11829 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11831 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11832 ? DFmode : SFmode,
11833 plus_constant (Pmode, save_area, off));
11834 MEM_NOTRAP_P (mem) = 1;
11835 set_mem_alias_set (mem, set);
11836 set_mem_align (mem, GET_MODE_ALIGNMENT (
11837 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11838 ? DFmode : SFmode));
11839 emit_move_insn (mem, gen_rtx_REG (
11840 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11841 ? DFmode : SFmode, fregno));
11844 emit_label (lab);
11848 /* Create the va_list data type. */
11850 static tree
11851 rs6000_build_builtin_va_list (void)
11853 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11855 /* For AIX, prefer 'char *' because that's what the system
11856 header files like. */
11857 if (DEFAULT_ABI != ABI_V4)
11858 return build_pointer_type (char_type_node);
11860 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11861 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11862 get_identifier ("__va_list_tag"), record);
11864 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11865 unsigned_char_type_node);
11866 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11867 unsigned_char_type_node);
11868 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11869 every user file. */
11870 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11871 get_identifier ("reserved"), short_unsigned_type_node);
11872 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11873 get_identifier ("overflow_arg_area"),
11874 ptr_type_node);
11875 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11876 get_identifier ("reg_save_area"),
11877 ptr_type_node);
11879 va_list_gpr_counter_field = f_gpr;
11880 va_list_fpr_counter_field = f_fpr;
11882 DECL_FIELD_CONTEXT (f_gpr) = record;
11883 DECL_FIELD_CONTEXT (f_fpr) = record;
11884 DECL_FIELD_CONTEXT (f_res) = record;
11885 DECL_FIELD_CONTEXT (f_ovf) = record;
11886 DECL_FIELD_CONTEXT (f_sav) = record;
11888 TYPE_STUB_DECL (record) = type_decl;
11889 TYPE_NAME (record) = type_decl;
11890 TYPE_FIELDS (record) = f_gpr;
11891 DECL_CHAIN (f_gpr) = f_fpr;
11892 DECL_CHAIN (f_fpr) = f_res;
11893 DECL_CHAIN (f_res) = f_ovf;
11894 DECL_CHAIN (f_ovf) = f_sav;
11896 layout_type (record);
11898 /* The correct type is an array type of one element. */
11899 return build_array_type (record, build_index_type (size_zero_node));
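/* For reference, the record built above corresponds to this C sketch
   (the real type is of course constructed from tree nodes):

     typedef struct __va_list_tag {
       unsigned char gpr;        // next GP arg register to use
       unsigned char fpr;        // next FP arg register to use
       unsigned short reserved;  // the named padding from above
       void *overflow_arg_area;  // args that were passed on the stack
       void *reg_save_area;      // block of saved GPRs and FPRs
     } __va_list_tag;

   exposed to user code as a one-element array of that struct.  */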
11902 /* Implement va_start. */
11904 static void
11905 rs6000_va_start (tree valist, rtx nextarg)
11907 HOST_WIDE_INT words, n_gpr, n_fpr;
11908 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11909 tree gpr, fpr, ovf, sav, t;
11911 /* Only SVR4 needs something special. */
11912 if (DEFAULT_ABI != ABI_V4)
11914 std_expand_builtin_va_start (valist, nextarg);
11915 return;
11918 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11919 f_fpr = DECL_CHAIN (f_gpr);
11920 f_res = DECL_CHAIN (f_fpr);
11921 f_ovf = DECL_CHAIN (f_res);
11922 f_sav = DECL_CHAIN (f_ovf);
11924 valist = build_simple_mem_ref (valist);
11925 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11926 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11927 f_fpr, NULL_TREE);
11928 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11929 f_ovf, NULL_TREE);
11930 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11931 f_sav, NULL_TREE);
11933 /* Count number of gp and fp argument registers used. */
11934 words = crtl->args.info.words;
11935 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11936 GP_ARG_NUM_REG);
11937 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11938 FP_ARG_NUM_REG);
11940 if (TARGET_DEBUG_ARG)
11941 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11942 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11943 words, n_gpr, n_fpr);
11945 if (cfun->va_list_gpr_size)
11947 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11948 build_int_cst (NULL_TREE, n_gpr));
11949 TREE_SIDE_EFFECTS (t) = 1;
11950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11953 if (cfun->va_list_fpr_size)
11955 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11956 build_int_cst (NULL_TREE, n_fpr));
11957 TREE_SIDE_EFFECTS (t) = 1;
11958 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11960 #ifdef HAVE_AS_GNU_ATTRIBUTE
11961 if (call_ABI_of_interest (cfun->decl))
11962 rs6000_passes_float = true;
11963 #endif
11966 /* Find the overflow area. */
11967 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11968 if (words != 0)
11969 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11970 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11971 TREE_SIDE_EFFECTS (t) = 1;
11972 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11974 /* If there were no va_arg invocations, don't set up the register
11975 save area. */
11976 if (!cfun->va_list_gpr_size
11977 && !cfun->va_list_fpr_size
11978 && n_gpr < GP_ARG_NUM_REG
11979 && n_fpr < FP_ARG_V4_MAX_REG)
11980 return;
11982 /* Find the register save area. */
11983 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11984 if (cfun->machine->varargs_save_offset)
11985 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11986 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11987 TREE_SIDE_EFFECTS (t) = 1;
11988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11991 /* Implement va_arg. */
11993 static tree
11994 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11995 gimple_seq *post_p)
11997 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11998 tree gpr, fpr, ovf, sav, reg, t, u;
11999 int size, rsize, n_reg, sav_ofs, sav_scale;
12000 tree lab_false, lab_over, addr;
12001 int align;
12002 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12003 int regalign = 0;
12004 gimple *stmt;
12006 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12008 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12009 return build_va_arg_indirect_ref (t);
12012 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12013 earlier version of gcc, with the property that it always applied alignment
12014 adjustments to the va-args (even for zero-sized types). The cheapest way
12015 to deal with this is to replicate the effect of the part of
12016 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12017 of relevance.
12018 We don't need to check for pass-by-reference because of the test above.
12019 We can return a simplified answer, since we know there's no offset to add. */
12021 if (((TARGET_MACHO
12022 && rs6000_darwin64_abi)
12023 || DEFAULT_ABI == ABI_ELFv2
12024 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12025 && integer_zerop (TYPE_SIZE (type)))
12027 unsigned HOST_WIDE_INT align, boundary;
12028 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12029 align = PARM_BOUNDARY / BITS_PER_UNIT;
12030 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12031 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12032 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12033 boundary /= BITS_PER_UNIT;
12034 if (boundary > align)
12036 tree t;
12037 /* This updates arg ptr by the amount that would be necessary
12038 to align the zero-sized (but not zero-alignment) item. */
12039 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12040 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12041 gimplify_and_add (t, pre_p);
12043 t = fold_convert (sizetype, valist_tmp);
12044 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12045 fold_convert (TREE_TYPE (valist),
12046 fold_build2 (BIT_AND_EXPR, sizetype, t,
12047 size_int (-boundary))));
12048 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12049 gimplify_and_add (t, pre_p);
12051 /* Since it is zero-sized there's no increment for the item itself. */
12052 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12053 return build_va_arg_indirect_ref (valist_tmp);
12056 if (DEFAULT_ABI != ABI_V4)
12058 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12060 tree elem_type = TREE_TYPE (type);
12061 machine_mode elem_mode = TYPE_MODE (elem_type);
12062 int elem_size = GET_MODE_SIZE (elem_mode);
12064 if (elem_size < UNITS_PER_WORD)
12066 tree real_part, imag_part;
12067 gimple_seq post = NULL;
12069 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12070 &post);
12071 /* Copy the value into a temporary, lest the formal temporary
12072 be reused out from under us. */
12073 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12074 gimple_seq_add_seq (pre_p, post);
12076 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12077 post_p);
12079 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12083 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12086 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12087 f_fpr = DECL_CHAIN (f_gpr);
12088 f_res = DECL_CHAIN (f_fpr);
12089 f_ovf = DECL_CHAIN (f_res);
12090 f_sav = DECL_CHAIN (f_ovf);
12092 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12093 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12094 f_fpr, NULL_TREE);
12095 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12096 f_ovf, NULL_TREE);
12097 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12098 f_sav, NULL_TREE);
12100 size = int_size_in_bytes (type);
12101 rsize = (size + 3) / 4;
12102 align = 1;
12104 if (TARGET_HARD_FLOAT && TARGET_FPRS
12105 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
12106 || (TARGET_DOUBLE_FLOAT
12107 && (TYPE_MODE (type) == DFmode
12108 || FLOAT128_2REG_P (TYPE_MODE (type))
12109 || DECIMAL_FLOAT_MODE_P (TYPE_MODE (type))))))
12111 /* FP args go in FP registers, if present. */
12112 reg = fpr;
12113 n_reg = (size + 7) / 8;
12114 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12115 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12116 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
12117 align = 8;
12119 else
12121 /* Otherwise into GP registers. */
12122 reg = gpr;
12123 n_reg = rsize;
12124 sav_ofs = 0;
12125 sav_scale = 4;
12126 if (n_reg == 2)
12127 align = 8;
12130 /* Pull the value out of the saved registers.... */
12132 lab_over = NULL;
12133 addr = create_tmp_var (ptr_type_node, "addr");
12135 /* AltiVec vectors never go in registers when -mabi=altivec. */
12136 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
12137 align = 16;
12138 else
12140 lab_false = create_artificial_label (input_location);
12141 lab_over = create_artificial_label (input_location);
12143 /* Long long and SPE vectors are aligned in the registers.
12144 So is any other 2-GPR item, such as complex int, due to a
12145 historical mistake.
12146 u = reg;
12147 if (n_reg == 2 && reg == gpr)
12149 regalign = 1;
12150 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12151 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12152 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12153 unshare_expr (reg), u);
12155 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12156 reg number is 0 for f1, so we want to make it odd. */
12157 else if (reg == fpr && TYPE_MODE (type) == TDmode)
12159 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12160 build_int_cst (TREE_TYPE (reg), 1));
12161 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12164 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12165 t = build2 (GE_EXPR, boolean_type_node, u, t);
12166 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12167 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12168 gimplify_and_add (t, pre_p);
12170 t = sav;
12171 if (sav_ofs)
12172 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12174 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12175 build_int_cst (TREE_TYPE (reg), n_reg));
12176 u = fold_convert (sizetype, u);
12177 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12178 t = fold_build_pointer_plus (t, u);
12180 /* _Decimal32 varargs are located in the second word of the 64-bit
12181 FP register for 32-bit binaries. */
12182 if (TARGET_32BIT
12183 && TARGET_HARD_FLOAT && TARGET_FPRS
12184 && TYPE_MODE (type) == SDmode)
12185 t = fold_build_pointer_plus_hwi (t, size);
12187 gimplify_assign (addr, t, pre_p);
12189 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12191 stmt = gimple_build_label (lab_false);
12192 gimple_seq_add_stmt (pre_p, stmt);
12194 if ((n_reg == 2 && !regalign) || n_reg > 2)
12196 /* Ensure that we don't find any more args in regs.
12197 Alignment has been taken care of for special cases. */
12198 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12202 /* ... otherwise out of the overflow area. */
12204 /* Care for on-stack alignment if needed. */
12205 t = ovf;
12206 if (align != 1)
12208 t = fold_build_pointer_plus_hwi (t, align - 1);
12209 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12210 build_int_cst (TREE_TYPE (t), -align));
12212 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12214 gimplify_assign (unshare_expr (addr), t, pre_p);
12216 t = fold_build_pointer_plus_hwi (t, size);
12217 gimplify_assign (unshare_expr (ovf), t, pre_p);
12219 if (lab_over)
12221 stmt = gimple_build_label (lab_over);
12222 gimple_seq_add_stmt (pre_p, stmt);
12225 if (STRICT_ALIGNMENT
12226 && (TYPE_ALIGN (type)
12227 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12229 /* The value (of type complex double, for example) may not be
12230 aligned in memory in the saved registers, so copy via a
12231 temporary. (This is the same code as used for SPARC.) */
12232 tree tmp = create_tmp_var (type, "va_arg_tmp");
12233 tree dest_addr = build_fold_addr_expr (tmp);
12235 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12236 3, dest_addr, addr, size_int (rsize * 4));
12238 gimplify_and_add (copy, pre_p);
12239 addr = dest_addr;
12242 addr = fold_convert (ptrtype, addr);
12243 return build_va_arg_indirect_ref (addr);
12246 /* Builtins. */
12248 static void
12249 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12251 tree t;
12252 unsigned classify = rs6000_builtin_info[(int)code].attr;
12253 const char *attr_string = "";
12255 gcc_assert (name != NULL);
12256 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
12258 if (rs6000_builtin_decls[(int)code])
12259 fatal_error (input_location,
12260 "internal error: builtin function %s already processed", name);
12262 rs6000_builtin_decls[(int)code] = t =
12263 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12265 /* Set any special attributes. */
12266 if ((classify & RS6000_BTC_CONST) != 0)
12268 /* const function, function only depends on the inputs. */
12269 TREE_READONLY (t) = 1;
12270 TREE_NOTHROW (t) = 1;
12271 attr_string = ", const";
12273 else if ((classify & RS6000_BTC_PURE) != 0)
12275 /* pure function, function can read global memory, but does not set any
12276 external state. */
12277 DECL_PURE_P (t) = 1;
12278 TREE_NOTHROW (t) = 1;
12279 attr_string = ", pure";
12281 else if ((classify & RS6000_BTC_FP) != 0)
12283 /* Function is a math function. If rounding mode is on, then treat the
12284 function as not reading global memory, but it can have arbitrary side
12285 effects. If it is off, then assume the function is a const function.
12286 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12287 builtin-attribute.def that is used for the math functions. */
12288 TREE_NOTHROW (t) = 1;
12289 if (flag_rounding_math)
12291 DECL_PURE_P (t) = 1;
12292 DECL_IS_NOVOPS (t) = 1;
12293 attr_string = ", fp, pure";
12295 else
12297 TREE_READONLY (t) = 1;
12298 attr_string = ", fp, const";
12301 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12302 gcc_unreachable ();
12304 if (TARGET_DEBUG_BUILTIN)
12305 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12306 (int)code, name, attr_string);
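/* A hypothetical use (the name, type and enum below are placeholders,
   not real entries):
     def_builtin ("__builtin_foo", int_ftype_int, RS6000_BUILTIN_FOO);
   registers the function with the middle end, records the decl in
   rs6000_builtin_decls, and applies the const/pure/fp attributes
   encoded in rs6000_builtin_info[].attr.  */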
12309 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12311 #undef RS6000_BUILTIN_1
12312 #undef RS6000_BUILTIN_2
12313 #undef RS6000_BUILTIN_3
12314 #undef RS6000_BUILTIN_A
12315 #undef RS6000_BUILTIN_D
12316 #undef RS6000_BUILTIN_E
12317 #undef RS6000_BUILTIN_H
12318 #undef RS6000_BUILTIN_P
12319 #undef RS6000_BUILTIN_Q
12320 #undef RS6000_BUILTIN_S
12321 #undef RS6000_BUILTIN_X
12323 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12324 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12325 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12326 { MASK, ICODE, NAME, ENUM },
12328 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12329 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12330 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12331 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12332 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12333 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12334 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12335 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12337 static const struct builtin_description bdesc_3arg[] =
12339 #include "rs6000-builtin.def"
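/* A sketch of the X-macro expansion above, using a hypothetical entry:
   if rs6000-builtin.def contained
     RS6000_BUILTIN_3 (RS6000_BUILTIN_FOO, "__builtin_foo",
                       MASK_FOO, ATTR_FOO, CODE_FOR_foo)
   the table would receive the row
     { MASK_FOO, CODE_FOR_foo, "__builtin_foo", RS6000_BUILTIN_FOO },
   while the other RS6000_BUILTIN_* classes expand to nothing.  */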
12342 /* DST operations: void foo (void *, const int, const char). */
12344 #undef RS6000_BUILTIN_1
12345 #undef RS6000_BUILTIN_2
12346 #undef RS6000_BUILTIN_3
12347 #undef RS6000_BUILTIN_A
12348 #undef RS6000_BUILTIN_D
12349 #undef RS6000_BUILTIN_E
12350 #undef RS6000_BUILTIN_H
12351 #undef RS6000_BUILTIN_P
12352 #undef RS6000_BUILTIN_Q
12353 #undef RS6000_BUILTIN_S
12354 #undef RS6000_BUILTIN_X
12356 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12357 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12358 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12359 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12360 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12361 { MASK, ICODE, NAME, ENUM },
12363 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12364 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12365 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12366 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12367 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12368 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12370 static const struct builtin_description bdesc_dst[] =
12372 #include "rs6000-builtin.def"
12375 /* Simple binary operations: VECc = foo (VECa, VECb). */
12377 #undef RS6000_BUILTIN_1
12378 #undef RS6000_BUILTIN_2
12379 #undef RS6000_BUILTIN_3
12380 #undef RS6000_BUILTIN_A
12381 #undef RS6000_BUILTIN_D
12382 #undef RS6000_BUILTIN_E
12383 #undef RS6000_BUILTIN_H
12384 #undef RS6000_BUILTIN_P
12385 #undef RS6000_BUILTIN_Q
12386 #undef RS6000_BUILTIN_S
12387 #undef RS6000_BUILTIN_X
12389 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12390 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12391 { MASK, ICODE, NAME, ENUM },
12393 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12394 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12395 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12396 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12397 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12398 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12399 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12400 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12401 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12403 static const struct builtin_description bdesc_2arg[] =
12405 #include "rs6000-builtin.def"
12408 #undef RS6000_BUILTIN_1
12409 #undef RS6000_BUILTIN_2
12410 #undef RS6000_BUILTIN_3
12411 #undef RS6000_BUILTIN_A
12412 #undef RS6000_BUILTIN_D
12413 #undef RS6000_BUILTIN_E
12414 #undef RS6000_BUILTIN_H
12415 #undef RS6000_BUILTIN_P
12416 #undef RS6000_BUILTIN_Q
12417 #undef RS6000_BUILTIN_S
12418 #undef RS6000_BUILTIN_X
12420 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12421 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12422 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12423 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12424 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12425 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12426 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12427 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12428 { MASK, ICODE, NAME, ENUM },
12430 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12431 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12432 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12434 /* AltiVec predicates. */
12436 static const struct builtin_description bdesc_altivec_preds[] =
12438 #include "rs6000-builtin.def"
12441 /* SPE predicates. */
12442 #undef RS6000_BUILTIN_1
12443 #undef RS6000_BUILTIN_2
12444 #undef RS6000_BUILTIN_3
12445 #undef RS6000_BUILTIN_A
12446 #undef RS6000_BUILTIN_D
12447 #undef RS6000_BUILTIN_E
12448 #undef RS6000_BUILTIN_H
12449 #undef RS6000_BUILTIN_P
12450 #undef RS6000_BUILTIN_Q
12451 #undef RS6000_BUILTIN_S
12452 #undef RS6000_BUILTIN_X
12454 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12455 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12456 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12457 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12458 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12459 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12460 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12461 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12462 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12463 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
12464 { MASK, ICODE, NAME, ENUM },
12466 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12468 static const struct builtin_description bdesc_spe_predicates[] =
12470 #include "rs6000-builtin.def"
12473 /* SPE evsel predicates. */
12474 #undef RS6000_BUILTIN_1
12475 #undef RS6000_BUILTIN_2
12476 #undef RS6000_BUILTIN_3
12477 #undef RS6000_BUILTIN_A
12478 #undef RS6000_BUILTIN_D
12479 #undef RS6000_BUILTIN_E
12480 #undef RS6000_BUILTIN_H
12481 #undef RS6000_BUILTIN_P
12482 #undef RS6000_BUILTIN_Q
12483 #undef RS6000_BUILTIN_S
12484 #undef RS6000_BUILTIN_X
12486 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12487 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12488 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12489 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12490 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12491 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12492 { MASK, ICODE, NAME, ENUM },
12494 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12495 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12496 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12497 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12498 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12500 static const struct builtin_description bdesc_spe_evsel[] =
12502 #include "rs6000-builtin.def"
12505 /* PAIRED predicates. */
12506 #undef RS6000_BUILTIN_1
12507 #undef RS6000_BUILTIN_2
12508 #undef RS6000_BUILTIN_3
12509 #undef RS6000_BUILTIN_A
12510 #undef RS6000_BUILTIN_D
12511 #undef RS6000_BUILTIN_E
12512 #undef RS6000_BUILTIN_H
12513 #undef RS6000_BUILTIN_P
12514 #undef RS6000_BUILTIN_Q
12515 #undef RS6000_BUILTIN_S
12516 #undef RS6000_BUILTIN_X
12518 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12519 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12520 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12521 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12522 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12523 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12524 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12525 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12526 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
12527 { MASK, ICODE, NAME, ENUM },
12529 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12530 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12532 static const struct builtin_description bdesc_paired_preds[] =
12534 #include "rs6000-builtin.def"
12537 /* ABS* operations. */
12539 #undef RS6000_BUILTIN_1
12540 #undef RS6000_BUILTIN_2
12541 #undef RS6000_BUILTIN_3
12542 #undef RS6000_BUILTIN_A
12543 #undef RS6000_BUILTIN_D
12544 #undef RS6000_BUILTIN_E
12545 #undef RS6000_BUILTIN_H
12546 #undef RS6000_BUILTIN_P
12547 #undef RS6000_BUILTIN_Q
12548 #undef RS6000_BUILTIN_S
12549 #undef RS6000_BUILTIN_X
12551 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12552 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12553 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12554 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12555 { MASK, ICODE, NAME, ENUM },
12557 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12558 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12559 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12560 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12561 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12562 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12563 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12565 static const struct builtin_description bdesc_abs[] =
12567 #include "rs6000-builtin.def"
12570 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12571 foo (VECa). */
12573 #undef RS6000_BUILTIN_1
12574 #undef RS6000_BUILTIN_2
12575 #undef RS6000_BUILTIN_3
12576 #undef RS6000_BUILTIN_A
12577 #undef RS6000_BUILTIN_D
12578 #undef RS6000_BUILTIN_E
12579 #undef RS6000_BUILTIN_H
12580 #undef RS6000_BUILTIN_P
12581 #undef RS6000_BUILTIN_Q
12582 #undef RS6000_BUILTIN_S
12583 #undef RS6000_BUILTIN_X
12585 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12586 { MASK, ICODE, NAME, ENUM },
12588 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12589 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12590 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12591 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12592 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12593 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12594 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12595 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12596 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12597 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12599 static const struct builtin_description bdesc_1arg[] =
12601 #include "rs6000-builtin.def"
12604 /* HTM builtins. */
12605 #undef RS6000_BUILTIN_1
12606 #undef RS6000_BUILTIN_2
12607 #undef RS6000_BUILTIN_3
12608 #undef RS6000_BUILTIN_A
12609 #undef RS6000_BUILTIN_D
12610 #undef RS6000_BUILTIN_E
12611 #undef RS6000_BUILTIN_H
12612 #undef RS6000_BUILTIN_P
12613 #undef RS6000_BUILTIN_Q
12614 #undef RS6000_BUILTIN_S
12615 #undef RS6000_BUILTIN_X
12617 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12618 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12619 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12620 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12621 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12622 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12623 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12624 { MASK, ICODE, NAME, ENUM },
12626 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12627 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12628 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12629 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12631 static const struct builtin_description bdesc_htm[] =
12633 #include "rs6000-builtin.def"
12636 #undef RS6000_BUILTIN_1
12637 #undef RS6000_BUILTIN_2
12638 #undef RS6000_BUILTIN_3
12639 #undef RS6000_BUILTIN_A
12640 #undef RS6000_BUILTIN_D
12641 #undef RS6000_BUILTIN_E
12642 #undef RS6000_BUILTIN_H
12643 #undef RS6000_BUILTIN_P
12644 #undef RS6000_BUILTIN_Q
12645 #undef RS6000_BUILTIN_S
12647 /* Return true if a builtin function is overloaded. */
12648 bool
12649 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12651 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12654 /* Expand an expression EXP that calls a builtin without arguments. */
12655 static rtx
12656 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12658 rtx pat;
12659 machine_mode tmode = insn_data[icode].operand[0].mode;
12661 if (icode == CODE_FOR_nothing)
12662 /* Builtin not supported on this processor. */
12663 return 0;
12665 if (target == 0
12666 || GET_MODE (target) != tmode
12667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12668 target = gen_reg_rtx (tmode);
12670 pat = GEN_FCN (icode) (target);
12671 if (! pat)
12672 return 0;
12673 emit_insn (pat);
12675 return target;
12679 static rtx
12680 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12682 rtx pat;
12683 tree arg0 = CALL_EXPR_ARG (exp, 0);
12684 tree arg1 = CALL_EXPR_ARG (exp, 1);
12685 rtx op0 = expand_normal (arg0);
12686 rtx op1 = expand_normal (arg1);
12687 machine_mode mode0 = insn_data[icode].operand[0].mode;
12688 machine_mode mode1 = insn_data[icode].operand[1].mode;
12690 if (icode == CODE_FOR_nothing)
12691 /* Builtin not supported on this processor. */
12692 return 0;
12694 /* If we got invalid arguments, bail out before generating bad rtl. */
12695 if (arg0 == error_mark_node || arg1 == error_mark_node)
12696 return const0_rtx;
12698 if (GET_CODE (op0) != CONST_INT
12699 || INTVAL (op0) > 255
12700 || INTVAL (op0) < 0)
12702 error ("argument 1 must be an 8-bit field value");
12703 return const0_rtx;
12706 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12707 op0 = copy_to_mode_reg (mode0, op0);
12709 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12710 op1 = copy_to_mode_reg (mode1, op1);
12712 pat = GEN_FCN (icode) (op0, op1);
12713 if (! pat)
12714 return const0_rtx;
12715 emit_insn (pat);
12717 return NULL_RTX;
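/* Usage sketch (editor's addition, assuming the usual __builtin_mtfsf
   binding to this expander): the first argument is an 8-bit FM field
   mask and must be a literal, so

     double d = get_new_fpscr ();   /* get_new_fpscr is a placeholder  */
     __builtin_mtfsf (0xff, d);     /* write all eight FPSCR fields    */

   compiles, while a variable or out-of-range first argument hits the
   CONST_INT check above and is rejected at compile time.  */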
12721 static rtx
12722 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12724 rtx pat;
12725 tree arg0 = CALL_EXPR_ARG (exp, 0);
12726 rtx op0 = expand_normal (arg0);
12727 machine_mode tmode = insn_data[icode].operand[0].mode;
12728 machine_mode mode0 = insn_data[icode].operand[1].mode;
12730 if (icode == CODE_FOR_nothing)
12731 /* Builtin not supported on this processor. */
12732 return 0;
12734 /* If we got invalid arguments, bail out before generating bad rtl. */
12735 if (arg0 == error_mark_node)
12736 return const0_rtx;
12738 if (icode == CODE_FOR_altivec_vspltisb
12739 || icode == CODE_FOR_altivec_vspltish
12740 || icode == CODE_FOR_altivec_vspltisw
12741 || icode == CODE_FOR_spe_evsplatfi
12742 || icode == CODE_FOR_spe_evsplati)
12744 /* Only allow 5-bit *signed* literals. */
12745 if (GET_CODE (op0) != CONST_INT
12746 || INTVAL (op0) > 15
12747 || INTVAL (op0) < -16)
12749 error ("argument 1 must be a 5-bit signed literal");
12750 return const0_rtx;
12754 if (target == 0
12755 || GET_MODE (target) != tmode
12756 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12757 target = gen_reg_rtx (tmode);
12759 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12760 op0 = copy_to_mode_reg (mode0, op0);
12762 pat = GEN_FCN (icode) (target, op0);
12763 if (! pat)
12764 return 0;
12765 emit_insn (pat);
12767 return target;
12770 static rtx
12771 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12773 rtx pat, scratch1, scratch2;
12774 tree arg0 = CALL_EXPR_ARG (exp, 0);
12775 rtx op0 = expand_normal (arg0);
12776 machine_mode tmode = insn_data[icode].operand[0].mode;
12777 machine_mode mode0 = insn_data[icode].operand[1].mode;
12779 /* If we have invalid arguments, bail out before generating bad rtl. */
12780 if (arg0 == error_mark_node)
12781 return const0_rtx;
12783 if (target == 0
12784 || GET_MODE (target) != tmode
12785 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12786 target = gen_reg_rtx (tmode);
12788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12789 op0 = copy_to_mode_reg (mode0, op0);
12791 scratch1 = gen_reg_rtx (mode0);
12792 scratch2 = gen_reg_rtx (mode0);
12794 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12795 if (! pat)
12796 return 0;
12797 emit_insn (pat);
12799 return target;
12802 static rtx
12803 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12805 rtx pat;
12806 tree arg0 = CALL_EXPR_ARG (exp, 0);
12807 tree arg1 = CALL_EXPR_ARG (exp, 1);
12808 rtx op0 = expand_normal (arg0);
12809 rtx op1 = expand_normal (arg1);
12810 machine_mode tmode = insn_data[icode].operand[0].mode;
12811 machine_mode mode0 = insn_data[icode].operand[1].mode;
12812 machine_mode mode1 = insn_data[icode].operand[2].mode;
12814 if (icode == CODE_FOR_nothing)
12815 /* Builtin not supported on this processor. */
12816 return 0;
12818 /* If we got invalid arguments, bail out before generating bad rtl. */
12819 if (arg0 == error_mark_node || arg1 == error_mark_node)
12820 return const0_rtx;
12822 if (icode == CODE_FOR_altivec_vcfux
12823 || icode == CODE_FOR_altivec_vcfsx
12824 || icode == CODE_FOR_altivec_vctsxs
12825 || icode == CODE_FOR_altivec_vctuxs
12826 || icode == CODE_FOR_altivec_vspltb
12827 || icode == CODE_FOR_altivec_vsplth
12828 || icode == CODE_FOR_altivec_vspltw
12829 || icode == CODE_FOR_spe_evaddiw
12830 || icode == CODE_FOR_spe_evldd
12831 || icode == CODE_FOR_spe_evldh
12832 || icode == CODE_FOR_spe_evldw
12833 || icode == CODE_FOR_spe_evlhhesplat
12834 || icode == CODE_FOR_spe_evlhhossplat
12835 || icode == CODE_FOR_spe_evlhhousplat
12836 || icode == CODE_FOR_spe_evlwhe
12837 || icode == CODE_FOR_spe_evlwhos
12838 || icode == CODE_FOR_spe_evlwhou
12839 || icode == CODE_FOR_spe_evlwhsplat
12840 || icode == CODE_FOR_spe_evlwwsplat
12841 || icode == CODE_FOR_spe_evrlwi
12842 || icode == CODE_FOR_spe_evslwi
12843 || icode == CODE_FOR_spe_evsrwis
12844 || icode == CODE_FOR_spe_evsubifw
12845 || icode == CODE_FOR_spe_evsrwiu)
12847 /* Only allow 5-bit unsigned literals. */
12848 STRIP_NOPS (arg1);
12849 if (TREE_CODE (arg1) != INTEGER_CST
12850 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12852 error ("argument 2 must be a 5-bit unsigned literal");
12853 return const0_rtx;
12857 if (target == 0
12858 || GET_MODE (target) != tmode
12859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12860 target = gen_reg_rtx (tmode);
12862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12863 op0 = copy_to_mode_reg (mode0, op0);
12864 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12865 op1 = copy_to_mode_reg (mode1, op1);
12867 pat = GEN_FCN (icode) (target, op0, op1);
12868 if (! pat)
12869 return 0;
12870 emit_insn (pat);
12872 return target;
12875 static rtx
12876 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12878 rtx pat, scratch;
12879 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12880 tree arg0 = CALL_EXPR_ARG (exp, 1);
12881 tree arg1 = CALL_EXPR_ARG (exp, 2);
12882 rtx op0 = expand_normal (arg0);
12883 rtx op1 = expand_normal (arg1);
12884 machine_mode tmode = SImode;
12885 machine_mode mode0 = insn_data[icode].operand[1].mode;
12886 machine_mode mode1 = insn_data[icode].operand[2].mode;
12887 int cr6_form_int;
12889 if (TREE_CODE (cr6_form) != INTEGER_CST)
12891 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12892 return const0_rtx;
12894 else
12895 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12897 gcc_assert (mode0 == mode1);
12899 /* If we have invalid arguments, bail out before generating bad rtl. */
12900 if (arg0 == error_mark_node || arg1 == error_mark_node)
12901 return const0_rtx;
12903 if (target == 0
12904 || GET_MODE (target) != tmode
12905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12906 target = gen_reg_rtx (tmode);
12908 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12909 op0 = copy_to_mode_reg (mode0, op0);
12910 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12911 op1 = copy_to_mode_reg (mode1, op1);
12913 scratch = gen_reg_rtx (mode0);
12915 pat = GEN_FCN (icode) (scratch, op0, op1);
12916 if (! pat)
12917 return 0;
12918 emit_insn (pat);
12920 /* The vec_any* and vec_all* predicates use the same opcodes for two
12921 different operations, but the bits in CR6 will be different
12922 depending on what information we want. So we have to play tricks
12923 with CR6 to get the right bits out.
12925 If you think this is disgusting, look at the specs for the
12926 AltiVec predicates. */
12928 switch (cr6_form_int)
12930 case 0:
12931 emit_insn (gen_cr6_test_for_zero (target));
12932 break;
12933 case 1:
12934 emit_insn (gen_cr6_test_for_zero_reverse (target));
12935 break;
12936 case 2:
12937 emit_insn (gen_cr6_test_for_lt (target));
12938 break;
12939 case 3:
12940 emit_insn (gen_cr6_test_for_lt_reverse (target));
12941 break;
12942 default:
12943 error ("argument 1 of __builtin_altivec_predicate is out of range");
12944 break;
12947 return target;
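/* Editor's sketch of the cr6_form encoding handled by the switch above,
   assuming the usual <altivec.h> __CR6_* macros:

     0  __CR6_EQ      "all elements false" bit set    e.g. vec_all_ne
     1  __CR6_EQ_REV  "all elements false" bit clear  e.g. vec_any_eq
     2  __CR6_LT      "all elements true" bit set     e.g. vec_all_eq
     3  __CR6_LT_REV  "all elements true" bit clear   e.g. vec_any_ne

   so vec_all_eq (a, b) reaches here roughly as
   __builtin_altivec_vcmpequw_p (__CR6_LT, a, b).  */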
12950 static rtx
12951 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12953 rtx pat, addr;
12954 tree arg0 = CALL_EXPR_ARG (exp, 0);
12955 tree arg1 = CALL_EXPR_ARG (exp, 1);
12956 machine_mode tmode = insn_data[icode].operand[0].mode;
12957 machine_mode mode0 = Pmode;
12958 machine_mode mode1 = Pmode;
12959 rtx op0 = expand_normal (arg0);
12960 rtx op1 = expand_normal (arg1);
12962 if (icode == CODE_FOR_nothing)
12963 /* Builtin not supported on this processor. */
12964 return 0;
12966 /* If we got invalid arguments, bail out before generating bad rtl. */
12967 if (arg0 == error_mark_node || arg1 == error_mark_node)
12968 return const0_rtx;
12970 if (target == 0
12971 || GET_MODE (target) != tmode
12972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12973 target = gen_reg_rtx (tmode);
12975 op1 = copy_to_mode_reg (mode1, op1);
12977 if (op0 == const0_rtx)
12979 addr = gen_rtx_MEM (tmode, op1);
12981 else
12983 op0 = copy_to_mode_reg (mode0, op0);
12984 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12987 pat = GEN_FCN (icode) (target, addr);
12989 if (! pat)
12990 return 0;
12991 emit_insn (pat);
12993 return target;
12996 /* Return a constant vector for use as a little-endian permute control vector
12997 to reverse the order of elements of the given vector mode. */
12998 static rtx
12999 swap_selector_for_mode (machine_mode mode)
13001 /* These are little endian vectors, so their elements are reversed
13002 from what you would normally expect for a permute control vector. */
13003 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13004 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13005 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13006 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
13007 unsigned int *swaparray, i;
13008 rtx perm[16];
13010 switch (mode)
13012 case V2DFmode:
13013 case V2DImode:
13014 swaparray = swap2;
13015 break;
13016 case V4SFmode:
13017 case V4SImode:
13018 swaparray = swap4;
13019 break;
13020 case V8HImode:
13021 swaparray = swap8;
13022 break;
13023 case V16QImode:
13024 swaparray = swap16;
13025 break;
13026 default:
13027 gcc_unreachable ();
13030 for (i = 0; i < 16; ++i)
13031 perm[i] = GEN_INT (swaparray[i]);
13033 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
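/* Editor's illustration (not in the original source): each array above
   is the natural big-endian vperm control read back to front, because
   the CONST_VECTOR built here is stored in little-endian element order.
   For V4SImode the natural control to reverse four words would be
   {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; reversing those 16 bytes
   yields swap4 = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.  A caller
   then emits roughly

     rtx sel = swap_selector_for_mode (V4SImode);
     emit_insn (gen_rtx_SET (dst,
		  gen_rtx_UNSPEC (V4SImode, gen_rtvec (3, src, src, sel),
				  UNSPEC_VPERM)));

   as done in altivec_expand_lvx_be below.  */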
13036 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
13037 with -maltivec=be specified. Issue the load followed by an element-
13038 reversing permute. */
13039 void
13040 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13042 rtx tmp = gen_reg_rtx (mode);
13043 rtx load = gen_rtx_SET (tmp, op1);
13044 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13045 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
13046 rtx sel = swap_selector_for_mode (mode);
13047 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
13049 gcc_assert (REG_P (op0));
13050 emit_insn (par);
13051 emit_insn (gen_rtx_SET (op0, vperm));
13054 /* Generate code for a "stvxl" built-in for a little endian target with
13055 -maltivec=be specified. Issue the store preceded by an element-reversing
13056 permute. */
13057 void
13058 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13060 rtx tmp = gen_reg_rtx (mode);
13061 rtx store = gen_rtx_SET (op0, tmp);
13062 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13063 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
13064 rtx sel = swap_selector_for_mode (mode);
13065 rtx vperm;
13067 gcc_assert (REG_P (op1));
13068 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13069 emit_insn (gen_rtx_SET (tmp, vperm));
13070 emit_insn (par);
13073 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
13074 specified. Issue the store preceded by an element-reversing permute. */
13075 void
13076 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13078 machine_mode inner_mode = GET_MODE_INNER (mode);
13079 rtx tmp = gen_reg_rtx (mode);
13080 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
13081 rtx sel = swap_selector_for_mode (mode);
13082 rtx vperm;
13084 gcc_assert (REG_P (op1));
13085 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13086 emit_insn (gen_rtx_SET (tmp, vperm));
13087 emit_insn (gen_rtx_SET (op0, stvx));
13090 static rtx
13091 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13093 rtx pat, addr;
13094 tree arg0 = CALL_EXPR_ARG (exp, 0);
13095 tree arg1 = CALL_EXPR_ARG (exp, 1);
13096 machine_mode tmode = insn_data[icode].operand[0].mode;
13097 machine_mode mode0 = Pmode;
13098 machine_mode mode1 = Pmode;
13099 rtx op0 = expand_normal (arg0);
13100 rtx op1 = expand_normal (arg1);
13102 if (icode == CODE_FOR_nothing)
13103 /* Builtin not supported on this processor. */
13104 return 0;
13106 /* If we got invalid arguments, bail out before generating bad rtl. */
13107 if (arg0 == error_mark_node || arg1 == error_mark_node)
13108 return const0_rtx;
13110 if (target == 0
13111 || GET_MODE (target) != tmode
13112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13113 target = gen_reg_rtx (tmode);
13115 op1 = copy_to_mode_reg (mode1, op1);
13117 /* For LVX, express the RTL accurately by ANDing the address with -16.
13118 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13119 so the raw address is fine. */
13120 if (icode == CODE_FOR_altivec_lvx_v2df_2op
13121 || icode == CODE_FOR_altivec_lvx_v2di_2op
13122 || icode == CODE_FOR_altivec_lvx_v4sf_2op
13123 || icode == CODE_FOR_altivec_lvx_v4si_2op
13124 || icode == CODE_FOR_altivec_lvx_v8hi_2op
13125 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
13127 rtx rawaddr;
13128 if (op0 == const0_rtx)
13129 rawaddr = op1;
13130 else
13132 op0 = copy_to_mode_reg (mode0, op0);
13133 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13135 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13136 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13138 /* For -maltivec=be, emit the load and follow it up with a
13139 permute to swap the elements. */
13140 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13142 rtx temp = gen_reg_rtx (tmode);
13143 emit_insn (gen_rtx_SET (temp, addr));
13145 rtx sel = swap_selector_for_mode (tmode);
13146 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
13147 UNSPEC_VPERM);
13148 emit_insn (gen_rtx_SET (target, vperm));
13150 else
13151 emit_insn (gen_rtx_SET (target, addr));
13153 else
13155 if (op0 == const0_rtx)
13156 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13157 else
13159 op0 = copy_to_mode_reg (mode0, op0);
13160 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13161 gen_rtx_PLUS (Pmode, op1, op0));
13164 pat = GEN_FCN (icode) (target, addr);
13165 if (! pat)
13166 return 0;
13167 emit_insn (pat);
13170 return target;
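/* Editor's note (sketch, not in the original source): lvx ignores the
   low four bits of its effective address, and the (and ... -16) above
   encodes that in the RTL.  The resulting load is roughly

     (set (reg:V4SI target)
	  (mem:V4SI (and:DI (plus:DI (reg:DI rb) (reg:DI ra))
			    (const_int -16))))

   with DImode shown on the assumption of a 64-bit Pmode.  Making the
   masking explicit lets alias analysis and the optimizers see exactly
   which 16 aligned bytes are read.  */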
13173 static rtx
13174 spe_expand_stv_builtin (enum insn_code icode, tree exp)
13176 tree arg0 = CALL_EXPR_ARG (exp, 0);
13177 tree arg1 = CALL_EXPR_ARG (exp, 1);
13178 tree arg2 = CALL_EXPR_ARG (exp, 2);
13179 rtx op0 = expand_normal (arg0);
13180 rtx op1 = expand_normal (arg1);
13181 rtx op2 = expand_normal (arg2);
13182 rtx pat;
13183 machine_mode mode0 = insn_data[icode].operand[0].mode;
13184 machine_mode mode1 = insn_data[icode].operand[1].mode;
13185 machine_mode mode2 = insn_data[icode].operand[2].mode;
13187 /* Invalid arguments. Bail before doing anything stoopid! */
13188 if (arg0 == error_mark_node
13189 || arg1 == error_mark_node
13190 || arg2 == error_mark_node)
13191 return const0_rtx;
13193 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
13194 op0 = copy_to_mode_reg (mode2, op0);
13195 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
13196 op1 = copy_to_mode_reg (mode0, op1);
13197 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13198 op2 = copy_to_mode_reg (mode1, op2);
13200 pat = GEN_FCN (icode) (op1, op2, op0);
13201 if (pat)
13202 emit_insn (pat);
13203 return NULL_RTX;
13206 static rtx
13207 paired_expand_stv_builtin (enum insn_code icode, tree exp)
13209 tree arg0 = CALL_EXPR_ARG (exp, 0);
13210 tree arg1 = CALL_EXPR_ARG (exp, 1);
13211 tree arg2 = CALL_EXPR_ARG (exp, 2);
13212 rtx op0 = expand_normal (arg0);
13213 rtx op1 = expand_normal (arg1);
13214 rtx op2 = expand_normal (arg2);
13215 rtx pat, addr;
13216 machine_mode tmode = insn_data[icode].operand[0].mode;
13217 machine_mode mode1 = Pmode;
13218 machine_mode mode2 = Pmode;
13220 /* Invalid arguments. Bail before doing anything stoopid! */
13221 if (arg0 == error_mark_node
13222 || arg1 == error_mark_node
13223 || arg2 == error_mark_node)
13224 return const0_rtx;
13226 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
13227 op0 = copy_to_mode_reg (tmode, op0);
13229 op2 = copy_to_mode_reg (mode2, op2);
13231 if (op1 == const0_rtx)
13233 addr = gen_rtx_MEM (tmode, op2);
13235 else
13237 op1 = copy_to_mode_reg (mode1, op1);
13238 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13241 pat = GEN_FCN (icode) (addr, op0);
13242 if (pat)
13243 emit_insn (pat);
13244 return NULL_RTX;
13247 static rtx
13248 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13250 tree arg0 = CALL_EXPR_ARG (exp, 0);
13251 tree arg1 = CALL_EXPR_ARG (exp, 1);
13252 tree arg2 = CALL_EXPR_ARG (exp, 2);
13253 rtx op0 = expand_normal (arg0);
13254 rtx op1 = expand_normal (arg1);
13255 rtx op2 = expand_normal (arg2);
13256 rtx pat, addr, rawaddr;
13257 machine_mode tmode = insn_data[icode].operand[0].mode;
13258 machine_mode smode = insn_data[icode].operand[1].mode;
13259 machine_mode mode1 = Pmode;
13260 machine_mode mode2 = Pmode;
13262 /* Invalid arguments. Bail before doing anything stoopid! */
13263 if (arg0 == error_mark_node
13264 || arg1 == error_mark_node
13265 || arg2 == error_mark_node)
13266 return const0_rtx;
13268 op2 = copy_to_mode_reg (mode2, op2);
13270 /* For STVX, express the RTL accurately by ANDing the address with -16.
13271 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13272 so the raw address is fine. */
13273 if (icode == CODE_FOR_altivec_stvx_v2df_2op
13274 || icode == CODE_FOR_altivec_stvx_v2di_2op
13275 || icode == CODE_FOR_altivec_stvx_v4sf_2op
13276 || icode == CODE_FOR_altivec_stvx_v4si_2op
13277 || icode == CODE_FOR_altivec_stvx_v8hi_2op
13278 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
13280 if (op1 == const0_rtx)
13281 rawaddr = op2;
13282 else
13284 op1 = copy_to_mode_reg (mode1, op1);
13285 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13288 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13289 addr = gen_rtx_MEM (tmode, addr);
13291 op0 = copy_to_mode_reg (tmode, op0);
13293 /* For -maltivec=be, emit a permute to swap the elements, followed
13294 by the store. */
13295 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13297 rtx temp = gen_reg_rtx (tmode);
13298 rtx sel = swap_selector_for_mode (tmode);
13299 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
13300 UNSPEC_VPERM);
13301 emit_insn (gen_rtx_SET (temp, vperm));
13302 emit_insn (gen_rtx_SET (addr, temp));
13304 else
13305 emit_insn (gen_rtx_SET (addr, op0));
13307 else
13309 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13310 op0 = copy_to_mode_reg (smode, op0);
13312 if (op1 == const0_rtx)
13313 addr = gen_rtx_MEM (tmode, op2);
13314 else
13316 op1 = copy_to_mode_reg (mode1, op1);
13317 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13320 pat = GEN_FCN (icode) (addr, op0);
13321 if (pat)
13322 emit_insn (pat);
13325 return NULL_RTX;
13328 /* Return the appropriate SPR number associated with the given builtin. */
13329 static inline HOST_WIDE_INT
13330 htm_spr_num (enum rs6000_builtins code)
13332 if (code == HTM_BUILTIN_GET_TFHAR
13333 || code == HTM_BUILTIN_SET_TFHAR)
13334 return TFHAR_SPR;
13335 else if (code == HTM_BUILTIN_GET_TFIAR
13336 || code == HTM_BUILTIN_SET_TFIAR)
13337 return TFIAR_SPR;
13338 else if (code == HTM_BUILTIN_GET_TEXASR
13339 || code == HTM_BUILTIN_SET_TEXASR)
13340 return TEXASR_SPR;
13341 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13342 || code == HTM_BUILTIN_SET_TEXASRU);
13343 return TEXASRU_SPR;
13346 /* Return the appropriate SPR regno associated with the given builtin. */
13347 static inline HOST_WIDE_INT
13348 htm_spr_regno (enum rs6000_builtins code)
13350 if (code == HTM_BUILTIN_GET_TFHAR
13351 || code == HTM_BUILTIN_SET_TFHAR)
13352 return TFHAR_REGNO;
13353 else if (code == HTM_BUILTIN_GET_TFIAR
13354 || code == HTM_BUILTIN_SET_TFIAR)
13355 return TFIAR_REGNO;
13356 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13357 || code == HTM_BUILTIN_SET_TEXASR
13358 || code == HTM_BUILTIN_GET_TEXASRU
13359 || code == HTM_BUILTIN_SET_TEXASRU);
13360 return TEXASR_REGNO;
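/* Editor's sketch of the resulting builtin-to-SPR mapping (SPR numbers
   per ISA 2.07, stated as an assumption):

     __builtin_get_tfhar ()    ->  mfspr  TFHAR   (SPR 128)
     __builtin_get_tfiar ()    ->  mfspr  TFIAR   (SPR 129)
     __builtin_get_texasr ()   ->  mfspr  TEXASR  (SPR 130)
     __builtin_get_texasru ()  ->  mfspr  TEXASRU (SPR 131)

   and the matching __builtin_set_* variants use mtspr via
   rs6000_htm_spr_icode below.  */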
13363 /* Return the correct ICODE value depending on whether we are
13364 setting or reading the HTM SPRs. */
13365 static inline enum insn_code
13366 rs6000_htm_spr_icode (bool nonvoid)
13368 if (nonvoid)
13369 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13370 else
13371 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13374 /* Expand the HTM builtin in EXP and store the result in TARGET.
13375 Store true in *EXPANDEDP if we found a builtin to expand. */
13376 static rtx
13377 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13379 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13380 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13381 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13382 const struct builtin_description *d;
13383 size_t i;
13385 *expandedp = true;
13387 if (!TARGET_POWERPC64
13388 && (fcode == HTM_BUILTIN_TABORTDC
13389 || fcode == HTM_BUILTIN_TABORTDCI))
13391 size_t uns_fcode = (size_t)fcode;
13392 const char *name = rs6000_builtin_info[uns_fcode].name;
13393 error ("builtin %s is only valid in 64-bit mode", name);
13394 return const0_rtx;
13397 /* Expand the HTM builtins. */
13398 d = bdesc_htm;
13399 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13400 if (d->code == fcode)
13402 rtx op[MAX_HTM_OPERANDS], pat;
13403 int nopnds = 0;
13404 tree arg;
13405 call_expr_arg_iterator iter;
13406 unsigned attr = rs6000_builtin_info[fcode].attr;
13407 enum insn_code icode = d->icode;
13408 const struct insn_operand_data *insn_op;
13409 bool uses_spr = (attr & RS6000_BTC_SPR);
13410 rtx cr = NULL_RTX;
13412 if (uses_spr)
13413 icode = rs6000_htm_spr_icode (nonvoid);
13414 insn_op = &insn_data[icode].operand[0];
13416 if (nonvoid)
13418 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
13419 if (!target
13420 || GET_MODE (target) != tmode
13421 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13422 target = gen_reg_rtx (tmode);
13423 if (uses_spr)
13424 op[nopnds++] = target;
13427 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13429 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13430 return const0_rtx;
13432 insn_op = &insn_data[icode].operand[nopnds];
13434 op[nopnds] = expand_normal (arg);
13436 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13438 if (!strcmp (insn_op->constraint, "n"))
13440 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13441 if (!CONST_INT_P (op[nopnds]))
13442 error ("argument %d must be an unsigned literal", arg_num);
13443 else
13444 error ("argument %d is an unsigned literal that is "
13445 "out of range", arg_num);
13446 return const0_rtx;
13448 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13451 nopnds++;
13454 /* Handle the builtins for extended mnemonics. These accept
13455 no arguments, but map to builtins that take arguments. */
13456 switch (fcode)
13458 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13459 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13460 op[nopnds++] = GEN_INT (1);
13461 if (flag_checking)
13462 attr |= RS6000_BTC_UNARY;
13463 break;
13464 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13465 op[nopnds++] = GEN_INT (0);
13466 if (flag_checking)
13467 attr |= RS6000_BTC_UNARY;
13468 break;
13469 default:
13470 break;
13473 /* If this builtin accesses SPRs, then pass in the appropriate
13474 SPR number and SPR regno as the last two operands. */
13475 if (uses_spr)
13477 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13478 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13479 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
13481 /* If this builtin accesses a CR, then pass in a scratch
13482 CR as the last operand. */
13483 else if (attr & RS6000_BTC_CR)
13484 {
cr = gen_reg_rtx (CCmode);
13485 op[nopnds++] = cr;
13488 if (flag_checking)
13490 int expected_nopnds = 0;
13491 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13492 expected_nopnds = 1;
13493 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13494 expected_nopnds = 2;
13495 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13496 expected_nopnds = 3;
13497 if (!(attr & RS6000_BTC_VOID))
13498 expected_nopnds += 1;
13499 if (uses_spr)
13500 expected_nopnds += 2;
13502 gcc_assert (nopnds == expected_nopnds
13503 && nopnds <= MAX_HTM_OPERANDS);
13506 switch (nopnds)
13508 case 1:
13509 pat = GEN_FCN (icode) (op[0]);
13510 break;
13511 case 2:
13512 pat = GEN_FCN (icode) (op[0], op[1]);
13513 break;
13514 case 3:
13515 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13516 break;
13517 case 4:
13518 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13519 break;
13520 default:
13521 gcc_unreachable ();
13523 if (!pat)
13524 return NULL_RTX;
13525 emit_insn (pat);
13527 if (attr & RS6000_BTC_CR)
13529 if (fcode == HTM_BUILTIN_TBEGIN)
13531 /* Emit code to set TARGET to true or false depending on
13532 whether the tbegin. instruction succeeded or failed
13533 to start a transaction. We do this by placing the 1's
13534 complement of CR's EQ bit into TARGET. */
13535 rtx scratch = gen_reg_rtx (SImode);
13536 emit_insn (gen_rtx_SET (scratch,
13537 gen_rtx_EQ (SImode, cr,
13538 const0_rtx)));
13539 emit_insn (gen_rtx_SET (target,
13540 gen_rtx_XOR (SImode, scratch,
13541 GEN_INT (1))));
13543 else
13545 /* Emit code to copy the 4-bit condition register field
13546 CR into the least significant end of register TARGET. */
13547 rtx scratch1 = gen_reg_rtx (SImode);
13548 rtx scratch2 = gen_reg_rtx (SImode);
13549 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13550 emit_insn (gen_movcc (subreg, cr));
13551 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13552 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13556 if (nonvoid)
13557 return target;
13558 return const0_rtx;
13561 *expandedp = false;
13562 return NULL_RTX;
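/* Usage sketch (editor's addition): the tbegin CR handling above is
   what makes the documented HTM idiom work:

     if (__builtin_tbegin (0))
       {
	 /* transactional code */
	 __builtin_tend (0);
       }
     else
       {
	 /* fallback path, reached when the transaction fails */
       }

   TARGET receives the complement of CR0's EQ bit, so a successfully
   started transaction yields a nonzero result.  */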
13565 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13567 static rtx
13568 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13569 rtx target)
13571 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13572 if (fcode == RS6000_BUILTIN_CPU_INIT)
13573 return const0_rtx;
13575 if (target == 0 || GET_MODE (target) != SImode)
13576 target = gen_reg_rtx (SImode);
13578 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13579 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13580 if (TREE_CODE (arg) != STRING_CST)
13582 error ("builtin %s only accepts a string argument",
13583 rs6000_builtin_info[(size_t) fcode].name);
13584 return const0_rtx;
13587 if (fcode == RS6000_BUILTIN_CPU_IS)
13589 const char *cpu = TREE_STRING_POINTER (arg);
13590 rtx cpuid = NULL_RTX;
13591 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13592 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13594 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13595 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13596 break;
13598 if (cpuid == NULL_RTX)
13600 /* Invalid CPU argument. */
13601 error ("cpu %s is an invalid argument to builtin %s",
13602 cpu, rs6000_builtin_info[(size_t) fcode].name);
13603 return const0_rtx;
13606 rtx platform = gen_reg_rtx (SImode);
13607 rtx tcbmem = gen_const_mem (SImode,
13608 gen_rtx_PLUS (Pmode,
13609 gen_rtx_REG (Pmode, TLS_REGNUM),
13610 GEN_INT (TCB_PLATFORM_OFFSET)));
13611 emit_move_insn (platform, tcbmem);
13612 emit_insn (gen_eqsi3 (target, platform, cpuid));
13614 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13616 const char *hwcap = TREE_STRING_POINTER (arg);
13617 rtx mask = NULL_RTX;
13618 int hwcap_offset;
13619 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13620 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13622 mask = GEN_INT (cpu_supports_info[i].mask);
13623 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13624 break;
13626 if (mask == NULL_RTX)
13628 /* Invalid HWCAP argument. */
13629 error ("hwcap %s is an invalid argument to builtin %s",
13630 hwcap, rs6000_builtin_info[(size_t) fcode].name);
13631 return const0_rtx;
13634 rtx tcb_hwcap = gen_reg_rtx (SImode);
13635 rtx tcbmem = gen_const_mem (SImode,
13636 gen_rtx_PLUS (Pmode,
13637 gen_rtx_REG (Pmode, TLS_REGNUM),
13638 GEN_INT (hwcap_offset)));
13639 emit_move_insn (tcb_hwcap, tcbmem);
13640 rtx scratch1 = gen_reg_rtx (SImode);
13641 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
13642 rtx scratch2 = gen_reg_rtx (SImode);
13643 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13644 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13647 /* Record that we have expanded a CPU builtin, so that we can later
13648 emit a reference to the special symbol exported by LIBC to ensure we
13649 do not link against an old LIBC that doesn't support this feature. */
13650 cpu_builtin_p = true;
13652 #else
13653 /* For old LIBCs, always return FALSE. */
13654 emit_move_insn (target, GEN_INT (0));
13655 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
13657 return target;
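/* Usage sketch (editor's addition):

     if (__builtin_cpu_is ("power8"))
       use_power8_path ();           /* placeholder function name */
     else if (__builtin_cpu_supports ("vsx"))
       use_vsx_path ();              /* placeholder function name */

   With a GLIBC that provides the TCB fields, each test is one SImode
   load relative to the thread pointer (TLS_REGNUM) plus a compare or
   mask; no runtime library call is emitted.  The argument must be a
   string literal known to cpu_is_info[] / cpu_supports_info[].  */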
13660 static rtx
13661 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13663 rtx pat;
13664 tree arg0 = CALL_EXPR_ARG (exp, 0);
13665 tree arg1 = CALL_EXPR_ARG (exp, 1);
13666 tree arg2 = CALL_EXPR_ARG (exp, 2);
13667 rtx op0 = expand_normal (arg0);
13668 rtx op1 = expand_normal (arg1);
13669 rtx op2 = expand_normal (arg2);
13670 machine_mode tmode = insn_data[icode].operand[0].mode;
13671 machine_mode mode0 = insn_data[icode].operand[1].mode;
13672 machine_mode mode1 = insn_data[icode].operand[2].mode;
13673 machine_mode mode2 = insn_data[icode].operand[3].mode;
13675 if (icode == CODE_FOR_nothing)
13676 /* Builtin not supported on this processor. */
13677 return 0;
13679 /* If we got invalid arguments, bail out before generating bad rtl. */
13680 if (arg0 == error_mark_node
13681 || arg1 == error_mark_node
13682 || arg2 == error_mark_node)
13683 return const0_rtx;
13685 /* Check and prepare argument depending on the instruction code.
13687 Note that a switch statement instead of the sequence of tests
13688 would be incorrect as many of the CODE_FOR values could be
13689 CODE_FOR_nothing and that would yield multiple alternatives
13690 with identical values. We'd never reach here at runtime in
13691 this case. */
13692 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13693 || icode == CODE_FOR_altivec_vsldoi_v4si
13694 || icode == CODE_FOR_altivec_vsldoi_v8hi
13695 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13697 /* Only allow 4-bit unsigned literals. */
13698 STRIP_NOPS (arg2);
13699 if (TREE_CODE (arg2) != INTEGER_CST
13700 || TREE_INT_CST_LOW (arg2) & ~0xf)
13702 error ("argument 3 must be a 4-bit unsigned literal");
13703 return const0_rtx;
13706 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13707 || icode == CODE_FOR_vsx_xxpermdi_v2di
13708 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13709 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13710 || icode == CODE_FOR_vsx_xxsldwi_v4si
13711 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13712 || icode == CODE_FOR_vsx_xxsldwi_v2di
13713 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13715 /* Only allow 2-bit unsigned literals. */
13716 STRIP_NOPS (arg2);
13717 if (TREE_CODE (arg2) != INTEGER_CST
13718 || TREE_INT_CST_LOW (arg2) & ~0x3)
13720 error ("argument 3 must be a 2-bit unsigned literal");
13721 return const0_rtx;
13724 else if (icode == CODE_FOR_vsx_set_v2df
13725 || icode == CODE_FOR_vsx_set_v2di
13726 || icode == CODE_FOR_bcdadd
13727 || icode == CODE_FOR_bcdadd_lt
13728 || icode == CODE_FOR_bcdadd_eq
13729 || icode == CODE_FOR_bcdadd_gt
13730 || icode == CODE_FOR_bcdsub
13731 || icode == CODE_FOR_bcdsub_lt
13732 || icode == CODE_FOR_bcdsub_eq
13733 || icode == CODE_FOR_bcdsub_gt)
13735 /* Only allow 1-bit unsigned literals. */
13736 STRIP_NOPS (arg2);
13737 if (TREE_CODE (arg2) != INTEGER_CST
13738 || TREE_INT_CST_LOW (arg2) & ~0x1)
13740 error ("argument 3 must be a 1-bit unsigned literal");
13741 return const0_rtx;
13744 else if (icode == CODE_FOR_dfp_ddedpd_dd
13745 || icode == CODE_FOR_dfp_ddedpd_td)
13747 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
13748 STRIP_NOPS (arg0);
13749 if (TREE_CODE (arg0) != INTEGER_CST
13750 || TREE_INT_CST_LOW (arg0) & ~0x3)
13752 error ("argument 1 must be 0 or 2");
13753 return const0_rtx;
13756 else if (icode == CODE_FOR_dfp_denbcd_dd
13757 || icode == CODE_FOR_dfp_denbcd_td)
13759 /* Only allow 1-bit unsigned literals. */
13760 STRIP_NOPS (arg0);
13761 if (TREE_CODE (arg0) != INTEGER_CST
13762 || TREE_INT_CST_LOW (arg0) & ~0x1)
13764 error ("argument 1 must be a 1-bit unsigned literal");
13765 return const0_rtx;
13768 else if (icode == CODE_FOR_dfp_dscli_dd
13769 || icode == CODE_FOR_dfp_dscli_td
13770 || icode == CODE_FOR_dfp_dscri_dd
13771 || icode == CODE_FOR_dfp_dscri_td)
13773 /* Only allow 6-bit unsigned literals. */
13774 STRIP_NOPS (arg1);
13775 if (TREE_CODE (arg1) != INTEGER_CST
13776 || TREE_INT_CST_LOW (arg1) & ~0x3f)
13778 error ("argument 2 must be a 6-bit unsigned literal");
13779 return const0_rtx;
13782 else if (icode == CODE_FOR_crypto_vshasigmaw
13783 || icode == CODE_FOR_crypto_vshasigmad)
13785 /* Check whether the 2nd and 3rd arguments are integer constants and in
13786 range and prepare arguments. */
13787 STRIP_NOPS (arg1);
13788 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13790 error ("argument 2 must be 0 or 1");
13791 return const0_rtx;
13794 STRIP_NOPS (arg2);
13795 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13797 error ("argument 3 must be in the range 0..15");
13798 return const0_rtx;
13802 if (target == 0
13803 || GET_MODE (target) != tmode
13804 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13805 target = gen_reg_rtx (tmode);
13807 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13808 op0 = copy_to_mode_reg (mode0, op0);
13809 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13810 op1 = copy_to_mode_reg (mode1, op1);
13811 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13812 op2 = copy_to_mode_reg (mode2, op2);
13814 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13815 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13816 else
13817 pat = GEN_FCN (icode) (target, op0, op1, op2);
13818 if (! pat)
13819 return 0;
13820 emit_insn (pat);
13822 return target;
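/* Editor's illustration of the literal-range checks above, using the
   vsldoi case: the user-level vec_sld takes a 4-bit shift count, so

     vector int r = vec_sld (a, b, 3);   /* OK: 3 fits in 4 bits */

   expands normally, while a variable count or one outside 0..15 is
   diagnosed here at expand time instead of producing bad RTL.  */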
13825 /* Expand the lvx builtins. */
13826 static rtx
13827 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13829 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13830 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13831 tree arg0;
13832 machine_mode tmode, mode0;
13833 rtx pat, op0;
13834 enum insn_code icode;
13836 switch (fcode)
13838 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13839 icode = CODE_FOR_vector_altivec_load_v16qi;
13840 break;
13841 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13842 icode = CODE_FOR_vector_altivec_load_v8hi;
13843 break;
13844 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13845 icode = CODE_FOR_vector_altivec_load_v4si;
13846 break;
13847 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13848 icode = CODE_FOR_vector_altivec_load_v4sf;
13849 break;
13850 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13851 icode = CODE_FOR_vector_altivec_load_v2df;
13852 break;
13853 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13854 icode = CODE_FOR_vector_altivec_load_v2di; break;
13855 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13856 icode = CODE_FOR_vector_altivec_load_v1ti;
13857 break;
13858 default:
13859 *expandedp = false;
13860 return NULL_RTX;
13863 *expandedp = true;
13865 arg0 = CALL_EXPR_ARG (exp, 0);
13866 op0 = expand_normal (arg0);
13867 tmode = insn_data[icode].operand[0].mode;
13868 mode0 = insn_data[icode].operand[1].mode;
13870 if (target == 0
13871 || GET_MODE (target) != tmode
13872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13873 target = gen_reg_rtx (tmode);
13875 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13876 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13878 pat = GEN_FCN (icode) (target, op0);
13879 if (! pat)
13880 return 0;
13881 emit_insn (pat);
13882 return target;
13885 /* Expand the stvx builtins. */
13886 static rtx
13887 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13888 bool *expandedp)
13890 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13891 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13892 tree arg0, arg1;
13893 machine_mode mode0, mode1;
13894 rtx pat, op0, op1;
13895 enum insn_code icode;
13897 switch (fcode)
13899 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13900 icode = CODE_FOR_vector_altivec_store_v16qi;
13901 break;
13902 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13903 icode = CODE_FOR_vector_altivec_store_v8hi;
13904 break;
13905 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13906 icode = CODE_FOR_vector_altivec_store_v4si;
13907 break;
13908 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13909 icode = CODE_FOR_vector_altivec_store_v4sf;
13910 break;
13911 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13912 icode = CODE_FOR_vector_altivec_store_v2df;
13913 break;
13914 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13915 icode = CODE_FOR_vector_altivec_store_v2di; break;
13916 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13917 icode = CODE_FOR_vector_altivec_store_v1ti;
13918 break;
13919 default:
13920 *expandedp = false;
13921 return NULL_RTX;
13924 arg0 = CALL_EXPR_ARG (exp, 0);
13925 arg1 = CALL_EXPR_ARG (exp, 1);
13926 op0 = expand_normal (arg0);
13927 op1 = expand_normal (arg1);
13928 mode0 = insn_data[icode].operand[0].mode;
13929 mode1 = insn_data[icode].operand[1].mode;
13931 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13932 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13933 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13934 op1 = copy_to_mode_reg (mode1, op1);
13936 pat = GEN_FCN (icode) (op0, op1);
13937 if (pat)
13938 emit_insn (pat);
13940 *expandedp = true;
13941 return NULL_RTX;
13944 /* Expand the dst builtins. */
13945 static rtx
13946 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13947 bool *expandedp)
13949 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13950 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13951 tree arg0, arg1, arg2;
13952 machine_mode mode0, mode1;
13953 rtx pat, op0, op1, op2;
13954 const struct builtin_description *d;
13955 size_t i;
13957 *expandedp = false;
13959 /* Handle DST variants. */
13960 d = bdesc_dst;
13961 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13962 if (d->code == fcode)
13964 arg0 = CALL_EXPR_ARG (exp, 0);
13965 arg1 = CALL_EXPR_ARG (exp, 1);
13966 arg2 = CALL_EXPR_ARG (exp, 2);
13967 op0 = expand_normal (arg0);
13968 op1 = expand_normal (arg1);
13969 op2 = expand_normal (arg2);
13970 mode0 = insn_data[d->icode].operand[0].mode;
13971 mode1 = insn_data[d->icode].operand[1].mode;
13973 /* Invalid arguments; bail out before generating bad rtl. */
13974 if (arg0 == error_mark_node
13975 || arg1 == error_mark_node
13976 || arg2 == error_mark_node)
13977 return const0_rtx;
13979 *expandedp = true;
13980 STRIP_NOPS (arg2);
13981 if (TREE_CODE (arg2) != INTEGER_CST
13982 || TREE_INT_CST_LOW (arg2) & ~0x3)
13984 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13985 return const0_rtx;
13988 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13989 op0 = copy_to_mode_reg (Pmode, op0);
13990 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13991 op1 = copy_to_mode_reg (mode1, op1);
13993 pat = GEN_FCN (d->icode) (op0, op1, op2);
13994 if (pat != 0)
13995 emit_insn (pat);
13997 return NULL_RTX;
14000 return NULL_RTX;
14003 /* Expand vec_init builtin. */
14004 static rtx
14005 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14007 machine_mode tmode = TYPE_MODE (type);
14008 machine_mode inner_mode = GET_MODE_INNER (tmode);
14009 int i, n_elt = GET_MODE_NUNITS (tmode);
14011 gcc_assert (VECTOR_MODE_P (tmode));
14012 gcc_assert (n_elt == call_expr_nargs (exp));
14014 if (!target || !register_operand (target, tmode))
14015 target = gen_reg_rtx (tmode);
14017 /* If we have a vector composed of a single element, such as V1TImode, do
14018 the initialization directly. */
14019 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14021 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14022 emit_move_insn (target, gen_lowpart (tmode, x));
14024 else
14026 rtvec v = rtvec_alloc (n_elt);
14028 for (i = 0; i < n_elt; ++i)
14030 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14031 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14034 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14037 return target;
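/* Editor's sketch: this expander backs vector initialization from
   possibly non-constant scalars, e.g.

     vector int v = (vector int) {w, x, y, z};

   A single-element vector such as V1TImode is moved directly;
   otherwise the elements are gathered into a PARALLEL and handed to
   rs6000_expand_vector_init.  */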
14040 /* Return the integer constant in ARG. Constrain it to be in the range
14041 of the subparts of VEC_TYPE; issue an error if not. */
14043 static int
14044 get_element_number (tree vec_type, tree arg)
14046 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14048 if (!tree_fits_uhwi_p (arg)
14049 || (elt = tree_to_uhwi (arg), elt > max))
14051 error ("selector must be an integer constant in the range 0..%wi", max);
14052 return 0;
14055 return elt;
14058 /* Expand vec_set builtin. */
14059 static rtx
14060 altivec_expand_vec_set_builtin (tree exp)
14062 machine_mode tmode, mode1;
14063 tree arg0, arg1, arg2;
14064 int elt;
14065 rtx op0, op1;
14067 arg0 = CALL_EXPR_ARG (exp, 0);
14068 arg1 = CALL_EXPR_ARG (exp, 1);
14069 arg2 = CALL_EXPR_ARG (exp, 2);
14071 tmode = TYPE_MODE (TREE_TYPE (arg0));
14072 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14073 gcc_assert (VECTOR_MODE_P (tmode));
14075 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14076 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14077 elt = get_element_number (TREE_TYPE (arg0), arg2);
14079 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14080 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14082 op0 = force_reg (tmode, op0);
14083 op1 = force_reg (mode1, op1);
14085 rs6000_expand_vector_set (op0, op1, elt);
14087 return op0;
14090 /* Expand vec_ext builtin. */
14091 static rtx
14092 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14094 machine_mode tmode, mode0;
14095 tree arg0, arg1;
14096 int elt;
14097 rtx op0;
14099 arg0 = CALL_EXPR_ARG (exp, 0);
14100 arg1 = CALL_EXPR_ARG (exp, 1);
14102 op0 = expand_normal (arg0);
14103 elt = get_element_number (TREE_TYPE (arg0), arg1);
14105 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14106 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14107 gcc_assert (VECTOR_MODE_P (mode0));
14109 op0 = force_reg (mode0, op0);
14111 if (optimize || !target || !register_operand (target, tmode))
14112 target = gen_reg_rtx (tmode);
14114 rs6000_expand_vector_extract (target, op0, elt);
14116 return target;
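/* Editor's sketch for the two accessors above: with vector int v,

     int second = vec_extract (v, 1);
     v = vec_insert (42, v, 3);

   reach altivec_expand_vec_ext_builtin and
   altivec_expand_vec_set_builtin respectively when resolved to these
   builtins; get_element_number rejects any selector that is not a
   constant in 0 .. nunits-1.  */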
14119 /* Expand the builtin in EXP and store the result in TARGET. Store
14120 true in *EXPANDEDP if we found a builtin to expand. */
14121 static rtx
14122 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14124 const struct builtin_description *d;
14125 size_t i;
14126 enum insn_code icode;
14127 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14128 tree arg0;
14129 rtx op0, pat;
14130 machine_mode tmode, mode0;
14131 enum rs6000_builtins fcode
14132 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14134 if (rs6000_overloaded_builtin_p (fcode))
14136 *expandedp = true;
14137 error ("unresolved overload for Altivec builtin %qF", fndecl);
14139 /* Given it is invalid, just generate a normal call. */
14140 return expand_call (exp, target, false);
14143 target = altivec_expand_ld_builtin (exp, target, expandedp);
14144 if (*expandedp)
14145 return target;
14147 target = altivec_expand_st_builtin (exp, target, expandedp);
14148 if (*expandedp)
14149 return target;
14151 target = altivec_expand_dst_builtin (exp, target, expandedp);
14152 if (*expandedp)
14153 return target;
14155 *expandedp = true;
14157 switch (fcode)
14159 case ALTIVEC_BUILTIN_STVX_V2DF:
14160 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
14161 case ALTIVEC_BUILTIN_STVX_V2DI:
14162 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
14163 case ALTIVEC_BUILTIN_STVX_V4SF:
14164 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
14165 case ALTIVEC_BUILTIN_STVX:
14166 case ALTIVEC_BUILTIN_STVX_V4SI:
14167 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
14168 case ALTIVEC_BUILTIN_STVX_V8HI:
14169 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
14170 case ALTIVEC_BUILTIN_STVX_V16QI:
14171 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
14172 case ALTIVEC_BUILTIN_STVEBX:
14173 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14174 case ALTIVEC_BUILTIN_STVEHX:
14175 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14176 case ALTIVEC_BUILTIN_STVEWX:
14177 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14178 case ALTIVEC_BUILTIN_STVXL_V2DF:
14179 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14180 case ALTIVEC_BUILTIN_STVXL_V2DI:
14181 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14182 case ALTIVEC_BUILTIN_STVXL_V4SF:
14183 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14184 case ALTIVEC_BUILTIN_STVXL:
14185 case ALTIVEC_BUILTIN_STVXL_V4SI:
14186 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14187 case ALTIVEC_BUILTIN_STVXL_V8HI:
14188 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14189 case ALTIVEC_BUILTIN_STVXL_V16QI:
14190 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14192 case ALTIVEC_BUILTIN_STVLX:
14193 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14194 case ALTIVEC_BUILTIN_STVLXL:
14195 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14196 case ALTIVEC_BUILTIN_STVRX:
14197 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14198 case ALTIVEC_BUILTIN_STVRXL:
14199 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14201 case VSX_BUILTIN_STXVD2X_V1TI:
14202 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14203 case VSX_BUILTIN_STXVD2X_V2DF:
14204 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14205 case VSX_BUILTIN_STXVD2X_V2DI:
14206 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14207 case VSX_BUILTIN_STXVW4X_V4SF:
14208 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14209 case VSX_BUILTIN_STXVW4X_V4SI:
14210 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14211 case VSX_BUILTIN_STXVW4X_V8HI:
14212 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14213 case VSX_BUILTIN_STXVW4X_V16QI:
14214 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14216 /* For the following on big endian, it's ok to use any appropriate
14217 unaligned-supporting store, so use a generic expander. For
14218 little-endian, the exact element-reversing instruction must
14219 be used. */
14220 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14222 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14223 : CODE_FOR_vsx_st_elemrev_v2df);
14224 return altivec_expand_stv_builtin (code, exp);
14226 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14228 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14229 : CODE_FOR_vsx_st_elemrev_v2di);
14230 return altivec_expand_stv_builtin (code, exp);
14232 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14234 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14235 : CODE_FOR_vsx_st_elemrev_v4sf);
14236 return altivec_expand_stv_builtin (code, exp);
14238 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14240 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14241 : CODE_FOR_vsx_st_elemrev_v4si);
14242 return altivec_expand_stv_builtin (code, exp);
14244 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14246 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14247 : CODE_FOR_vsx_st_elemrev_v8hi);
14248 return altivec_expand_stv_builtin (code, exp);
14250 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14252 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14253 : CODE_FOR_vsx_st_elemrev_v16qi);
14254 return altivec_expand_stv_builtin (code, exp);
14257 case ALTIVEC_BUILTIN_MFVSCR:
14258 icode = CODE_FOR_altivec_mfvscr;
14259 tmode = insn_data[icode].operand[0].mode;
14261 if (target == 0
14262 || GET_MODE (target) != tmode
14263 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14264 target = gen_reg_rtx (tmode);
14266 pat = GEN_FCN (icode) (target);
14267 if (! pat)
14268 return 0;
14269 emit_insn (pat);
14270 return target;
14272 case ALTIVEC_BUILTIN_MTVSCR:
14273 icode = CODE_FOR_altivec_mtvscr;
14274 arg0 = CALL_EXPR_ARG (exp, 0);
14275 op0 = expand_normal (arg0);
14276 mode0 = insn_data[icode].operand[0].mode;
14278 /* If we got invalid arguments, bail out before generating bad rtl. */
14279 if (arg0 == error_mark_node)
14280 return const0_rtx;
14282 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14283 op0 = copy_to_mode_reg (mode0, op0);
14285 pat = GEN_FCN (icode) (op0);
14286 if (pat)
14287 emit_insn (pat);
14288 return NULL_RTX;
14290 case ALTIVEC_BUILTIN_DSSALL:
14291 emit_insn (gen_altivec_dssall ());
14292 return NULL_RTX;
14294 case ALTIVEC_BUILTIN_DSS:
14295 icode = CODE_FOR_altivec_dss;
14296 arg0 = CALL_EXPR_ARG (exp, 0);
14297 STRIP_NOPS (arg0);
14298 op0 = expand_normal (arg0);
14299 mode0 = insn_data[icode].operand[0].mode;
14301 /* If we got invalid arguments, bail out before generating bad rtl. */
14302 if (arg0 == error_mark_node)
14303 return const0_rtx;
14305 if (TREE_CODE (arg0) != INTEGER_CST
14306 || TREE_INT_CST_LOW (arg0) & ~0x3)
14308 error ("argument to dss must be a 2-bit unsigned literal");
14309 return const0_rtx;
14312 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14313 op0 = copy_to_mode_reg (mode0, op0);
14315 emit_insn (gen_altivec_dss (op0));
14316 return NULL_RTX;
14318 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14319 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14320 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14321 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14322 case VSX_BUILTIN_VEC_INIT_V2DF:
14323 case VSX_BUILTIN_VEC_INIT_V2DI:
14324 case VSX_BUILTIN_VEC_INIT_V1TI:
14325 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14327 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14328 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14329 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14330 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14331 case VSX_BUILTIN_VEC_SET_V2DF:
14332 case VSX_BUILTIN_VEC_SET_V2DI:
14333 case VSX_BUILTIN_VEC_SET_V1TI:
14334 return altivec_expand_vec_set_builtin (exp);
14336 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14337 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14338 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14339 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14340 case VSX_BUILTIN_VEC_EXT_V2DF:
14341 case VSX_BUILTIN_VEC_EXT_V2DI:
14342 case VSX_BUILTIN_VEC_EXT_V1TI:
14343 return altivec_expand_vec_ext_builtin (exp, target);
14345 default:
14346 break;
14347 /* Otherwise, fall through to the table-driven expanders below. */
14350 /* Expand abs* operations. */
14351 d = bdesc_abs;
14352 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14353 if (d->code == fcode)
14354 return altivec_expand_abs_builtin (d->icode, exp, target);
14356 /* Expand the AltiVec predicates. */
14357 d = bdesc_altivec_preds;
14358 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14359 if (d->code == fcode)
14360 return altivec_expand_predicate_builtin (d->icode, exp, target);
14362 /* LV* are funky. We initialized them differently. */
14363 switch (fcode)
14365 case ALTIVEC_BUILTIN_LVSL:
14366 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14367 exp, target, false);
14368 case ALTIVEC_BUILTIN_LVSR:
14369 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14370 exp, target, false);
14371 case ALTIVEC_BUILTIN_LVEBX:
14372 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14373 exp, target, false);
14374 case ALTIVEC_BUILTIN_LVEHX:
14375 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14376 exp, target, false);
14377 case ALTIVEC_BUILTIN_LVEWX:
14378 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14379 exp, target, false);
14380 case ALTIVEC_BUILTIN_LVXL_V2DF:
14381 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14382 exp, target, false);
14383 case ALTIVEC_BUILTIN_LVXL_V2DI:
14384 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14385 exp, target, false);
14386 case ALTIVEC_BUILTIN_LVXL_V4SF:
14387 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14388 exp, target, false);
14389 case ALTIVEC_BUILTIN_LVXL:
14390 case ALTIVEC_BUILTIN_LVXL_V4SI:
14391 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14392 exp, target, false);
14393 case ALTIVEC_BUILTIN_LVXL_V8HI:
14394 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14395 exp, target, false);
14396 case ALTIVEC_BUILTIN_LVXL_V16QI:
14397 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14398 exp, target, false);
14399 case ALTIVEC_BUILTIN_LVX_V2DF:
14400 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
14401 exp, target, false);
14402 case ALTIVEC_BUILTIN_LVX_V2DI:
14403 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
14404 exp, target, false);
14405 case ALTIVEC_BUILTIN_LVX_V4SF:
14406 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
14407 exp, target, false);
14408 case ALTIVEC_BUILTIN_LVX:
14409 case ALTIVEC_BUILTIN_LVX_V4SI:
14410 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
14411 exp, target, false);
14412 case ALTIVEC_BUILTIN_LVX_V8HI:
14413 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
14414 exp, target, false);
14415 case ALTIVEC_BUILTIN_LVX_V16QI:
14416 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
14417 exp, target, false);
14418 case ALTIVEC_BUILTIN_LVLX:
14419 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14420 exp, target, true);
14421 case ALTIVEC_BUILTIN_LVLXL:
14422 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14423 exp, target, true);
14424 case ALTIVEC_BUILTIN_LVRX:
14425 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14426 exp, target, true);
14427 case ALTIVEC_BUILTIN_LVRXL:
14428 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14429 exp, target, true);
14430 case VSX_BUILTIN_LXVD2X_V1TI:
14431 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14432 exp, target, false);
14433 case VSX_BUILTIN_LXVD2X_V2DF:
14434 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14435 exp, target, false);
14436 case VSX_BUILTIN_LXVD2X_V2DI:
14437 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14438 exp, target, false);
14439 case VSX_BUILTIN_LXVW4X_V4SF:
14440 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14441 exp, target, false);
14442 case VSX_BUILTIN_LXVW4X_V4SI:
14443 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14444 exp, target, false);
14445 case VSX_BUILTIN_LXVW4X_V8HI:
14446 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14447 exp, target, false);
14448 case VSX_BUILTIN_LXVW4X_V16QI:
14449 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14450 exp, target, false);
14451 /* For the following, on big-endian targets any appropriate
14452 unaligned-supporting load will do, so use a generic expander.
14453 On little-endian targets, the exact element-reversing
14454 instruction must be used. */
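    /* A minimal usage sketch, assuming the usual __builtin_vsx_ld_elemrev_*
       user-level spellings:

	 vector double vd = __builtin_vsx_ld_elemrev_v2df (0, ptr);

       loads vd[0] from ptr[0] and vd[1] from ptr[1] on both endiannesses;
       only the instruction selected below differs.  */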
14455 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14457 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14458 : CODE_FOR_vsx_ld_elemrev_v2df);
14459 return altivec_expand_lv_builtin (code, exp, target, false);
14461 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14463 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14464 : CODE_FOR_vsx_ld_elemrev_v2di);
14465 return altivec_expand_lv_builtin (code, exp, target, false);
14467 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14469 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14470 : CODE_FOR_vsx_ld_elemrev_v4sf);
14471 return altivec_expand_lv_builtin (code, exp, target, false);
14473 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14475 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14476 : CODE_FOR_vsx_ld_elemrev_v4si);
14477 return altivec_expand_lv_builtin (code, exp, target, false);
14479 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14481 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14482 : CODE_FOR_vsx_ld_elemrev_v8hi);
14483 return altivec_expand_lv_builtin (code, exp, target, false);
14485 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14487 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14488 : CODE_FOR_vsx_ld_elemrev_v16qi);
14489 return altivec_expand_lv_builtin (code, exp, target, false);
14491 break;
14492 default:
14493 break;
14494 /* Fall through to the not-expanded return below. */
14497 *expandedp = false;
14498 return NULL_RTX;
14501 /* Expand the builtin in EXP and store the result in TARGET. Store
14502 true in *EXPANDEDP if we found a builtin to expand. */
14503 static rtx
14504 paired_expand_builtin (tree exp, rtx target, bool *expandedp)
14506 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14507 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14508 const struct builtin_description *d;
14509 size_t i;
14511 *expandedp = true;
14513 switch (fcode)
14515 case PAIRED_BUILTIN_STX:
14516 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
14517 case PAIRED_BUILTIN_LX:
14518 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
14519 default:
14520 break;
14521 /* Fall through to the predicate table below. */
14524 /* Expand the paired predicates. */
14525 d = bdesc_paired_preds;
14526 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
14527 if (d->code == fcode)
14528 return paired_expand_predicate_builtin (d->icode, exp, target);
14530 *expandedp = false;
14531 return NULL_RTX;
14534 /* Binops that need to be initialized manually, but can be expanded
14535 automagically by rs6000_expand_binop_builtin. */
14536 static const struct builtin_description bdesc_2arg_spe[] =
14538 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
14539 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
14540 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
14541 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
14542 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
14543 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
14544 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
14545 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
14546 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
14547 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
14548 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
14549 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
14550 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
14551 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
14552 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
14553 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
14554 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
14555 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
14556 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
14557 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
14558 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
14559 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
14562 /* Expand the builtin in EXP and store the result in TARGET. Store
14563 true in *EXPANDEDP if we found a builtin to expand.
14565 This expands the SPE builtins that are not simple unary and binary
14566 operations. */
14567 static rtx
14568 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
14570 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14571 tree arg1, arg0;
14572 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14573 enum insn_code icode;
14574 machine_mode tmode, mode0;
14575 rtx pat, op0;
14576 const struct builtin_description *d;
14577 size_t i;
14579 *expandedp = true;
14581 /* Syntax check for a 5-bit unsigned immediate. */
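    /* A sketch of the check, assuming the __builtin_spe_evstdd spelling:
       __builtin_spe_evstdd (v, p, 31) is accepted, while an offset literal
       of 32, or a non-constant offset, is rejected below.  */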
14582 switch (fcode)
14584 case SPE_BUILTIN_EVSTDD:
14585 case SPE_BUILTIN_EVSTDH:
14586 case SPE_BUILTIN_EVSTDW:
14587 case SPE_BUILTIN_EVSTWHE:
14588 case SPE_BUILTIN_EVSTWHO:
14589 case SPE_BUILTIN_EVSTWWE:
14590 case SPE_BUILTIN_EVSTWWO:
14591 arg1 = CALL_EXPR_ARG (exp, 2);
14592 if (TREE_CODE (arg1) != INTEGER_CST
14593 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14595 error ("argument 2 must be a 5-bit unsigned literal");
14596 return const0_rtx;
14598 break;
14599 default:
14600 break;
14603 /* The evsplat*i instructions are not quite generic. */
14604 switch (fcode)
14606 case SPE_BUILTIN_EVSPLATFI:
14607 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
14608 exp, target);
14609 case SPE_BUILTIN_EVSPLATI:
14610 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
14611 exp, target);
14612 default:
14613 break;
14616 d = bdesc_2arg_spe;
14617 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
14618 if (d->code == fcode)
14619 return rs6000_expand_binop_builtin (d->icode, exp, target);
14621 d = bdesc_spe_predicates;
14622 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
14623 if (d->code == fcode)
14624 return spe_expand_predicate_builtin (d->icode, exp, target);
14626 d = bdesc_spe_evsel;
14627 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
14628 if (d->code == fcode)
14629 return spe_expand_evsel_builtin (d->icode, exp, target);
14631 switch (fcode)
14633 case SPE_BUILTIN_EVSTDDX:
14634 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
14635 case SPE_BUILTIN_EVSTDHX:
14636 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
14637 case SPE_BUILTIN_EVSTDWX:
14638 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
14639 case SPE_BUILTIN_EVSTWHEX:
14640 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
14641 case SPE_BUILTIN_EVSTWHOX:
14642 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
14643 case SPE_BUILTIN_EVSTWWEX:
14644 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
14645 case SPE_BUILTIN_EVSTWWOX:
14646 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
14647 case SPE_BUILTIN_EVSTDD:
14648 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
14649 case SPE_BUILTIN_EVSTDH:
14650 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
14651 case SPE_BUILTIN_EVSTDW:
14652 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
14653 case SPE_BUILTIN_EVSTWHE:
14654 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
14655 case SPE_BUILTIN_EVSTWHO:
14656 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
14657 case SPE_BUILTIN_EVSTWWE:
14658 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
14659 case SPE_BUILTIN_EVSTWWO:
14660 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
14661 case SPE_BUILTIN_MFSPEFSCR:
14662 icode = CODE_FOR_spe_mfspefscr;
14663 tmode = insn_data[icode].operand[0].mode;
14665 if (target == 0
14666 || GET_MODE (target) != tmode
14667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14668 target = gen_reg_rtx (tmode);
14670 pat = GEN_FCN (icode) (target);
14671 if (! pat)
14672 return 0;
14673 emit_insn (pat);
14674 return target;
14675 case SPE_BUILTIN_MTSPEFSCR:
14676 icode = CODE_FOR_spe_mtspefscr;
14677 arg0 = CALL_EXPR_ARG (exp, 0);
14678 op0 = expand_normal (arg0);
14679 mode0 = insn_data[icode].operand[0].mode;
14681 if (arg0 == error_mark_node)
14682 return const0_rtx;
14684 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14685 op0 = copy_to_mode_reg (mode0, op0);
14687 pat = GEN_FCN (icode) (op0);
14688 if (pat)
14689 emit_insn (pat);
14690 return NULL_RTX;
14691 default:
14692 break;
14695 *expandedp = false;
14696 return NULL_RTX;
14699 static rtx
14700 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14702 rtx pat, scratch, tmp;
14703 tree form = CALL_EXPR_ARG (exp, 0);
14704 tree arg0 = CALL_EXPR_ARG (exp, 1);
14705 tree arg1 = CALL_EXPR_ARG (exp, 2);
14706 rtx op0 = expand_normal (arg0);
14707 rtx op1 = expand_normal (arg1);
14708 machine_mode mode0 = insn_data[icode].operand[1].mode;
14709 machine_mode mode1 = insn_data[icode].operand[2].mode;
14710 int form_int;
14711 enum rtx_code code;
14713 if (TREE_CODE (form) != INTEGER_CST)
14715 error ("argument 1 of __builtin_paired_predicate must be a constant");
14716 return const0_rtx;
14718 else
14719 form_int = TREE_INT_CST_LOW (form);
14721 gcc_assert (mode0 == mode1);
14723 if (arg0 == error_mark_node || arg1 == error_mark_node)
14724 return const0_rtx;
14726 if (target == 0
14727 || GET_MODE (target) != SImode
14728 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
14729 target = gen_reg_rtx (SImode);
14730 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14731 op0 = copy_to_mode_reg (mode0, op0);
14732 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14733 op1 = copy_to_mode_reg (mode1, op1);
14735 scratch = gen_reg_rtx (CCFPmode);
14737 pat = GEN_FCN (icode) (scratch, op0, op1);
14738 if (!pat)
14739 return const0_rtx;
14741 emit_insn (pat);
14743 switch (form_int)
14745 /* LT bit. */
14746 case 0:
14747 code = LT;
14748 break;
14749 /* GT bit. */
14750 case 1:
14751 code = GT;
14752 break;
14753 /* EQ bit. */
14754 case 2:
14755 code = EQ;
14756 break;
14757 /* UN bit. */
14758 case 3:
14759 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14760 return target;
14761 default:
14762 error ("argument 1 of __builtin_paired_predicate is out of range");
14763 return const0_rtx;
14766 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14767 emit_move_insn (target, tmp);
14768 return target;
14771 static rtx
14772 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14774 rtx pat, scratch, tmp;
14775 tree form = CALL_EXPR_ARG (exp, 0);
14776 tree arg0 = CALL_EXPR_ARG (exp, 1);
14777 tree arg1 = CALL_EXPR_ARG (exp, 2);
14778 rtx op0 = expand_normal (arg0);
14779 rtx op1 = expand_normal (arg1);
14780 machine_mode mode0 = insn_data[icode].operand[1].mode;
14781 machine_mode mode1 = insn_data[icode].operand[2].mode;
14782 int form_int;
14783 enum rtx_code code;
14785 if (TREE_CODE (form) != INTEGER_CST)
14787 error ("argument 1 of __builtin_spe_predicate must be a constant");
14788 return const0_rtx;
14790 else
14791 form_int = TREE_INT_CST_LOW (form);
14793 gcc_assert (mode0 == mode1);
14795 if (arg0 == error_mark_node || arg1 == error_mark_node)
14796 return const0_rtx;
14798 if (target == 0
14799 || GET_MODE (target) != SImode
14800 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
14801 target = gen_reg_rtx (SImode);
14803 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14804 op0 = copy_to_mode_reg (mode0, op0);
14805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14806 op1 = copy_to_mode_reg (mode1, op1);
14808 scratch = gen_reg_rtx (CCmode);
14810 pat = GEN_FCN (icode) (scratch, op0, op1);
14811 if (! pat)
14812 return const0_rtx;
14813 emit_insn (pat);
14815 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
14816 _lower_. We use one compare, but look in different bits of the
14817 CR for each variant.
14819 There are 2 elements in each SPE simd type (upper/lower). The CR
14820 bits are set as follows:
14822 BIT 0 | BIT 1 | BIT 2 | BIT 3
14823 U | L | (U | L) | (U & L)
14825 So, for an "all" relationship, BIT 3 would be set.
14826 For an "any" relationship, BIT 2 would be set. Etc.
14828 Following traditional nomenclature, these bits map to:
14830 BIT 0 | BIT 1 | BIT 2 | BIT 3
14831 LT | GT | EQ | OV
14833 Later, we will generate rtl to read the OV/EQ/LT/GT bits for the
all/any/upper/lower variants, respectively. */
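  /* A worked example (the builtin spelling here is illustrative): an
     "all greater than" query such as

       int r = __builtin_spe_evcmpgts (0, a, b);

     passes form 0, so the code below reads the OV bit, which is set only
     when both the upper and lower element comparisons hold.  */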
14836 switch (form_int)
14838 /* All variant. OV bit. */
14839 case 0:
14840 /* We need to get to the OV bit, which is the ORDERED bit. We
14841 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
14842 that's ugly and will make validate_condition_mode die.
14843 So let's just use another pattern. */
14844 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14845 return target;
14846 /* Any variant. EQ bit. */
14847 case 1:
14848 code = EQ;
14849 break;
14850 /* Upper variant. LT bit. */
14851 case 2:
14852 code = LT;
14853 break;
14854 /* Lower variant. GT bit. */
14855 case 3:
14856 code = GT;
14857 break;
14858 default:
14859 error ("argument 1 of __builtin_spe_predicate is out of range");
14860 return const0_rtx;
14863 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14864 emit_move_insn (target, tmp);
14866 return target;
14869 /* The evsel builtins look like this:
14871 e = __builtin_spe_evsel_OP (a, b, c, d);
14873 and work like this:
14875 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14876 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower]; */
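/* A concrete sketch (the OP spelling is illustrative):

     e = __builtin_spe_evsel_gts (a, b, c, d);

   yields e[upper] = (a[upper] > b[upper]) ? c[upper] : d[upper], and
   likewise for the lower element.  */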
14879 static rtx
14880 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14882 rtx pat, scratch;
14883 tree arg0 = CALL_EXPR_ARG (exp, 0);
14884 tree arg1 = CALL_EXPR_ARG (exp, 1);
14885 tree arg2 = CALL_EXPR_ARG (exp, 2);
14886 tree arg3 = CALL_EXPR_ARG (exp, 3);
14887 rtx op0 = expand_normal (arg0);
14888 rtx op1 = expand_normal (arg1);
14889 rtx op2 = expand_normal (arg2);
14890 rtx op3 = expand_normal (arg3);
14891 machine_mode mode0 = insn_data[icode].operand[1].mode;
14892 machine_mode mode1 = insn_data[icode].operand[2].mode;
14894 gcc_assert (mode0 == mode1);
14896 if (arg0 == error_mark_node || arg1 == error_mark_node
14897 || arg2 == error_mark_node || arg3 == error_mark_node)
14898 return const0_rtx;
14900 if (target == 0
14901 || GET_MODE (target) != mode0
14902 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14903 target = gen_reg_rtx (mode0);
14905 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14906 op0 = copy_to_mode_reg (mode0, op0);
14907 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14908 op1 = copy_to_mode_reg (mode0, op1);
14909 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14910 op2 = copy_to_mode_reg (mode0, op2);
14911 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14912 op3 = copy_to_mode_reg (mode0, op3);
14914 /* Generate the compare. */
14915 scratch = gen_reg_rtx (CCmode);
14916 pat = GEN_FCN (icode) (scratch, op0, op1);
14917 if (! pat)
14918 return const0_rtx;
14919 emit_insn (pat);
14921 if (mode0 == V2SImode)
14922 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14923 else
14924 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14926 return target;
14929 /* Issue an error message for a builtin function that is called without
14930 the appropriate target options being set. */
14932 static void
14933 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14935 size_t uns_fncode = (size_t)fncode;
14936 const char *name = rs6000_builtin_info[uns_fncode].name;
14937 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14939 gcc_assert (name != NULL);
14940 if ((fnmask & RS6000_BTM_CELL) != 0)
14941 error ("Builtin function %s is only valid for the cell processor", name);
14942 else if ((fnmask & RS6000_BTM_VSX) != 0)
14943 error ("Builtin function %s requires the -mvsx option", name);
14944 else if ((fnmask & RS6000_BTM_HTM) != 0)
14945 error ("Builtin function %s requires the -mhtm option", name);
14946 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14947 error ("Builtin function %s requires the -maltivec option", name);
14948 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14949 error ("Builtin function %s requires the -mpaired option", name);
14950 else if ((fnmask & RS6000_BTM_SPE) != 0)
14951 error ("Builtin function %s requires the -mspe option", name);
14952 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14953 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14954 error ("Builtin function %s requires the -mhard-dfp and"
14955 " -mpower8-vector options", name);
14956 else if ((fnmask & RS6000_BTM_DFP) != 0)
14957 error ("Builtin function %s requires the -mhard-dfp option", name);
14958 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14959 error ("Builtin function %s requires the -mpower8-vector option", name);
14960 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14961 error ("Builtin function %s requires the -mpower9-vector option", name);
14962 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14963 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14964 error ("Builtin function %s requires the -mhard-float and"
14965 " -mlong-double-128 options", name);
14966 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14967 error ("Builtin function %s requires the -mhard-float option", name);
14968 else
14969 error ("Builtin function %s is not supported with the current options",
14970 name);
14973 /* Expand an expression EXP that calls a built-in function,
14974 with result going to TARGET if that's convenient
14975 (and in mode MODE if that's convenient).
14976 SUBTARGET may be used as the target for computing one of EXP's operands.
14977 IGNORE is nonzero if the value is to be ignored. */
14979 static rtx
14980 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14981 machine_mode mode ATTRIBUTE_UNUSED,
14982 int ignore ATTRIBUTE_UNUSED)
14984 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14985 enum rs6000_builtins fcode
14986 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14987 size_t uns_fcode = (size_t)fcode;
14988 const struct builtin_description *d;
14989 size_t i;
14990 rtx ret;
14991 bool success;
14992 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14993 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14995 if (TARGET_DEBUG_BUILTIN)
14997 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14998 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14999 const char *name2 = ((icode != CODE_FOR_nothing)
15000 ? get_insn_name ((int)icode)
15001 : "nothing");
15002 const char *name3;
15004 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15006 default: name3 = "unknown"; break;
15007 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15008 case RS6000_BTC_UNARY: name3 = "unary"; break;
15009 case RS6000_BTC_BINARY: name3 = "binary"; break;
15010 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15011 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15012 case RS6000_BTC_ABS: name3 = "abs"; break;
15013 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
15014 case RS6000_BTC_DST: name3 = "dst"; break;
15018 fprintf (stderr,
15019 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15020 (name1) ? name1 : "---", fcode,
15021 (name2) ? name2 : "---", (int)icode,
15022 name3,
15023 func_valid_p ? "" : ", not valid");
15026 if (!func_valid_p)
15028 rs6000_invalid_builtin (fcode);
15030 /* Given it is invalid, just generate a normal call. */
15031 return expand_call (exp, target, ignore);
15034 switch (fcode)
15036 case RS6000_BUILTIN_RECIP:
15037 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15039 case RS6000_BUILTIN_RECIPF:
15040 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15042 case RS6000_BUILTIN_RSQRTF:
15043 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15045 case RS6000_BUILTIN_RSQRT:
15046 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15048 case POWER7_BUILTIN_BPERMD:
15049 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15050 ? CODE_FOR_bpermd_di
15051 : CODE_FOR_bpermd_si), exp, target);
15053 case RS6000_BUILTIN_GET_TB:
15054 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15055 target);
15057 case RS6000_BUILTIN_MFTB:
15058 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15059 ? CODE_FOR_rs6000_mftb_di
15060 : CODE_FOR_rs6000_mftb_si),
15061 target);
15063 case RS6000_BUILTIN_MFFS:
15064 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15066 case RS6000_BUILTIN_MTFSF:
15067 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15069 case RS6000_BUILTIN_CPU_INIT:
15070 case RS6000_BUILTIN_CPU_IS:
15071 case RS6000_BUILTIN_CPU_SUPPORTS:
15072 return cpu_expand_builtin (fcode, exp, target);
15074 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15075 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15077 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15078 : (int) CODE_FOR_altivec_lvsl_direct);
15079 machine_mode tmode = insn_data[icode].operand[0].mode;
15080 machine_mode mode = insn_data[icode].operand[1].mode;
15081 tree arg;
15082 rtx op, addr, pat;
15084 gcc_assert (TARGET_ALTIVEC);
15086 arg = CALL_EXPR_ARG (exp, 0);
15087 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
15088 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
15089 addr = memory_address (mode, op);
15090 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
15091 op = addr;
15092 else
15094 /* For the load case, we need to negate the address. */
15095 op = gen_reg_rtx (GET_MODE (addr));
15096 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
15098 op = gen_rtx_MEM (mode, op);
15100 if (target == 0
15101 || GET_MODE (target) != tmode
15102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15103 target = gen_reg_rtx (tmode);
15105 pat = GEN_FCN (icode) (target, op);
15106 if (!pat)
15107 return 0;
15108 emit_insn (pat);
15110 return target;
15113 case ALTIVEC_BUILTIN_VCFUX:
15114 case ALTIVEC_BUILTIN_VCFSX:
15115 case ALTIVEC_BUILTIN_VCTUXS:
15116 case ALTIVEC_BUILTIN_VCTSXS:
15117 /* FIXME: There's got to be a nicer way to handle this case than
15118 constructing a new CALL_EXPR. */
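	/* E.g. a one-argument call __builtin_altivec_vcfux (v) is rebuilt
	   below as the two-argument form __builtin_altivec_vcfux (v, 0),
	   supplying the default scale factor of zero.  */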
15119 if (call_expr_nargs (exp) == 1)
15121 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
15122 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
15124 break;
15126 default:
15127 break;
15130 if (TARGET_ALTIVEC)
15132 ret = altivec_expand_builtin (exp, target, &success);
15134 if (success)
15135 return ret;
15137 if (TARGET_SPE)
15139 ret = spe_expand_builtin (exp, target, &success);
15141 if (success)
15142 return ret;
15144 if (TARGET_PAIRED_FLOAT)
15146 ret = paired_expand_builtin (exp, target, &success);
15148 if (success)
15149 return ret;
15151 if (TARGET_HTM)
15153 ret = htm_expand_builtin (exp, target, &success);
15155 if (success)
15156 return ret;
15159 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
15160 gcc_assert (attr == RS6000_BTC_UNARY
15161 || attr == RS6000_BTC_BINARY
15162 || attr == RS6000_BTC_TERNARY);
15164 /* Handle simple unary operations. */
15165 d = bdesc_1arg;
15166 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15167 if (d->code == fcode)
15168 return rs6000_expand_unop_builtin (d->icode, exp, target);
15170 /* Handle simple binary operations. */
15171 d = bdesc_2arg;
15172 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15173 if (d->code == fcode)
15174 return rs6000_expand_binop_builtin (d->icode, exp, target);
15176 /* Handle simple ternary operations. */
15177 d = bdesc_3arg;
15178 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15179 if (d->code == fcode)
15180 return rs6000_expand_ternop_builtin (d->icode, exp, target);
15182 gcc_unreachable ();
15185 static void
15186 rs6000_init_builtins (void)
15188 tree tdecl;
15189 tree ftype;
15190 machine_mode mode;
15192 if (TARGET_DEBUG_BUILTIN)
15193 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
15194 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
15195 (TARGET_SPE) ? ", spe" : "",
15196 (TARGET_ALTIVEC) ? ", altivec" : "",
15197 (TARGET_VSX) ? ", vsx" : "");
15199 V2SI_type_node = build_vector_type (intSI_type_node, 2);
15200 V2SF_type_node = build_vector_type (float_type_node, 2);
15201 V2DI_type_node = build_vector_type (intDI_type_node, 2);
15202 V2DF_type_node = build_vector_type (double_type_node, 2);
15203 V4HI_type_node = build_vector_type (intHI_type_node, 4);
15204 V4SI_type_node = build_vector_type (intSI_type_node, 4);
15205 V4SF_type_node = build_vector_type (float_type_node, 4);
15206 V8HI_type_node = build_vector_type (intHI_type_node, 8);
15207 V16QI_type_node = build_vector_type (intQI_type_node, 16);
15209 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
15210 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
15211 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
15212 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
15214 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
15215 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
15216 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
15217 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
15219 /* We use V1TI mode as a special container to hold __int128_t items that
15220 must live in VSX registers. */
15221 if (intTI_type_node)
15223 V1TI_type_node = build_vector_type (intTI_type_node, 1);
15224 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
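      /* Sketch: with these nodes in place, user code such as
	   __vector __int128 v;
	 gets V1TImode and is carried in a VSX register.  */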
15227 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
15228 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
15229 'vector unsigned short'. */
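  /* A sketch of why the distinction matters (hypothetical C++ user code):

       void f (__vector __bool int);
       void f (__vector unsigned int);

     must declare two different overloads; build_distinct_type_copy below
     guarantees the parameter types do not alias.  */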
15231 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
15232 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15233 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
15234 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
15235 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15237 long_integer_type_internal_node = long_integer_type_node;
15238 long_unsigned_type_internal_node = long_unsigned_type_node;
15239 long_long_integer_type_internal_node = long_long_integer_type_node;
15240 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
15241 intQI_type_internal_node = intQI_type_node;
15242 uintQI_type_internal_node = unsigned_intQI_type_node;
15243 intHI_type_internal_node = intHI_type_node;
15244 uintHI_type_internal_node = unsigned_intHI_type_node;
15245 intSI_type_internal_node = intSI_type_node;
15246 uintSI_type_internal_node = unsigned_intSI_type_node;
15247 intDI_type_internal_node = intDI_type_node;
15248 uintDI_type_internal_node = unsigned_intDI_type_node;
15249 intTI_type_internal_node = intTI_type_node;
15250 uintTI_type_internal_node = unsigned_intTI_type_node;
15251 float_type_internal_node = float_type_node;
15252 double_type_internal_node = double_type_node;
15253 long_double_type_internal_node = long_double_type_node;
15254 dfloat64_type_internal_node = dfloat64_type_node;
15255 dfloat128_type_internal_node = dfloat128_type_node;
15256 void_type_internal_node = void_type_node;
15258 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
15259 IFmode is the IBM extended 128-bit format that is a pair of doubles.
15260 TFmode will be either IEEE 128-bit floating point or the IBM double-double
15261 format that uses a pair of doubles, depending on the switches and
15262 defaults. */
15263 if (TARGET_FLOAT128)
15265 ibm128_float_type_node = make_node (REAL_TYPE);
15266 TYPE_PRECISION (ibm128_float_type_node) = 128;
15267 layout_type (ibm128_float_type_node);
15268 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
15270 ieee128_float_type_node = make_node (REAL_TYPE);
15271 TYPE_PRECISION (ieee128_float_type_node) = 128;
15272 layout_type (ieee128_float_type_node);
15273 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
15275 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
15276 "__float128");
15278 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
15279 "__ibm128");
15282 /* Initialize the modes for builtin_function_type, mapping a machine mode to
15283 tree type node. */
15284 builtin_mode_to_type[QImode][0] = integer_type_node;
15285 builtin_mode_to_type[HImode][0] = integer_type_node;
15286 builtin_mode_to_type[SImode][0] = intSI_type_node;
15287 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
15288 builtin_mode_to_type[DImode][0] = intDI_type_node;
15289 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
15290 builtin_mode_to_type[TImode][0] = intTI_type_node;
15291 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
15292 builtin_mode_to_type[SFmode][0] = float_type_node;
15293 builtin_mode_to_type[DFmode][0] = double_type_node;
15294 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
15295 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
15296 builtin_mode_to_type[TFmode][0] = long_double_type_node;
15297 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
15298 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
15299 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
15300 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
15301 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
15302 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
15303 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
15304 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
15305 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
15306 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
15307 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
15308 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
15309 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
15310 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
15311 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
15312 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
15313 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
15315 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
15316 TYPE_NAME (bool_char_type_node) = tdecl;
15318 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
15319 TYPE_NAME (bool_short_type_node) = tdecl;
15321 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
15322 TYPE_NAME (bool_int_type_node) = tdecl;
15324 tdecl = add_builtin_type ("__pixel", pixel_type_node);
15325 TYPE_NAME (pixel_type_node) = tdecl;
15327 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
15328 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
15329 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
15330 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
15331 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
15333 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
15334 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
15336 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
15337 TYPE_NAME (V16QI_type_node) = tdecl;
15339 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
15340 TYPE_NAME (bool_V16QI_type_node) = tdecl;
15342 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
15343 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
15345 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
15346 TYPE_NAME (V8HI_type_node) = tdecl;
15348 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
15349 TYPE_NAME (bool_V8HI_type_node) = tdecl;
15351 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
15352 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
15354 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
15355 TYPE_NAME (V4SI_type_node) = tdecl;
15357 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
15358 TYPE_NAME (bool_V4SI_type_node) = tdecl;
15360 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
15361 TYPE_NAME (V4SF_type_node) = tdecl;
15363 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
15364 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
15366 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
15367 TYPE_NAME (V2DF_type_node) = tdecl;
15369 if (TARGET_POWERPC64)
15371 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
15372 TYPE_NAME (V2DI_type_node) = tdecl;
15374 tdecl = add_builtin_type ("__vector unsigned long",
15375 unsigned_V2DI_type_node);
15376 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15378 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
15379 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15381 else
15383 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
15384 TYPE_NAME (V2DI_type_node) = tdecl;
15386 tdecl = add_builtin_type ("__vector unsigned long long",
15387 unsigned_V2DI_type_node);
15388 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15390 tdecl = add_builtin_type ("__vector __bool long long",
15391 bool_V2DI_type_node);
15392 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15395 if (V1TI_type_node)
15397 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
15398 TYPE_NAME (V1TI_type_node) = tdecl;
15400 tdecl = add_builtin_type ("__vector unsigned __int128",
15401 unsigned_V1TI_type_node);
15402 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
15405 /* Paired and SPE builtins are only available if the compiler was built
15406 with the corresponding options, so create those builtins only when the
15407 matching option is enabled. Create AltiVec and VSX builtins on machines
15408 with at least the general purpose extensions (970 and newer) to allow the
15409 use of the target attribute. */
15410 if (TARGET_PAIRED_FLOAT)
15411 paired_init_builtins ();
15412 if (TARGET_SPE)
15413 spe_init_builtins ();
15414 if (TARGET_EXTRA_BUILTINS)
15415 altivec_init_builtins ();
15416 if (TARGET_HTM)
15417 htm_init_builtins ();
15419 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
15420 rs6000_common_init_builtins ();
15422 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
15423 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
15424 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
15426 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
15427 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
15428 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
15430 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
15431 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
15432 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
15434 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
15435 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
15436 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
15438 mode = (TARGET_64BIT) ? DImode : SImode;
15439 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
15440 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
15441 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
15443 ftype = build_function_type_list (unsigned_intDI_type_node,
15444 NULL_TREE);
15445 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
15447 if (TARGET_64BIT)
15448 ftype = build_function_type_list (unsigned_intDI_type_node,
15449 NULL_TREE);
15450 else
15451 ftype = build_function_type_list (unsigned_intSI_type_node,
15452 NULL_TREE);
15453 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
15455 ftype = build_function_type_list (double_type_node, NULL_TREE);
15456 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
15458 ftype = build_function_type_list (void_type_node,
15459 intSI_type_node, double_type_node,
15460 NULL_TREE);
15461 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
15463 ftype = build_function_type_list (void_type_node, NULL_TREE);
15464 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
15466 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
15467 NULL_TREE);
15468 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
15469 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
15471 #if TARGET_XCOFF
15472 /* AIX libm provides clog as __clog. */
15473 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
15474 set_user_assembler_name (tdecl, "__clog");
15475 #endif
15477 #ifdef SUBTARGET_INIT_BUILTINS
15478 SUBTARGET_INIT_BUILTINS;
15479 #endif
15482 /* Returns the rs6000 builtin decl for CODE. */
15484 static tree
15485 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
15487 HOST_WIDE_INT fnmask;
15489 if (code >= RS6000_BUILTIN_COUNT)
15490 return error_mark_node;
15492 fnmask = rs6000_builtin_info[code].mask;
15493 if ((fnmask & rs6000_builtin_mask) != fnmask)
15495 rs6000_invalid_builtin ((enum rs6000_builtins)code);
15496 return error_mark_node;
15499 return rs6000_builtin_decls[code];
15502 static void
15503 spe_init_builtins (void)
15505 tree puint_type_node = build_pointer_type (unsigned_type_node);
15506 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
15507 const struct builtin_description *d;
15508 size_t i;
15510 tree v2si_ftype_4_v2si
15511 = build_function_type_list (opaque_V2SI_type_node,
15512 opaque_V2SI_type_node,
15513 opaque_V2SI_type_node,
15514 opaque_V2SI_type_node,
15515 opaque_V2SI_type_node,
15516 NULL_TREE);
15518 tree v2sf_ftype_4_v2sf
15519 = build_function_type_list (opaque_V2SF_type_node,
15520 opaque_V2SF_type_node,
15521 opaque_V2SF_type_node,
15522 opaque_V2SF_type_node,
15523 opaque_V2SF_type_node,
15524 NULL_TREE);
15526 tree int_ftype_int_v2si_v2si
15527 = build_function_type_list (integer_type_node,
15528 integer_type_node,
15529 opaque_V2SI_type_node,
15530 opaque_V2SI_type_node,
15531 NULL_TREE);
15533 tree int_ftype_int_v2sf_v2sf
15534 = build_function_type_list (integer_type_node,
15535 integer_type_node,
15536 opaque_V2SF_type_node,
15537 opaque_V2SF_type_node,
15538 NULL_TREE);
15540 tree void_ftype_v2si_puint_int
15541 = build_function_type_list (void_type_node,
15542 opaque_V2SI_type_node,
15543 puint_type_node,
15544 integer_type_node,
15545 NULL_TREE);
15547 tree void_ftype_v2si_puint_char
15548 = build_function_type_list (void_type_node,
15549 opaque_V2SI_type_node,
15550 puint_type_node,
15551 char_type_node,
15552 NULL_TREE);
15554 tree void_ftype_v2si_pv2si_int
15555 = build_function_type_list (void_type_node,
15556 opaque_V2SI_type_node,
15557 opaque_p_V2SI_type_node,
15558 integer_type_node,
15559 NULL_TREE);
15561 tree void_ftype_v2si_pv2si_char
15562 = build_function_type_list (void_type_node,
15563 opaque_V2SI_type_node,
15564 opaque_p_V2SI_type_node,
15565 char_type_node,
15566 NULL_TREE);
15568 tree void_ftype_int
15569 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15571 tree int_ftype_void
15572 = build_function_type_list (integer_type_node, NULL_TREE);
15574 tree v2si_ftype_pv2si_int
15575 = build_function_type_list (opaque_V2SI_type_node,
15576 opaque_p_V2SI_type_node,
15577 integer_type_node,
15578 NULL_TREE);
15580 tree v2si_ftype_puint_int
15581 = build_function_type_list (opaque_V2SI_type_node,
15582 puint_type_node,
15583 integer_type_node,
15584 NULL_TREE);
15586 tree v2si_ftype_pushort_int
15587 = build_function_type_list (opaque_V2SI_type_node,
15588 pushort_type_node,
15589 integer_type_node,
15590 NULL_TREE);
15592 tree v2si_ftype_signed_char
15593 = build_function_type_list (opaque_V2SI_type_node,
15594 signed_char_type_node,
15595 NULL_TREE);
15597 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
15599 /* Initialize irregular SPE builtins. */
15601 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
15602 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
15603 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
15604 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
15605 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
15606 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
15607 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
15608 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
15609 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
15610 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
15611 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
15612 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
15613 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
15614 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
15615 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
15616 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
15617 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
15618 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
15620 /* Loads. */
15621 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
15622 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
15623 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
15624 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
15625 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
15626 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
15627 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
15628 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
15629 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
15630 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
15631 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
15632 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
15633 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
15634 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
15635 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
15636 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
15637 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
15638 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
15639 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
15640 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
15641 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
15642 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
15644 /* Predicates. */
15645 d = bdesc_spe_predicates;
15646 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
15648 tree type;
15650 switch (insn_data[d->icode].operand[1].mode)
15652 case V2SImode:
15653 type = int_ftype_int_v2si_v2si;
15654 break;
15655 case V2SFmode:
15656 type = int_ftype_int_v2sf_v2sf;
15657 break;
15658 default:
15659 gcc_unreachable ();
15662 def_builtin (d->name, type, d->code);
15665 /* Evsel predicates. */
15666 d = bdesc_spe_evsel;
15667 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
15669 tree type;
15671 switch (insn_data[d->icode].operand[1].mode)
15673 case V2SImode:
15674 type = v2si_ftype_4_v2si;
15675 break;
15676 case V2SFmode:
15677 type = v2sf_ftype_4_v2sf;
15678 break;
15679 default:
15680 gcc_unreachable ();
15683 def_builtin (d->name, type, d->code);
15687 static void
15688 paired_init_builtins (void)
15690 const struct builtin_description *d;
15691 size_t i;
15693 tree int_ftype_int_v2sf_v2sf
15694 = build_function_type_list (integer_type_node,
15695 integer_type_node,
15696 V2SF_type_node,
15697 V2SF_type_node,
15698 NULL_TREE);
15699 tree pcfloat_type_node =
15700 build_pointer_type (build_qualified_type
15701 (float_type_node, TYPE_QUAL_CONST));
15703 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
15704 long_integer_type_node,
15705 pcfloat_type_node,
15706 NULL_TREE);
15707 tree void_ftype_v2sf_long_pcfloat =
15708 build_function_type_list (void_type_node,
15709 V2SF_type_node,
15710 long_integer_type_node,
15711 pcfloat_type_node,
15712 NULL_TREE);
15715 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
15716 PAIRED_BUILTIN_LX);
15719 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
15720 PAIRED_BUILTIN_STX);
15722 /* Predicates. */
15723 d = bdesc_paired_preds;
15724 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
15726 tree type;
15728 if (TARGET_DEBUG_BUILTIN)
15729 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
15730 (int)i, get_insn_name (d->icode), (int)d->icode,
15731 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
15733 switch (insn_data[d->icode].operand[1].mode)
15735 case V2SFmode:
15736 type = int_ftype_int_v2sf_v2sf;
15737 break;
15738 default:
15739 gcc_unreachable ();
15742 def_builtin (d->name, type, d->code);
15746 static void
15747 altivec_init_builtins (void)
15749 const struct builtin_description *d;
15750 size_t i;
15751 tree ftype;
15752 tree decl;
15754 tree pvoid_type_node = build_pointer_type (void_type_node);
15756 tree pcvoid_type_node
15757 = build_pointer_type (build_qualified_type (void_type_node,
15758 TYPE_QUAL_CONST));
15760 tree int_ftype_opaque
15761 = build_function_type_list (integer_type_node,
15762 opaque_V4SI_type_node, NULL_TREE);
15763 tree opaque_ftype_opaque
15764 = build_function_type_list (integer_type_node, NULL_TREE);
15765 tree opaque_ftype_opaque_int
15766 = build_function_type_list (opaque_V4SI_type_node,
15767 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
15768 tree opaque_ftype_opaque_opaque_int
15769 = build_function_type_list (opaque_V4SI_type_node,
15770 opaque_V4SI_type_node, opaque_V4SI_type_node,
15771 integer_type_node, NULL_TREE);
15772 tree opaque_ftype_opaque_opaque_opaque
15773 = build_function_type_list (opaque_V4SI_type_node,
15774 opaque_V4SI_type_node, opaque_V4SI_type_node,
15775 opaque_V4SI_type_node, NULL_TREE);
15776 tree int_ftype_int_opaque_opaque
15777 = build_function_type_list (integer_type_node,
15778 integer_type_node, opaque_V4SI_type_node,
15779 opaque_V4SI_type_node, NULL_TREE);
15780 tree int_ftype_int_v4si_v4si
15781 = build_function_type_list (integer_type_node,
15782 integer_type_node, V4SI_type_node,
15783 V4SI_type_node, NULL_TREE);
15784 tree int_ftype_int_v2di_v2di
15785 = build_function_type_list (integer_type_node,
15786 integer_type_node, V2DI_type_node,
15787 V2DI_type_node, NULL_TREE);
15788 tree void_ftype_v4si
15789 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
15790 tree v8hi_ftype_void
15791 = build_function_type_list (V8HI_type_node, NULL_TREE);
15792 tree void_ftype_void
15793 = build_function_type_list (void_type_node, NULL_TREE);
15794 tree void_ftype_int
15795 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15797 tree opaque_ftype_long_pcvoid
15798 = build_function_type_list (opaque_V4SI_type_node,
15799 long_integer_type_node, pcvoid_type_node,
15800 NULL_TREE);
15801 tree v16qi_ftype_long_pcvoid
15802 = build_function_type_list (V16QI_type_node,
15803 long_integer_type_node, pcvoid_type_node,
15804 NULL_TREE);
15805 tree v8hi_ftype_long_pcvoid
15806 = build_function_type_list (V8HI_type_node,
15807 long_integer_type_node, pcvoid_type_node,
15808 NULL_TREE);
15809 tree v4si_ftype_long_pcvoid
15810 = build_function_type_list (V4SI_type_node,
15811 long_integer_type_node, pcvoid_type_node,
15812 NULL_TREE);
15813 tree v4sf_ftype_long_pcvoid
15814 = build_function_type_list (V4SF_type_node,
15815 long_integer_type_node, pcvoid_type_node,
15816 NULL_TREE);
15817 tree v2df_ftype_long_pcvoid
15818 = build_function_type_list (V2DF_type_node,
15819 long_integer_type_node, pcvoid_type_node,
15820 NULL_TREE);
15821 tree v2di_ftype_long_pcvoid
15822 = build_function_type_list (V2DI_type_node,
15823 long_integer_type_node, pcvoid_type_node,
15824 NULL_TREE);
15826 tree void_ftype_opaque_long_pvoid
15827 = build_function_type_list (void_type_node,
15828 opaque_V4SI_type_node, long_integer_type_node,
15829 pvoid_type_node, NULL_TREE);
15830 tree void_ftype_v4si_long_pvoid
15831 = build_function_type_list (void_type_node,
15832 V4SI_type_node, long_integer_type_node,
15833 pvoid_type_node, NULL_TREE);
15834 tree void_ftype_v16qi_long_pvoid
15835 = build_function_type_list (void_type_node,
15836 V16QI_type_node, long_integer_type_node,
15837 pvoid_type_node, NULL_TREE);
15838 tree void_ftype_v8hi_long_pvoid
15839 = build_function_type_list (void_type_node,
15840 V8HI_type_node, long_integer_type_node,
15841 pvoid_type_node, NULL_TREE);
15842 tree void_ftype_v4sf_long_pvoid
15843 = build_function_type_list (void_type_node,
15844 V4SF_type_node, long_integer_type_node,
15845 pvoid_type_node, NULL_TREE);
15846 tree void_ftype_v2df_long_pvoid
15847 = build_function_type_list (void_type_node,
15848 V2DF_type_node, long_integer_type_node,
15849 pvoid_type_node, NULL_TREE);
15850 tree void_ftype_v2di_long_pvoid
15851 = build_function_type_list (void_type_node,
15852 V2DI_type_node, long_integer_type_node,
15853 pvoid_type_node, NULL_TREE);
15854 tree int_ftype_int_v8hi_v8hi
15855 = build_function_type_list (integer_type_node,
15856 integer_type_node, V8HI_type_node,
15857 V8HI_type_node, NULL_TREE);
15858 tree int_ftype_int_v16qi_v16qi
15859 = build_function_type_list (integer_type_node,
15860 integer_type_node, V16QI_type_node,
15861 V16QI_type_node, NULL_TREE);
15862 tree int_ftype_int_v4sf_v4sf
15863 = build_function_type_list (integer_type_node,
15864 integer_type_node, V4SF_type_node,
15865 V4SF_type_node, NULL_TREE);
15866 tree int_ftype_int_v2df_v2df
15867 = build_function_type_list (integer_type_node,
15868 integer_type_node, V2DF_type_node,
15869 V2DF_type_node, NULL_TREE);
15870 tree v2di_ftype_v2di
15871 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
15872 tree v4si_ftype_v4si
15873 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15874 tree v8hi_ftype_v8hi
15875 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15876 tree v16qi_ftype_v16qi
15877 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15878 tree v4sf_ftype_v4sf
15879 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15880 tree v2df_ftype_v2df
15881 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15882 tree void_ftype_pcvoid_int_int
15883 = build_function_type_list (void_type_node,
15884 pcvoid_type_node, integer_type_node,
15885 integer_type_node, NULL_TREE);
15887 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
15888 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
15889 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
15890 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
15891 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
15892 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
15893 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
15894 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
15895 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
15896 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
15897 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
15898 ALTIVEC_BUILTIN_LVXL_V2DF);
15899 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
15900 ALTIVEC_BUILTIN_LVXL_V2DI);
15901 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
15902 ALTIVEC_BUILTIN_LVXL_V4SF);
15903 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
15904 ALTIVEC_BUILTIN_LVXL_V4SI);
15905 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15906 ALTIVEC_BUILTIN_LVXL_V8HI);
15907 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15908 ALTIVEC_BUILTIN_LVXL_V16QI);
15909 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15910 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15911 ALTIVEC_BUILTIN_LVX_V2DF);
15912 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15913 ALTIVEC_BUILTIN_LVX_V2DI);
15914 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15915 ALTIVEC_BUILTIN_LVX_V4SF);
15916 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15917 ALTIVEC_BUILTIN_LVX_V4SI);
15918 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15919 ALTIVEC_BUILTIN_LVX_V8HI);
15920 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15921 ALTIVEC_BUILTIN_LVX_V16QI);
15922 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15923 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15924 ALTIVEC_BUILTIN_STVX_V2DF);
15925 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15926 ALTIVEC_BUILTIN_STVX_V2DI);
15927 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15928 ALTIVEC_BUILTIN_STVX_V4SF);
15929 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15930 ALTIVEC_BUILTIN_STVX_V4SI);
15931 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15932 ALTIVEC_BUILTIN_STVX_V8HI);
15933 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15934 ALTIVEC_BUILTIN_STVX_V16QI);
15935 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15936 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15937 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15938 ALTIVEC_BUILTIN_STVXL_V2DF);
15939 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15940 ALTIVEC_BUILTIN_STVXL_V2DI);
15941 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15942 ALTIVEC_BUILTIN_STVXL_V4SF);
15943 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15944 ALTIVEC_BUILTIN_STVXL_V4SI);
15945 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15946 ALTIVEC_BUILTIN_STVXL_V8HI);
15947 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15948 ALTIVEC_BUILTIN_STVXL_V16QI);
15949 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15950 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15951 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15952 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15953 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15954 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15955 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15956 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15957 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15958 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15959 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15960 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15961 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15962 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15963 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15964 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15966 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15967 VSX_BUILTIN_LXVD2X_V2DF);
15968 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15969 VSX_BUILTIN_LXVD2X_V2DI);
15970 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15971 VSX_BUILTIN_LXVW4X_V4SF);
15972 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15973 VSX_BUILTIN_LXVW4X_V4SI);
15974 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15975 VSX_BUILTIN_LXVW4X_V8HI);
15976 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15977 VSX_BUILTIN_LXVW4X_V16QI);
15978 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15979 VSX_BUILTIN_STXVD2X_V2DF);
15980 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15981 VSX_BUILTIN_STXVD2X_V2DI);
15982 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15983 VSX_BUILTIN_STXVW4X_V4SF);
15984 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15985 VSX_BUILTIN_STXVW4X_V4SI);
15986 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15987 VSX_BUILTIN_STXVW4X_V8HI);
15988 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15989 VSX_BUILTIN_STXVW4X_V16QI);
15991 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
15992 VSX_BUILTIN_LD_ELEMREV_V2DF);
15993 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
15994 VSX_BUILTIN_LD_ELEMREV_V2DI);
15995 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
15996 VSX_BUILTIN_LD_ELEMREV_V4SF);
15997 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
15998 VSX_BUILTIN_LD_ELEMREV_V4SI);
15999 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16000 VSX_BUILTIN_ST_ELEMREV_V2DF);
16001 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16002 VSX_BUILTIN_ST_ELEMREV_V2DI);
16003 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16004 VSX_BUILTIN_ST_ELEMREV_V4SF);
16005 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16006 VSX_BUILTIN_ST_ELEMREV_V4SI);
16008 if (TARGET_P9_VECTOR)
16010 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16011 VSX_BUILTIN_LD_ELEMREV_V8HI);
16012 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16013 VSX_BUILTIN_LD_ELEMREV_V16QI);
16014 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
16015 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
16016 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
16017 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
16020 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16021 VSX_BUILTIN_VEC_LD);
16022 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16023 VSX_BUILTIN_VEC_ST);
16024 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16025 VSX_BUILTIN_VEC_XL);
16026 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16027 VSX_BUILTIN_VEC_XST);
16029 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16030 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16031 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16033 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16034 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16035 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16036 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16037 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16038 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16039 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16040 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16041 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16042 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16043 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16044 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16046 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16047 ALTIVEC_BUILTIN_VEC_ADDE);
16049 /* Cell builtins. */
16050 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16051 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16052 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16053 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16055 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16056 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16057 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16058 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16060 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16061 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16062 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16063 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16065 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16066 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16067 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16068 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16070 /* Add the DST variants. */
16071 d = bdesc_dst;
16072 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16073 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16075 /* Initialize the predicates. */
16076 d = bdesc_altivec_preds;
16077 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16079 machine_mode mode1;
16080 tree type;
16082 if (rs6000_overloaded_builtin_p (d->code))
16083 mode1 = VOIDmode;
16084 else
16085 mode1 = insn_data[d->icode].operand[1].mode;
16087 switch (mode1)
16089 case VOIDmode:
16090 type = int_ftype_int_opaque_opaque;
16091 break;
16092 case V2DImode:
16093 type = int_ftype_int_v2di_v2di;
16094 break;
16095 case V4SImode:
16096 type = int_ftype_int_v4si_v4si;
16097 break;
16098 case V8HImode:
16099 type = int_ftype_int_v8hi_v8hi;
16100 break;
16101 case V16QImode:
16102 type = int_ftype_int_v16qi_v16qi;
16103 break;
16104 case V4SFmode:
16105 type = int_ftype_int_v4sf_v4sf;
16106 break;
16107 case V2DFmode:
16108 type = int_ftype_int_v2df_v2df;
16109 break;
16110 default:
16111 gcc_unreachable ();
16114 def_builtin (d->name, type, d->code);
16117 /* Initialize the abs* operators. */
16118 d = bdesc_abs;
16119 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16121 machine_mode mode0;
16122 tree type;
16124 mode0 = insn_data[d->icode].operand[0].mode;
16126 switch (mode0)
16128 case V2DImode:
16129 type = v2di_ftype_v2di;
16130 break;
16131 case V4SImode:
16132 type = v4si_ftype_v4si;
16133 break;
16134 case V8HImode:
16135 type = v8hi_ftype_v8hi;
16136 break;
16137 case V16QImode:
16138 type = v16qi_ftype_v16qi;
16139 break;
16140 case V4SFmode:
16141 type = v4sf_ftype_v4sf;
16142 break;
16143 case V2DFmode:
16144 type = v2df_ftype_v2df;
16145 break;
16146 default:
16147 gcc_unreachable ();
16150 def_builtin (d->name, type, d->code);
16153 /* Initialize target builtin that implements
16154 targetm.vectorize.builtin_mask_for_load. */
16156 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16157 v16qi_ftype_long_pcvoid,
16158 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16159 BUILT_IN_MD, NULL, NULL_TREE);
16160 TREE_READONLY (decl) = 1;
16161 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16162 altivec_builtin_mask_for_load = decl;
16164 /* Access to the vec_init patterns. */
16165 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16166 integer_type_node, integer_type_node,
16167 integer_type_node, NULL_TREE);
16168 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16170 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16171 short_integer_type_node,
16172 short_integer_type_node,
16173 short_integer_type_node,
16174 short_integer_type_node,
16175 short_integer_type_node,
16176 short_integer_type_node,
16177 short_integer_type_node, NULL_TREE);
16178 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16180 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16181 char_type_node, char_type_node,
16182 char_type_node, char_type_node,
16183 char_type_node, char_type_node,
16184 char_type_node, char_type_node,
16185 char_type_node, char_type_node,
16186 char_type_node, char_type_node,
16187 char_type_node, char_type_node,
16188 char_type_node, NULL_TREE);
16189 def_builtin ("__builtin_vec_init_v16qi", ftype,
16190 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16192 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16193 float_type_node, float_type_node,
16194 float_type_node, NULL_TREE);
16195 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16197 /* VSX builtins. */
16198 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16199 double_type_node, NULL_TREE);
16200 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16202 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16203 intDI_type_node, NULL_TREE);
16204 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16206 /* Access to the vec_set patterns. */
16207 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16208 intSI_type_node,
16209 integer_type_node, NULL_TREE);
16210 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16212 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16213 intHI_type_node,
16214 integer_type_node, NULL_TREE);
16215 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16217 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16218 intQI_type_node,
16219 integer_type_node, NULL_TREE);
16220 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16222 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16223 float_type_node,
16224 integer_type_node, NULL_TREE);
16225 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16227 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16228 double_type_node,
16229 integer_type_node, NULL_TREE);
16230 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16232 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16233 intDI_type_node,
16234 integer_type_node, NULL_TREE);
16235 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
16237 /* Access to the vec_extract patterns. */
16238 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16239 integer_type_node, NULL_TREE);
16240 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
16242 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16243 integer_type_node, NULL_TREE);
16244 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
16246 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16247 integer_type_node, NULL_TREE);
16248 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
16250 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16251 integer_type_node, NULL_TREE);
16252 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
16254 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16255 integer_type_node, NULL_TREE);
16256 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
16258 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
16259 integer_type_node, NULL_TREE);
16260 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
16263 if (V1TI_type_node)
16265 tree v1ti_ftype_long_pcvoid
16266 = build_function_type_list (V1TI_type_node,
16267 long_integer_type_node, pcvoid_type_node,
16268 NULL_TREE);
16269 tree void_ftype_v1ti_long_pvoid
16270 = build_function_type_list (void_type_node,
16271 V1TI_type_node, long_integer_type_node,
16272 pvoid_type_node, NULL_TREE);
16273 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
16274 VSX_BUILTIN_LXVD2X_V1TI);
16275 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
16276 VSX_BUILTIN_STXVD2X_V1TI);
16277 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
16278 NULL_TREE, NULL_TREE);
16279 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
16280 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
16281 intTI_type_node,
16282 integer_type_node, NULL_TREE);
16283 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
16284 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
16285 integer_type_node, NULL_TREE);
16286 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
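/* Usage sketch (an assumed illustration, not from the original sources):
   the overloaded builtins defined above surface to users through
   altivec.h, e.g.

     #include <altivec.h>
     vector signed int v = vec_ld (0, p);   // ALTIVEC_BUILTIN_VEC_LD

   with the concrete lvx form substituted later by the overload
   resolution in rs6000-c.c.  */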
16291 static void
16292 htm_init_builtins (void)
16294 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16295 const struct builtin_description *d;
16296 size_t i;
16298 d = bdesc_htm;
16299 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
16301 tree op[MAX_HTM_OPERANDS], type;
16302 HOST_WIDE_INT mask = d->mask;
16303 unsigned attr = rs6000_builtin_info[d->code].attr;
16304 bool void_func = (attr & RS6000_BTC_VOID);
16305 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
16306 int nopnds = 0;
16307 tree gpr_type_node;
16308 tree rettype;
16309 tree argtype;
16311 if (TARGET_32BIT && TARGET_POWERPC64)
16312 gpr_type_node = long_long_unsigned_type_node;
16313 else
16314 gpr_type_node = long_unsigned_type_node;
16316 if (attr & RS6000_BTC_SPR)
16318 rettype = gpr_type_node;
16319 argtype = gpr_type_node;
16321 else if (d->code == HTM_BUILTIN_TABORTDC
16322 || d->code == HTM_BUILTIN_TABORTDCI)
16324 rettype = unsigned_type_node;
16325 argtype = gpr_type_node;
16327 else
16329 rettype = unsigned_type_node;
16330 argtype = unsigned_type_node;
16333 if ((mask & builtin_mask) != mask)
16335 if (TARGET_DEBUG_BUILTIN)
16336 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
16337 continue;
16340 if (d->name == 0)
16342 if (TARGET_DEBUG_BUILTIN)
16343 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
16344 (long unsigned) i);
16345 continue;
16348 op[nopnds++] = (void_func) ? void_type_node : rettype;
16350 if (attr_args == RS6000_BTC_UNARY)
16351 op[nopnds++] = argtype;
16352 else if (attr_args == RS6000_BTC_BINARY)
16354 op[nopnds++] = argtype;
16355 op[nopnds++] = argtype;
16357 else if (attr_args == RS6000_BTC_TERNARY)
16359 op[nopnds++] = argtype;
16360 op[nopnds++] = argtype;
16361 op[nopnds++] = argtype;
16364 switch (nopnds)
16366 case 1:
16367 type = build_function_type_list (op[0], NULL_TREE);
16368 break;
16369 case 2:
16370 type = build_function_type_list (op[0], op[1], NULL_TREE);
16371 break;
16372 case 3:
16373 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
16374 break;
16375 case 4:
16376 type = build_function_type_list (op[0], op[1], op[2], op[3],
16377 NULL_TREE);
16378 break;
16379 default:
16380 gcc_unreachable ();
16383 def_builtin (d->name, type, d->code);
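/* A minimal sketch (a hypothetical helper, not from the original sources)
   of the mapping the loop above applies: the RS6000_BTC_* type bits decide
   how many copies of ARGTYPE follow the return type.  */

static int
htm_example_num_args (unsigned attr)
{
  switch (attr & RS6000_BTC_TYPE_MASK)
    {
    case RS6000_BTC_UNARY:	return 1;
    case RS6000_BTC_BINARY:	return 2;
    case RS6000_BTC_TERNARY:	return 3;
    default:			return 0;	/* takes no arguments */
    }
}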
16387 /* Hash function for builtin functions with up to 3 arguments and a return
16388 type. */
16389 hashval_t
16390 builtin_hasher::hash (builtin_hash_struct *bh)
16392 unsigned ret = 0;
16393 int i;
16395 for (i = 0; i < 4; i++)
16397 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
16398 ret = (ret * 2) + bh->uns_p[i];
16401 return ret;
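/* A minimal sketch of the arithmetic above: each (mode, uns_p) pair is
   folded in as a digit of a mixed-radix number, radix MAX_MACHINE_MODE
   for the mode and radix 2 for the signedness flag:

     h' = (h * MAX_MACHINE_MODE + mode) * 2 + uns_p

   applied once per slot, so all four pairs shape the final hash.  */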
16404 /* Compare builtin hash entries H1 and H2 for equivalence. */
16405 bool
16406 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
16408 return ((p1->mode[0] == p2->mode[0])
16409 && (p1->mode[1] == p2->mode[1])
16410 && (p1->mode[2] == p2->mode[2])
16411 && (p1->mode[3] == p2->mode[3])
16412 && (p1->uns_p[0] == p2->uns_p[0])
16413 && (p1->uns_p[1] == p2->uns_p[1])
16414 && (p1->uns_p[2] == p2->uns_p[2])
16415 && (p1->uns_p[3] == p2->uns_p[3]));
16418 /* Map types for builtin functions with an explicit return type and up to 3
16419 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
16420 of the unused arguments. */
16421 static tree
16422 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
16423 machine_mode mode_arg1, machine_mode mode_arg2,
16424 enum rs6000_builtins builtin, const char *name)
16426 struct builtin_hash_struct h;
16427 struct builtin_hash_struct *h2;
16428 int num_args = 3;
16429 int i;
16430 tree ret_type = NULL_TREE;
16431 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
16433 /* Create builtin_hash_table. */
16434 if (builtin_hash_table == NULL)
16435 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
16437 h.type = NULL_TREE;
16438 h.mode[0] = mode_ret;
16439 h.mode[1] = mode_arg0;
16440 h.mode[2] = mode_arg1;
16441 h.mode[3] = mode_arg2;
16442 h.uns_p[0] = 0;
16443 h.uns_p[1] = 0;
16444 h.uns_p[2] = 0;
16445 h.uns_p[3] = 0;
16447 /* If the builtin produces unsigned results or takes unsigned
16448 arguments, and it is returned as a decl for the vectorizer (such as
16449 widening multiplies, permute), make sure the arguments and return value
16450 are type correct. */
16451 switch (builtin)
16453 /* unsigned 1 argument functions. */
16454 case CRYPTO_BUILTIN_VSBOX:
16455 case P8V_BUILTIN_VGBBD:
16456 case MISC_BUILTIN_CDTBCD:
16457 case MISC_BUILTIN_CBCDTD:
16458 h.uns_p[0] = 1;
16459 h.uns_p[1] = 1;
16460 break;
16462 /* unsigned 2 argument functions. */
16463 case ALTIVEC_BUILTIN_VMULEUB_UNS:
16464 case ALTIVEC_BUILTIN_VMULEUH_UNS:
16465 case ALTIVEC_BUILTIN_VMULOUB_UNS:
16466 case ALTIVEC_BUILTIN_VMULOUH_UNS:
16467 case CRYPTO_BUILTIN_VCIPHER:
16468 case CRYPTO_BUILTIN_VCIPHERLAST:
16469 case CRYPTO_BUILTIN_VNCIPHER:
16470 case CRYPTO_BUILTIN_VNCIPHERLAST:
16471 case CRYPTO_BUILTIN_VPMSUMB:
16472 case CRYPTO_BUILTIN_VPMSUMH:
16473 case CRYPTO_BUILTIN_VPMSUMW:
16474 case CRYPTO_BUILTIN_VPMSUMD:
16475 case CRYPTO_BUILTIN_VPMSUM:
16476 case MISC_BUILTIN_ADDG6S:
16477 case MISC_BUILTIN_DIVWEU:
16478 case MISC_BUILTIN_DIVWEUO:
16479 case MISC_BUILTIN_DIVDEU:
16480 case MISC_BUILTIN_DIVDEUO:
16481 h.uns_p[0] = 1;
16482 h.uns_p[1] = 1;
16483 h.uns_p[2] = 1;
16484 break;
16486 /* unsigned 3 argument functions. */
16487 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
16488 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
16489 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
16490 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
16491 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
16492 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
16493 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
16494 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
16495 case VSX_BUILTIN_VPERM_16QI_UNS:
16496 case VSX_BUILTIN_VPERM_8HI_UNS:
16497 case VSX_BUILTIN_VPERM_4SI_UNS:
16498 case VSX_BUILTIN_VPERM_2DI_UNS:
16499 case VSX_BUILTIN_XXSEL_16QI_UNS:
16500 case VSX_BUILTIN_XXSEL_8HI_UNS:
16501 case VSX_BUILTIN_XXSEL_4SI_UNS:
16502 case VSX_BUILTIN_XXSEL_2DI_UNS:
16503 case CRYPTO_BUILTIN_VPERMXOR:
16504 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
16505 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
16506 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
16507 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
16508 case CRYPTO_BUILTIN_VSHASIGMAW:
16509 case CRYPTO_BUILTIN_VSHASIGMAD:
16510 case CRYPTO_BUILTIN_VSHASIGMA:
16511 h.uns_p[0] = 1;
16512 h.uns_p[1] = 1;
16513 h.uns_p[2] = 1;
16514 h.uns_p[3] = 1;
16515 break;
16517 /* signed permute functions with unsigned char mask. */
16518 case ALTIVEC_BUILTIN_VPERM_16QI:
16519 case ALTIVEC_BUILTIN_VPERM_8HI:
16520 case ALTIVEC_BUILTIN_VPERM_4SI:
16521 case ALTIVEC_BUILTIN_VPERM_4SF:
16522 case ALTIVEC_BUILTIN_VPERM_2DI:
16523 case ALTIVEC_BUILTIN_VPERM_2DF:
16524 case VSX_BUILTIN_VPERM_16QI:
16525 case VSX_BUILTIN_VPERM_8HI:
16526 case VSX_BUILTIN_VPERM_4SI:
16527 case VSX_BUILTIN_VPERM_4SF:
16528 case VSX_BUILTIN_VPERM_2DI:
16529 case VSX_BUILTIN_VPERM_2DF:
16530 h.uns_p[3] = 1;
16531 break;
16533 /* unsigned args, signed return. */
16534 case VSX_BUILTIN_XVCVUXDDP_UNS:
16535 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
16536 h.uns_p[1] = 1;
16537 break;
16539 /* signed args, unsigned return. */
16540 case VSX_BUILTIN_XVCVDPUXDS_UNS:
16541 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
16542 case MISC_BUILTIN_UNPACK_TD:
16543 case MISC_BUILTIN_UNPACK_V1TI:
16544 h.uns_p[0] = 1;
16545 break;
16547 /* unsigned arguments for 128-bit pack instructions. */
16548 case MISC_BUILTIN_PACK_TD:
16549 case MISC_BUILTIN_PACK_V1TI:
16550 h.uns_p[1] = 1;
16551 h.uns_p[2] = 1;
16552 break;
16554 default:
16555 break;
16558 /* Figure out how many args are present. */
16559 while (num_args > 0 && h.mode[num_args] == VOIDmode)
16560 num_args--;
16562 if (num_args == 0)
16563 fatal_error (input_location,
16564 "internal error: builtin function %s had no type", name);
16566 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
16567 if (!ret_type && h.uns_p[0])
16568 ret_type = builtin_mode_to_type[h.mode[0]][0];
16570 if (!ret_type)
16571 fatal_error (input_location,
16572 "internal error: builtin function %s had an unexpected "
16573 "return type %s", name, GET_MODE_NAME (h.mode[0]));
16575 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
16576 arg_type[i] = NULL_TREE;
16578 for (i = 0; i < num_args; i++)
16580 int m = (int) h.mode[i+1];
16581 int uns_p = h.uns_p[i+1];
16583 arg_type[i] = builtin_mode_to_type[m][uns_p];
16584 if (!arg_type[i] && uns_p)
16585 arg_type[i] = builtin_mode_to_type[m][0];
16587 if (!arg_type[i])
16588 fatal_error (input_location,
16589 "internal error: builtin function %s, argument %d "
16590 "had unexpected argument type %s", name, i,
16591 GET_MODE_NAME (m));
16594 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
16595 if (*found == NULL)
16597 h2 = ggc_alloc<builtin_hash_struct> ();
16598 *h2 = h;
16599 *found = h2;
16601 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
16602 arg_type[2], NULL_TREE);
16605 return (*found)->type;
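/* A minimal sketch (not from the original sources) of the hash-consing
   pattern above: probe the table with INSERT and build the FUNCTION_TYPE
   only on a miss, so later calls with the same modes and signedness reuse
   one cached tree.  copy_key and build_type are hypothetical stand-ins
   for the ggc_alloc and build_function_type_list steps:

     slot = table->find_slot (&key, INSERT);
     if (*slot == NULL)
       {
         *slot = copy_key (&key);
         (*slot)->type = build_type (&key);
       }
     return (*slot)->type;
*/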
16608 static void
16609 rs6000_common_init_builtins (void)
16611 const struct builtin_description *d;
16612 size_t i;
16614 tree opaque_ftype_opaque = NULL_TREE;
16615 tree opaque_ftype_opaque_opaque = NULL_TREE;
16616 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
16617 tree v2si_ftype_qi = NULL_TREE;
16618 tree v2si_ftype_v2si_qi = NULL_TREE;
16619 tree v2si_ftype_int_qi = NULL_TREE;
16620 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16622 if (!TARGET_PAIRED_FLOAT)
16624 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
16625 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
16628 /* Paired and SPE builtins are only available if you build a compiler with
16629 the appropriate options, so only create those builtins with the
16630 appropriate compiler option. Create Altivec and VSX builtins on machines
16631 with at least the general purpose extensions (970 and newer) to allow the
16632 use of the target attribute. */
16634 if (TARGET_EXTRA_BUILTINS)
16635 builtin_mask |= RS6000_BTM_COMMON;
16637 /* Add the ternary operators. */
16638 d = bdesc_3arg;
16639 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16641 tree type;
16642 HOST_WIDE_INT mask = d->mask;
16644 if ((mask & builtin_mask) != mask)
16646 if (TARGET_DEBUG_BUILTIN)
16647 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
16648 continue;
16651 if (rs6000_overloaded_builtin_p (d->code))
16653 if (! (type = opaque_ftype_opaque_opaque_opaque))
16654 type = opaque_ftype_opaque_opaque_opaque
16655 = build_function_type_list (opaque_V4SI_type_node,
16656 opaque_V4SI_type_node,
16657 opaque_V4SI_type_node,
16658 opaque_V4SI_type_node,
16659 NULL_TREE);
16661 else
16663 enum insn_code icode = d->icode;
16664 if (d->name == 0)
16666 if (TARGET_DEBUG_BUILTIN)
16667 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
16668 (long unsigned)i);
16670 continue;
16673 if (icode == CODE_FOR_nothing)
16675 if (TARGET_DEBUG_BUILTIN)
16676 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
16677 d->name);
16679 continue;
16682 type = builtin_function_type (insn_data[icode].operand[0].mode,
16683 insn_data[icode].operand[1].mode,
16684 insn_data[icode].operand[2].mode,
16685 insn_data[icode].operand[3].mode,
16686 d->code, d->name);
16689 def_builtin (d->name, type, d->code);
16692 /* Add the binary operators. */
16693 d = bdesc_2arg;
16694 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16696 machine_mode mode0, mode1, mode2;
16697 tree type;
16698 HOST_WIDE_INT mask = d->mask;
16700 if ((mask & builtin_mask) != mask)
16702 if (TARGET_DEBUG_BUILTIN)
16703 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
16704 continue;
16707 if (rs6000_overloaded_builtin_p (d->code))
16709 if (! (type = opaque_ftype_opaque_opaque))
16710 type = opaque_ftype_opaque_opaque
16711 = build_function_type_list (opaque_V4SI_type_node,
16712 opaque_V4SI_type_node,
16713 opaque_V4SI_type_node,
16714 NULL_TREE);
16716 else
16718 enum insn_code icode = d->icode;
16719 if (d->name == 0)
16721 if (TARGET_DEBUG_BUILTIN)
16722 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
16723 (long unsigned)i);
16725 continue;
16728 if (icode == CODE_FOR_nothing)
16730 if (TARGET_DEBUG_BUILTIN)
16731 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
16732 d->name);
16734 continue;
16737 mode0 = insn_data[icode].operand[0].mode;
16738 mode1 = insn_data[icode].operand[1].mode;
16739 mode2 = insn_data[icode].operand[2].mode;
16741 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
16743 if (! (type = v2si_ftype_v2si_qi))
16744 type = v2si_ftype_v2si_qi
16745 = build_function_type_list (opaque_V2SI_type_node,
16746 opaque_V2SI_type_node,
16747 char_type_node,
16748 NULL_TREE);
16751 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
16752 && mode2 == QImode)
16754 if (! (type = v2si_ftype_int_qi))
16755 type = v2si_ftype_int_qi
16756 = build_function_type_list (opaque_V2SI_type_node,
16757 integer_type_node,
16758 char_type_node,
16759 NULL_TREE);
16762 else
16763 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
16764 d->code, d->name);
16767 def_builtin (d->name, type, d->code);
16770 /* Add the simple unary operators. */
16771 d = bdesc_1arg;
16772 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16774 machine_mode mode0, mode1;
16775 tree type;
16776 HOST_WIDE_INT mask = d->mask;
16778 if ((mask & builtin_mask) != mask)
16780 if (TARGET_DEBUG_BUILTIN)
16781 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
16782 continue;
16785 if (rs6000_overloaded_builtin_p (d->code))
16787 if (! (type = opaque_ftype_opaque))
16788 type = opaque_ftype_opaque
16789 = build_function_type_list (opaque_V4SI_type_node,
16790 opaque_V4SI_type_node,
16791 NULL_TREE);
16793 else
16795 enum insn_code icode = d->icode;
16796 if (d->name == 0)
16798 if (TARGET_DEBUG_BUILTIN)
16799 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
16800 (long unsigned)i);
16802 continue;
16805 if (icode == CODE_FOR_nothing)
16807 if (TARGET_DEBUG_BUILTIN)
16808 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
16809 d->name);
16811 continue;
16814 mode0 = insn_data[icode].operand[0].mode;
16815 mode1 = insn_data[icode].operand[1].mode;
16817 if (mode0 == V2SImode && mode1 == QImode)
16819 if (! (type = v2si_ftype_qi))
16820 type = v2si_ftype_qi
16821 = build_function_type_list (opaque_V2SI_type_node,
16822 char_type_node,
16823 NULL_TREE);
16826 else
16827 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
16828 d->code, d->name);
16831 def_builtin (d->name, type, d->code);
16835 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
16836 static void
16837 init_float128_ibm (machine_mode mode)
16839 if (!TARGET_XL_COMPAT)
16841 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
16842 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
16843 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
16844 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
16846 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
16848 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
16849 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
16850 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
16851 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
16852 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
16853 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
16854 set_optab_libfunc (le_optab, mode, "__gcc_qle");
16856 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
16857 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
16858 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
16859 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
16860 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
16861 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
16862 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
16863 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
16866 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
16867 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
16869 else
16871 set_optab_libfunc (add_optab, mode, "_xlqadd");
16872 set_optab_libfunc (sub_optab, mode, "_xlqsub");
16873 set_optab_libfunc (smul_optab, mode, "_xlqmul");
16874 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
16877 /* Add various conversions for IFmode to use the traditional TFmode
16878 names. */
16879 if (mode == IFmode)
16881 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
16882 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
16883 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
16884 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
16885 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
16886 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
16888 if (TARGET_POWERPC64)
16890 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
16891 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
16892 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
16893 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
16898 /* Set up IEEE 128-bit floating point routines. Use different names if the
16899 arguments can be passed in a vector register. The historical PowerPC
16900 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
16901 continue to use that if we aren't using vector registers to pass IEEE
16902 128-bit floating point. */
16904 static void
16905 init_float128_ieee (machine_mode mode)
16907 if (FLOAT128_VECTOR_P (mode))
16909 set_optab_libfunc (add_optab, mode, "__addkf3");
16910 set_optab_libfunc (sub_optab, mode, "__subkf3");
16911 set_optab_libfunc (neg_optab, mode, "__negkf2");
16912 set_optab_libfunc (smul_optab, mode, "__mulkf3");
16913 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
16914 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
16915 set_optab_libfunc (abs_optab, mode, "__abskf2");
16917 set_optab_libfunc (eq_optab, mode, "__eqkf2");
16918 set_optab_libfunc (ne_optab, mode, "__nekf2");
16919 set_optab_libfunc (gt_optab, mode, "__gtkf2");
16920 set_optab_libfunc (ge_optab, mode, "__gekf2");
16921 set_optab_libfunc (lt_optab, mode, "__ltkf2");
16922 set_optab_libfunc (le_optab, mode, "__lekf2");
16923 set_optab_libfunc (unord_optab, mode, "__unordkf2");
16925 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
16926 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
16927 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
16928 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
16930 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
16931 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16932 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
16934 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
16935 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16936 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
16938 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
16939 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
16940 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
16941 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
16942 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
16943 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
16945 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
16946 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
16947 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
16948 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
16950 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
16951 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
16952 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
16953 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
16955 if (TARGET_POWERPC64)
16957 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
16958 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
16959 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
16960 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
16964 else
16966 set_optab_libfunc (add_optab, mode, "_q_add");
16967 set_optab_libfunc (sub_optab, mode, "_q_sub");
16968 set_optab_libfunc (neg_optab, mode, "_q_neg");
16969 set_optab_libfunc (smul_optab, mode, "_q_mul");
16970 set_optab_libfunc (sdiv_optab, mode, "_q_div");
16971 if (TARGET_PPC_GPOPT)
16972 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
16974 set_optab_libfunc (eq_optab, mode, "_q_feq");
16975 set_optab_libfunc (ne_optab, mode, "_q_fne");
16976 set_optab_libfunc (gt_optab, mode, "_q_fgt");
16977 set_optab_libfunc (ge_optab, mode, "_q_fge");
16978 set_optab_libfunc (lt_optab, mode, "_q_flt");
16979 set_optab_libfunc (le_optab, mode, "_q_fle");
16981 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
16982 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
16983 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
16984 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
16985 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
16986 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
16987 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
16988 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
16992 static void
16993 rs6000_init_libfuncs (void)
16995 /* __float128 support. */
16996 if (TARGET_FLOAT128)
16998 init_float128_ibm (IFmode);
16999 init_float128_ieee (KFmode);
17002 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17003 if (TARGET_LONG_DOUBLE_128)
17005 if (!TARGET_IEEEQUAD)
17006 init_float128_ibm (TFmode);
17008 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17009 else
17010 init_float128_ieee (TFmode);
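/* Worked example (an assumed illustration, not from the original sources):
   with -mfloat128 on a 64-bit Linux target, a KFmode addition resolves to
   __addkf3 via init_float128_ieee above, while an IBM double-double IFmode
   addition resolves to __gcc_qadd via init_float128_ibm (or _xlqadd when
   -mxl-compat is in effect).  */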
17015 /* Expand a block clear operation, and return 1 if successful. Return 0
17016 if we should let the compiler generate normal code.
17018 operands[0] is the destination
17019 operands[1] is the length
17020 operands[3] is the alignment */
17022 int
17023 expand_block_clear (rtx operands[])
17025 rtx orig_dest = operands[0];
17026 rtx bytes_rtx = operands[1];
17027 rtx align_rtx = operands[3];
17028 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
17029 HOST_WIDE_INT align;
17030 HOST_WIDE_INT bytes;
17031 int offset;
17032 int clear_bytes;
17033 int clear_step;
17035 /* If this is not a fixed size clear, just call memset */
17036 if (! constp)
17037 return 0;
17039 /* This must be a fixed size alignment */
17040 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17041 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17043 /* Anything to clear? */
17044 bytes = INTVAL (bytes_rtx);
17045 if (bytes <= 0)
17046 return 1;
17048 /* Use the builtin memset after a point, to avoid huge code bloat.
17049 When optimize_size, avoid any significant code bloat; calling
17050 memset is about 4 instructions, so allow for one instruction to
17051 load zero and three to do clearing. */
17052 if (TARGET_ALTIVEC && align >= 128)
17053 clear_step = 16;
17054 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
17055 clear_step = 8;
17056 else if (TARGET_SPE && align >= 64)
17057 clear_step = 8;
17058 else
17059 clear_step = 4;
17061 if (optimize_size && bytes > 3 * clear_step)
17062 return 0;
17063 if (! optimize_size && bytes > 8 * clear_step)
17064 return 0;
17066 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
17068 machine_mode mode = BLKmode;
17069 rtx dest;
17071 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
17073 clear_bytes = 16;
17074 mode = V4SImode;
17076 else if (bytes >= 8 && TARGET_SPE && align >= 64)
17078 clear_bytes = 8;
17079 mode = V2SImode;
17081 else if (bytes >= 8 && TARGET_POWERPC64
17082 && (align >= 64 || !STRICT_ALIGNMENT))
17084 clear_bytes = 8;
17085 mode = DImode;
17086 if (offset == 0 && align < 64)
17088 rtx addr;
17090 /* If the address form is reg+offset with offset not a
17091 multiple of four, reload into reg indirect form here
17092 rather than waiting for reload. This way we get one
17093 reload, not one per store. */
17094 addr = XEXP (orig_dest, 0);
17095 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17096 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17097 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17099 addr = copy_addr_to_reg (addr);
17100 orig_dest = replace_equiv_address (orig_dest, addr);
17104 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17105 { /* clear 4 bytes */
17106 clear_bytes = 4;
17107 mode = SImode;
17109 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17110 { /* clear 2 bytes */
17111 clear_bytes = 2;
17112 mode = HImode;
17114 else /* clear 1 byte at a time */
17116 clear_bytes = 1;
17117 mode = QImode;
17120 dest = adjust_address (orig_dest, mode, offset);
17122 emit_move_insn (dest, CONST0_RTX (mode));
17125 return 1;
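/* Worked example of the size cutoff above: on a 64-bit target where the
   AltiVec case does not apply, clear_step is 8, so -Os inlines clears of
   at most 3 * 8 = 24 bytes and other levels at most 8 * 8 = 64 bytes;
   anything larger returns 0 and normal (memset) code is generated.  */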
17129 /* Expand a block move operation, and return 1 if successful. Return 0
17130 if we should let the compiler generate normal code.
17132 operands[0] is the destination
17133 operands[1] is the source
17134 operands[2] is the length
17135 operands[3] is the alignment */
17137 #define MAX_MOVE_REG 4
17139 int
17140 expand_block_move (rtx operands[])
17142 rtx orig_dest = operands[0];
17143 rtx orig_src = operands[1];
17144 rtx bytes_rtx = operands[2];
17145 rtx align_rtx = operands[3];
17146 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
17147 int align;
17148 int bytes;
17149 int offset;
17150 int move_bytes;
17151 rtx stores[MAX_MOVE_REG];
17152 int num_reg = 0;
17154 /* If this is not a fixed size move, just call memcpy */
17155 if (! constp)
17156 return 0;
17158 /* This must be a fixed size alignment */
17159 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17160 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17162 /* Anything to move? */
17163 bytes = INTVAL (bytes_rtx);
17164 if (bytes <= 0)
17165 return 1;
17167 if (bytes > rs6000_block_move_inline_limit)
17168 return 0;
17170 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
17172 union {
17173 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
17174 rtx (*mov) (rtx, rtx);
17175 } gen_func;
17176 machine_mode mode = BLKmode;
17177 rtx src, dest;
17179 /* Altivec first, since it will be faster than a string move
17180 when it applies, and usually not significantly larger. */
17181 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
17183 move_bytes = 16;
17184 mode = V4SImode;
17185 gen_func.mov = gen_movv4si;
17187 else if (TARGET_SPE && bytes >= 8 && align >= 64)
17189 move_bytes = 8;
17190 mode = V2SImode;
17191 gen_func.mov = gen_movv2si;
17193 else if (TARGET_STRING
17194 && bytes > 24 /* move up to 32 bytes at a time */
17195 && ! fixed_regs[5]
17196 && ! fixed_regs[6]
17197 && ! fixed_regs[7]
17198 && ! fixed_regs[8]
17199 && ! fixed_regs[9]
17200 && ! fixed_regs[10]
17201 && ! fixed_regs[11]
17202 && ! fixed_regs[12])
17204 move_bytes = (bytes > 32) ? 32 : bytes;
17205 gen_func.movmemsi = gen_movmemsi_8reg;
17207 else if (TARGET_STRING
17208 && bytes > 16 /* move up to 24 bytes at a time */
17209 && ! fixed_regs[5]
17210 && ! fixed_regs[6]
17211 && ! fixed_regs[7]
17212 && ! fixed_regs[8]
17213 && ! fixed_regs[9]
17214 && ! fixed_regs[10])
17216 move_bytes = (bytes > 24) ? 24 : bytes;
17217 gen_func.movmemsi = gen_movmemsi_6reg;
17219 else if (TARGET_STRING
17220 && bytes > 8 /* move up to 16 bytes at a time */
17221 && ! fixed_regs[5]
17222 && ! fixed_regs[6]
17223 && ! fixed_regs[7]
17224 && ! fixed_regs[8])
17226 move_bytes = (bytes > 16) ? 16 : bytes;
17227 gen_func.movmemsi = gen_movmemsi_4reg;
17229 else if (bytes >= 8 && TARGET_POWERPC64
17230 && (align >= 64 || !STRICT_ALIGNMENT))
17232 move_bytes = 8;
17233 mode = DImode;
17234 gen_func.mov = gen_movdi;
17235 if (offset == 0 && align < 64)
17237 rtx addr;
17239 /* If the address form is reg+offset with offset not a
17240 multiple of four, reload into reg indirect form here
17241 rather than waiting for reload. This way we get one
17242 reload, not one per load and/or store. */
17243 addr = XEXP (orig_dest, 0);
17244 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17245 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17246 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17248 addr = copy_addr_to_reg (addr);
17249 orig_dest = replace_equiv_address (orig_dest, addr);
17251 addr = XEXP (orig_src, 0);
17252 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17253 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17254 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17256 addr = copy_addr_to_reg (addr);
17257 orig_src = replace_equiv_address (orig_src, addr);
17261 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
17262 { /* move up to 8 bytes at a time */
17263 move_bytes = (bytes > 8) ? 8 : bytes;
17264 gen_func.movmemsi = gen_movmemsi_2reg;
17266 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17267 { /* move 4 bytes */
17268 move_bytes = 4;
17269 mode = SImode;
17270 gen_func.mov = gen_movsi;
17272 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17273 { /* move 2 bytes */
17274 move_bytes = 2;
17275 mode = HImode;
17276 gen_func.mov = gen_movhi;
17278 else if (TARGET_STRING && bytes > 1)
17279 { /* move up to 4 bytes at a time */
17280 move_bytes = (bytes > 4) ? 4 : bytes;
17281 gen_func.movmemsi = gen_movmemsi_1reg;
17283 else /* move 1 byte at a time */
17285 move_bytes = 1;
17286 mode = QImode;
17287 gen_func.mov = gen_movqi;
17290 src = adjust_address (orig_src, mode, offset);
17291 dest = adjust_address (orig_dest, mode, offset);
17293 if (mode != BLKmode)
17295 rtx tmp_reg = gen_reg_rtx (mode);
17297 emit_insn ((*gen_func.mov) (tmp_reg, src));
17298 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
17301 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
17303 int i;
17304 for (i = 0; i < num_reg; i++)
17305 emit_insn (stores[i]);
17306 num_reg = 0;
17309 if (mode == BLKmode)
17311 /* Move the address into scratch registers. The movmemsi
17312 patterns require zero offset. */
17313 if (!REG_P (XEXP (src, 0)))
17315 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
17316 src = replace_equiv_address (src, src_reg);
17318 set_mem_size (src, move_bytes);
17320 if (!REG_P (XEXP (dest, 0)))
17322 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
17323 dest = replace_equiv_address (dest, dest_reg);
17325 set_mem_size (dest, move_bytes);
17327 emit_insn ((*gen_func.movmemsi) (dest, src,
17328 GEN_INT (move_bytes & 31),
17329 align_rtx));
17333 return 1;
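/* Worked example of the chunking above: moving 30 bytes with 32-bit
   alignment on a 32-bit target without string instructions selects SImode
   while bytes >= 4 and HImode for the tail, so offsets 0,4,...,24 each
   move 4 bytes and offset 28 moves the last 2.  Loads are emitted
   immediately but the matching stores are buffered in stores[] and
   flushed every MAX_MOVE_REG chunks, letting several loads issue ahead
   of their stores.  */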
17337 /* Return a string to perform a load_multiple operation.
17338 operands[0] is the vector.
17339 operands[1] is the source address.
17340 operands[2] is the first destination register. */
17342 const char *
17343 rs6000_output_load_multiple (rtx operands[3])
17345 /* We have to handle the case where the pseudo used to contain the address
17346 is assigned to one of the output registers. */
17347 int i, j;
17348 int words = XVECLEN (operands[0], 0);
17349 rtx xop[10];
17351 if (XVECLEN (operands[0], 0) == 1)
17352 return "lwz %2,0(%1)";
17354 for (i = 0; i < words; i++)
17355 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
17357 if (i == words-1)
17359 xop[0] = GEN_INT (4 * (words-1));
17360 xop[1] = operands[1];
17361 xop[2] = operands[2];
17362 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
17363 return "";
17365 else if (i == 0)
17367 xop[0] = GEN_INT (4 * (words-1));
17368 xop[1] = operands[1];
17369 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
17370 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
17371 return "";
17373 else
17375 for (j = 0; j < words; j++)
17376 if (j != i)
17378 xop[0] = GEN_INT (j * 4);
17379 xop[1] = operands[1];
17380 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
17381 output_asm_insn ("lwz %2,%0(%1)", xop);
17383 xop[0] = GEN_INT (i * 4);
17384 xop[1] = operands[1];
17385 output_asm_insn ("lwz %1,%0(%1)", xop);
17386 return "";
17390 return "lswi %2,%1,%N0";
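/* Worked example of the overlap handling above: loading r5..r8 from (r6)
   makes the base register r6 the second destination (i == 1), so the
   final arm emits "lwz r5,0(r6)", "lwz r7,8(r6)" and "lwz r8,12(r6)"
   first and only then "lwz r6,4(r6)"; loading r6 in sequence order would
   clobber the base address while r7 and r8 still need it.  */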
17394 /* A validation routine: say whether CODE, a condition code, and MODE
17395 match. The other alternatives either don't make sense or should
17396 never be generated. */
17398 void
17399 validate_condition_mode (enum rtx_code code, machine_mode mode)
17401 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17402 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17403 && GET_MODE_CLASS (mode) == MODE_CC);
17405 /* These don't make sense. */
17406 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17407 || mode != CCUNSmode);
17409 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17410 || mode == CCUNSmode);
17412 gcc_assert (mode == CCFPmode
17413 || (code != ORDERED && code != UNORDERED
17414 && code != UNEQ && code != LTGT
17415 && code != UNGT && code != UNLT
17416 && code != UNGE && code != UNLE));
17418 /* These should never be generated except for
17419 flag_finite_math_only. */
17420 gcc_assert (mode != CCFPmode
17421 || flag_finite_math_only
17422 || (code != LE && code != GE
17423 && code != UNEQ && code != LTGT
17424 && code != UNGT && code != UNLT));
17426 /* These are invalid; the information is not there. */
17427 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
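/* Worked example of the assertions above: (GTU, CCUNSmode) passes, since
   unsigned comparisons require CCUNSmode, while (GT, CCUNSmode) trips the
   first assertion and (LTGT, CCmode) trips the ordered-comparison one,
   because LTGT only makes sense against a CCFPmode result.  */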
17431 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17432 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17433 not zero, store there the bit offset (counted from the right) where
17434 the single stretch of 1 bits begins; and similarly for B, the bit
17435 offset where it ends. */
17437 bool
17438 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17440 unsigned HOST_WIDE_INT val = INTVAL (mask);
17441 unsigned HOST_WIDE_INT bit;
17442 int nb, ne;
17443 int n = GET_MODE_PRECISION (mode);
17445 if (mode != DImode && mode != SImode)
17446 return false;
17448 if (INTVAL (mask) >= 0)
17450 bit = val & -val;
17451 ne = exact_log2 (bit);
17452 nb = exact_log2 (val + bit);
17454 else if (val + 1 == 0)
17456 nb = n;
17457 ne = 0;
17459 else if (val & 1)
17461 val = ~val;
17462 bit = val & -val;
17463 nb = exact_log2 (bit);
17464 ne = exact_log2 (val + bit);
17466 else
17468 bit = val & -val;
17469 ne = exact_log2 (bit);
17470 if (val + bit == 0)
17471 nb = n;
17472 else
17473 nb = 0;
17476 nb--;
17478 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17479 return false;
17481 if (b)
17482 *b = nb;
17483 if (e)
17484 *e = ne;
17486 return true;
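/* A minimal stand-alone sketch (a hypothetical helper, not from the
   original sources) of the non-wrapping case above, in plain C:
   "val & -val" isolates the lowest set bit, and adding it to VAL clears
   the whole bottom run of ones, so a single run remains iff the sum is
   zero or a power of two.  __builtin_ctzll/__builtin_clzll are the usual
   GCC count-zero builtins.  */

static int
example_is_single_run_of_ones (unsigned long long val, int *begin, int *end)
{
  if (val == 0)
    return 0;
  unsigned long long low = val & -val;	/* e.g. 0x0ff0 -> 0x0010 */
  unsigned long long sum = val + low;	/* e.g. 0x0ff0 -> 0x1000 */
  if (sum & (sum - 1))			/* a second run of ones remains */
    return 0;
  *begin = __builtin_ctzll (val);	/* e.g. 4: run starts at bit 4 */
  *end = 63 - __builtin_clzll (val);	/* e.g. 11: run ends at bit 11 */
  return 1;
}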
17489 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17490 or rldicr instruction, to implement an AND with it in mode MODE. */
17492 bool
17493 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17495 int nb, ne;
17497 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17498 return false;
17500 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
17501 does not wrap. */
17502 if (mode == DImode)
17503 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
17505 /* For SImode, rlwinm can do everything. */
17506 if (mode == SImode)
17507 return (nb < 32 && ne < 32);
17509 return false;
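/* Worked example of the two cases above: in DImode the mask
   0x00000000ffff0000 has nb == 31 and ne == 16, so "nb < 32 && ne <= nb"
   holds and one rlwinm suffices, while 0x0000ffff00000000 (nb == 47,
   ne == 32) fits none of the three forms and needs two instructions.  */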
17512 /* Return the instruction template for an AND with mask in mode MODE, with
17513 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17515 const char *
17516 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
17518 int nb, ne;
17520 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
17521 gcc_unreachable ();
17523 if (mode == DImode && ne == 0)
17525 operands[3] = GEN_INT (63 - nb);
17526 if (dot)
17527 return "rldicl. %0,%1,0,%3";
17528 return "rldicl %0,%1,0,%3";
17531 if (mode == DImode && nb == 63)
17533 operands[3] = GEN_INT (63 - ne);
17534 if (dot)
17535 return "rldicr. %0,%1,0,%3";
17536 return "rldicr %0,%1,0,%3";
17539 if (nb < 32 && ne < 32)
17541 operands[3] = GEN_INT (31 - nb);
17542 operands[4] = GEN_INT (31 - ne);
17543 if (dot)
17544 return "rlwinm. %0,%1,0,%3,%4";
17545 return "rlwinm %0,%1,0,%3,%4";
17548 gcc_unreachable ();
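/* Worked examples for the templates above:
   - DImode AND with 0x00000000ffffffff: ne == 0, nb == 31, so the first
     arm emits "rldicl %0,%1,0,32" (clear the 32 high bits).
   - DImode AND with 0xffffffff00000000: nb == 63, ne == 32, so the second
     arm emits "rldicr %0,%1,0,31" (keep the 32 high bits).
   - AND with 0x0000ff00: nb == 15, ne == 8, so the last arm emits
     "rlwinm %0,%1,0,16,23" in IBM big-endian bit numbering.  */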
17551 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
17552 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
17553 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
17555 bool
17556 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
17558 int nb, ne;
17560 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17561 return false;
17563 int n = GET_MODE_PRECISION (mode);
17564 int sh = -1;
17566 if (CONST_INT_P (XEXP (shift, 1)))
17568 sh = INTVAL (XEXP (shift, 1));
17569 if (sh < 0 || sh >= n)
17570 return false;
17573 rtx_code code = GET_CODE (shift);
17575 /* Convert any shift by 0 to a rotate, to simplify below code. */
17576 if (sh == 0)
17577 code = ROTATE;
17579 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17580 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17581 code = ASHIFT;
17582 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17584 code = LSHIFTRT;
17585 sh = n - sh;
17588 /* DImode rotates need rld*. */
17589 if (mode == DImode && code == ROTATE)
17590 return (nb == 63 || ne == 0 || ne == sh);
17592 /* SImode rotates need rlw*. */
17593 if (mode == SImode && code == ROTATE)
17594 return (nb < 32 && ne < 32 && sh < 32);
17596 /* Wrap-around masks are only okay for rotates. */
17597 if (ne > nb)
17598 return false;
17600 /* Variable shifts are only okay for rotates. */
17601 if (sh < 0)
17602 return false;
17604 /* Don't allow ASHIFT if the mask is wrong for that. */
17605 if (code == ASHIFT && ne < sh)
17606 return false;
17608 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
17609 if the mask is wrong for that. */
17610 if (nb < 32 && ne < 32 && sh < 32
17611 && !(code == LSHIFTRT && nb >= 32 - sh))
17612 return true;
17614 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
17615 if the mask is wrong for that. */
17616 if (code == LSHIFTRT)
17617 sh = 64 - sh;
17618 if (nb == 63 || ne == 0 || ne == sh)
17619 return !(code == LSHIFTRT && nb >= sh);
17621 return false;
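/* Worked example of the rotate conversion above: in SImode,
   (rotate X 8) under mask 0xffffff00 has sh == 8, nb == 31, ne == 8;
   since nb >= ne and ne >= sh the rotate is really an ASHIFT, and the
   later rlw* check accepts it.  Conversely (rotate X 24) under mask
   0x00ffffff has nb == 23 < sh, so it is rewritten as LSHIFTRT with
   sh = 32 - 24 = 8, i.e. a plain shift right by 8 with mask.  */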
17624 /* Return the instruction template for a shift with mask in mode MODE, with
17625 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17627 const char *
17628 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
17630 int nb, ne;
17632 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17633 gcc_unreachable ();
17635 if (mode == DImode && ne == 0)
17637 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17638 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
17639 operands[3] = GEN_INT (63 - nb);
17640 if (dot)
17641 return "rld%I2cl. %0,%1,%2,%3";
17642 return "rld%I2cl %0,%1,%2,%3";
17645 if (mode == DImode && nb == 63)
17647 operands[3] = GEN_INT (63 - ne);
17648 if (dot)
17649 return "rld%I2cr. %0,%1,%2,%3";
17650 return "rld%I2cr %0,%1,%2,%3";
17653 if (mode == DImode
17654 && GET_CODE (operands[4]) != LSHIFTRT
17655 && CONST_INT_P (operands[2])
17656 && ne == INTVAL (operands[2]))
17658 operands[3] = GEN_INT (63 - nb);
17659 if (dot)
17660 return "rld%I2c. %0,%1,%2,%3";
17661 return "rld%I2c %0,%1,%2,%3";
17664 if (nb < 32 && ne < 32)
17666 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17667 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17668 operands[3] = GEN_INT (31 - nb);
17669 operands[4] = GEN_INT (31 - ne);
17670 /* This insn can also be a 64-bit rotate with a mask that really makes
17671 it just a shift right (with mask); the %h below adjusts for that
17672 situation (the shift count is >= 32 in that case). */
17673 if (dot)
17674 return "rlw%I2nm. %0,%1,%h2,%3,%4";
17675 return "rlw%I2nm %0,%1,%h2,%3,%4";
17678 gcc_unreachable ();
17681 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
17682 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
17683 ASHIFT, or LSHIFTRT) in mode MODE. */
17685 bool
17686 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
17688 int nb, ne;
17690 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17691 return false;
17693 int n = GET_MODE_PRECISION (mode);
17695 int sh = INTVAL (XEXP (shift, 1));
17696 if (sh < 0 || sh >= n)
17697 return false;
17699 rtx_code code = GET_CODE (shift);
17701 /* Convert any shift by 0 to a rotate, to simplify below code. */
17702 if (sh == 0)
17703 code = ROTATE;
17705 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17706 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17707 code = ASHIFT;
17708 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17710 code = LSHIFTRT;
17711 sh = n - sh;
17714 /* DImode rotates need rldimi. */
17715 if (mode == DImode && code == ROTATE)
17716 return (ne == sh);
17718 /* SImode rotates need rlwimi. */
17719 if (mode == SImode && code == ROTATE)
17720 return (nb < 32 && ne < 32 && sh < 32);
17722 /* Wrap-around masks are only okay for rotates. */
17723 if (ne > nb)
17724 return false;
17726 /* Don't allow ASHIFT if the mask is wrong for that. */
17727 if (code == ASHIFT && ne < sh)
17728 return false;
17730 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
17731 if the mask is wrong for that. */
17732 if (nb < 32 && ne < 32 && sh < 32
17733 && !(code == LSHIFTRT && nb >= 32 - sh))
17734 return true;
17736 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
17737 if the mask is wrong for that. */
17738 if (code == LSHIFTRT)
17739 sh = 64 - sh;
17740 if (ne == sh)
17741 return !(code == LSHIFTRT && nb >= sh);
17743 return false;
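/* A worked example (illustrative): in SImode, an insert under the mask
   0x00FFFF00 with an ASHIFT by 8 gives nb = 23, ne = 8, sh = 8.  Since
   ne == sh and nb, ne, and sh are all < 32, this is a valid rlwimi insert
   (SH=8, MB=8, ME=23).  */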
17746 /* Return the instruction template for an insert with mask in mode MODE, with
17747 operands OPERANDS. If DOT is true, make it a record-form instruction. */
17749 const char *
17750 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
17752 int nb, ne;
17754 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17755 gcc_unreachable ();
17757 /* Prefer rldimi because rlwimi is cracked. */
17758 if (TARGET_POWERPC64
17759 && (!dot || mode == DImode)
17760 && GET_CODE (operands[4]) != LSHIFTRT
17761 && ne == INTVAL (operands[2]))
17763 operands[3] = GEN_INT (63 - nb);
17764 if (dot)
17765 return "rldimi. %0,%1,%2,%3";
17766 return "rldimi %0,%1,%2,%3";
17769 if (nb < 32 && ne < 32)
17771 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17772 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17773 operands[3] = GEN_INT (31 - nb);
17774 operands[4] = GEN_INT (31 - ne);
17775 if (dot)
17776 return "rlwimi. %0,%1,%2,%3,%4";
17777 return "rlwimi %0,%1,%2,%3,%4";
17780 gcc_unreachable ();
17783 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
17784 using two machine instructions. */
17786 bool
17787 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
17789 /* There are two kinds of AND we can handle with two insns:
17790 1) those we can do with two rl* insn;
17791 2) ori[s];xori[s].
17793 We do not handle that last case yet. */
17795 /* If there is just one stretch of ones, we can do it. */
17796 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
17797 return true;
17799 /* Otherwise, fill in the lowest "hole"; if we can do the result with
17800 one insn, we can do the whole thing with two. */
17801 unsigned HOST_WIDE_INT val = INTVAL (c);
17802 unsigned HOST_WIDE_INT bit1 = val & -val;
17803 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17804 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17805 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
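/* A worked example (illustrative): val = 0x66 has runs of ones at bits 1-2
   and 5-6, so bit1 = 0x02 (bottom of the lowest run), bit2 = 0x08 (bottom
   of the lowest hole), val1 = 0x60 (val with the lowest run cleared), and
   bit3 = 0x20 (first bit above the hole).  Then val + bit3 - bit2 = 0x7E,
   the mask with the hole at bits 3-4 filled in, which is one stretch of
   ones and hence doable with a single rl* insn.  */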
17806 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
17809 /* Emit a potentially record-form instruction, setting DST from SRC.
17810 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17811 signed comparison of DST with zero. If DOT is 1, the generated RTL
17812 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17813 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17814 a separate COMPARE. */
17816 static void
17817 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17819 if (dot == 0)
17821 emit_move_insn (dst, src);
17822 return;
17825 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17827 emit_move_insn (dst, src);
17828 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17829 return;
17832 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17833 if (dot == 1)
17835 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17836 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17838 else
17840 rtx set = gen_rtx_SET (dst, src);
17841 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
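/* For example, with dot == 2 and CCREG being CR0 this emits the single
   insn (parallel [(set ccreg (compare:CC src 0)) (set dst src)]), while
   dot == 1 replaces the inner SET of DST with a CLOBBER.  */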
17845 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
17846 If EXPAND is true, split rotate-and-mask instructions we generate to
17847 their constituent parts as well (this is used during expand); if DOT
17848 is 1, make the last insn a record-form instruction clobbering the
17849 destination GPR and setting the CC reg (from operands[3]); if 2, set
17850 that GPR as well as the CC reg. */
17852 void
17853 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
17855 gcc_assert (!(expand && dot));
17857 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
17859 /* If it is one stretch of ones, it is DImode; shift left, mask, then
17860 shift right. This generates better code than doing the masks without
17861 shifts, or shifting first right and then left. */
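/* A worked example (illustrative): val = 0xFF0 has nb = 11 and ne = 4, so
   shift = 52; we emit x << 52, an AND with 0xFF00000000000000 (the stretch
   moved to the top of the register), and a logical shift right by 52.  */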
17862 int nb, ne;
17863 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
17865 gcc_assert (mode == DImode);
17867 int shift = 63 - nb;
17868 if (expand)
17870 rtx tmp1 = gen_reg_rtx (DImode);
17871 rtx tmp2 = gen_reg_rtx (DImode);
17872 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
17873 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
17874 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
17876 else
17878 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
17879 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
17880 emit_move_insn (operands[0], tmp);
17881 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
17882 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17884 return;
17887 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
17888 that does the rest. */
17889 unsigned HOST_WIDE_INT bit1 = val & -val;
17890 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17891 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17892 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17894 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
17895 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
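/* Illustrative case: for val = 0x66 (as in rs6000_is_valid_2insn_and),
   mask1 = ~0x18 keeps everything except the hole bits, and mask2 = 0x7E is
   the mask with the hole filled in; val == (mask1 & mask2), so ANDing with
   the two masks in sequence computes the original AND.  */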
17897 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
17899 /* Two "no-rotate"-and-mask instructions, for SImode. */
17900 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
17902 gcc_assert (mode == SImode);
17904 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17905 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
17906 emit_move_insn (reg, tmp);
17907 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17908 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17909 return;
17912 gcc_assert (mode == DImode);
17914 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
17915 insns; we have to do the first in SImode, because it wraps. */
17916 if (mask2 <= 0xffffffff
17917 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
17919 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17920 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
17921 GEN_INT (mask1));
17922 rtx reg_low = gen_lowpart (SImode, reg);
17923 emit_move_insn (reg_low, tmp);
17924 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17925 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17926 return;
17929 /* Two rld* insns: rotate, clear the hole in the middle (which now is
17930 at the top end), rotate back and clear the other hole. */
17931 int right = exact_log2 (bit3);
17932 int left = 64 - right;
17934 /* Rotate the mask too. */
17935 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
17937 if (expand)
17939 rtx tmp1 = gen_reg_rtx (DImode);
17940 rtx tmp2 = gen_reg_rtx (DImode);
17941 rtx tmp3 = gen_reg_rtx (DImode);
17942 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
17943 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
17944 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
17945 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
17947 else
17949 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
17950 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
17951 emit_move_insn (operands[0], tmp);
17952 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
17953 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
17954 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17958 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
17959 for lfq and stfq insns iff the registers are hard registers. */
17961 int
17962 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
17964 /* We might have been passed a SUBREG. */
17965 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
17966 return 0;
17968 /* We might have been passed non floating point registers. */
17969 if (!FP_REGNO_P (REGNO (reg1))
17970 || !FP_REGNO_P (REGNO (reg2)))
17971 return 0;
17973 return (REGNO (reg1) == REGNO (reg2) - 1);
17976 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
17977 addr1 and addr2 must be in consecutive memory locations
17978 (addr2 == addr1 + 8). */
17980 int
17981 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
17983 rtx addr1, addr2;
17984 unsigned int reg1, reg2;
17985 int offset1, offset2;
17987 /* The mems cannot be volatile. */
17988 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
17989 return 0;
17991 addr1 = XEXP (mem1, 0);
17992 addr2 = XEXP (mem2, 0);
17994 /* Extract an offset (if used) from the first addr. */
17995 if (GET_CODE (addr1) == PLUS)
17997 /* If not a REG, return zero. */
17998 if (GET_CODE (XEXP (addr1, 0)) != REG)
17999 return 0;
18000 else
18002 reg1 = REGNO (XEXP (addr1, 0));
18003 /* The offset must be constant! */
18004 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18005 return 0;
18006 offset1 = INTVAL (XEXP (addr1, 1));
18009 else if (GET_CODE (addr1) != REG)
18010 return 0;
18011 else
18013 reg1 = REGNO (addr1);
18014 /* This was a simple (mem (reg)) expression. Offset is 0. */
18015 offset1 = 0;
18018 /* And now for the second addr. */
18019 if (GET_CODE (addr2) == PLUS)
18021 /* If not a REG, return zero. */
18022 if (GET_CODE (XEXP (addr2, 0)) != REG)
18023 return 0;
18024 else
18026 reg2 = REGNO (XEXP (addr2, 0));
18027 /* The offset must be constant. */
18028 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18029 return 0;
18030 offset2 = INTVAL (XEXP (addr2, 1));
18033 else if (GET_CODE (addr2) != REG)
18034 return 0;
18035 else
18037 reg2 = REGNO (addr2);
18038 /* This was a simple (mem (reg)) expression. Offset is 0. */
18039 offset2 = 0;
18042 /* Both of these must have the same base register. */
18043 if (reg1 != reg2)
18044 return 0;
18046 /* The offset for the second addr must be 8 more than the first addr. */
18047 if (offset2 != offset1 + 8)
18048 return 0;
18050 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18051 instructions. */
18052 return 1;
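/* For example (illustrative), (mem (reg 3)) followed by
   (mem (plus (reg 3) (const_int 8))) passes these checks: both use the
   same base register, and the second offset is exactly 8 more than the
   first.  */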
18056 rtx
18057 rs6000_secondary_memory_needed_rtx (machine_mode mode)
18059 static bool eliminated = false;
18060 rtx ret;
18062 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
18063 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18064 else
18066 rtx mem = cfun->machine->sdmode_stack_slot;
18067 gcc_assert (mem != NULL_RTX);
18069 if (!eliminated)
18071 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
18072 cfun->machine->sdmode_stack_slot = mem;
18073 eliminated = true;
18075 ret = mem;
18078 if (TARGET_DEBUG_ADDR)
18080 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
18081 GET_MODE_NAME (mode));
18082 if (!ret)
18083 fprintf (stderr, "\tNULL_RTX\n");
18084 else
18085 debug_rtx (ret);
18088 return ret;
18091 /* Return the mode to be used for memory when a secondary memory
18092 location is needed. For SDmode values we need to use DDmode, in
18093 all other cases we can use the same mode. */
18094 machine_mode
18095 rs6000_secondary_memory_needed_mode (machine_mode mode)
18097 if (lra_in_progress && mode == SDmode)
18098 return DDmode;
18099 return mode;
18102 static tree
18103 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
18105 /* Don't walk into types. */
18106 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
18108 *walk_subtrees = 0;
18109 return NULL_TREE;
18112 switch (TREE_CODE (*tp))
18114 case VAR_DECL:
18115 case PARM_DECL:
18116 case FIELD_DECL:
18117 case RESULT_DECL:
18118 case SSA_NAME:
18119 case REAL_CST:
18120 case MEM_REF:
18121 case VIEW_CONVERT_EXPR:
18122 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
18123 return *tp;
18124 break;
18125 default:
18126 break;
18129 return NULL_TREE;
18132 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18133 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18134 only work on the traditional altivec registers, note if an altivec register
18135 was chosen. */
18137 static enum rs6000_reg_type
18138 register_to_reg_type (rtx reg, bool *is_altivec)
18140 HOST_WIDE_INT regno;
18141 enum reg_class rclass;
18143 if (GET_CODE (reg) == SUBREG)
18144 reg = SUBREG_REG (reg);
18146 if (!REG_P (reg))
18147 return NO_REG_TYPE;
18149 regno = REGNO (reg);
18150 if (regno >= FIRST_PSEUDO_REGISTER)
18152 if (!lra_in_progress && !reload_in_progress && !reload_completed)
18153 return PSEUDO_REG_TYPE;
18155 regno = true_regnum (reg);
18156 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18157 return PSEUDO_REG_TYPE;
18160 gcc_assert (regno >= 0);
18162 if (is_altivec && ALTIVEC_REGNO_P (regno))
18163 *is_altivec = true;
18165 rclass = rs6000_regno_regclass[regno];
18166 return reg_class_to_reg_type[(int)rclass];
18169 /* Helper function to return the cost of adding a TOC entry address. */
18171 static inline int
18172 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18174 int ret;
18176 if (TARGET_CMODEL != CMODEL_SMALL)
18177 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18179 else
18180 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18182 return ret;
18185 /* Helper function for rs6000_secondary_reload to determine whether the memory
18186 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18187 needs reloading. Return negative if the memory is not handled by the memory
18188 helper functions (so a different reload method should be tried), 0 if no
18189 additional instructions are needed, and positive to give the extra cost for the
18190 memory. */
18192 static int
18193 rs6000_secondary_reload_memory (rtx addr,
18194 enum reg_class rclass,
18195 machine_mode mode)
18197 int extra_cost = 0;
18198 rtx reg, and_arg, plus_arg0, plus_arg1;
18199 addr_mask_type addr_mask;
18200 const char *type = NULL;
18201 const char *fail_msg = NULL;
18203 if (GPR_REG_CLASS_P (rclass))
18204 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18206 else if (rclass == FLOAT_REGS)
18207 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18209 else if (rclass == ALTIVEC_REGS)
18210 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18212 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18213 else if (rclass == VSX_REGS)
18214 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18215 & ~RELOAD_REG_AND_M16);
18217 /* If the register allocator hasn't made up its mind yet on the register
18218 class to use, settle on defaults to use. */
18219 else if (rclass == NO_REGS)
18221 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18222 & ~RELOAD_REG_AND_M16);
18224 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18225 addr_mask &= ~(RELOAD_REG_INDEXED
18226 | RELOAD_REG_PRE_INCDEC
18227 | RELOAD_REG_PRE_MODIFY);
18230 else
18231 addr_mask = 0;
18233 /* If the register isn't valid in this register class, just return now. */
18234 if ((addr_mask & RELOAD_REG_VALID) == 0)
18236 if (TARGET_DEBUG_ADDR)
18238 fprintf (stderr,
18239 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18240 "not valid in class\n",
18241 GET_MODE_NAME (mode), reg_class_names[rclass]);
18242 debug_rtx (addr);
18245 return -1;
18248 switch (GET_CODE (addr))
18250 /* Does the register class support auto update forms for this mode? We
18251 don't need a scratch register, since the powerpc only supports
18252 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18253 case PRE_INC:
18254 case PRE_DEC:
18255 reg = XEXP (addr, 0);
18256 if (!base_reg_operand (reg, GET_MODE (reg)))
18258 fail_msg = "no base register #1";
18259 extra_cost = -1;
18262 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18264 extra_cost = 1;
18265 type = "update";
18267 break;
18269 case PRE_MODIFY:
18270 reg = XEXP (addr, 0);
18271 plus_arg1 = XEXP (addr, 1);
18272 if (!base_reg_operand (reg, GET_MODE (reg))
18273 || GET_CODE (plus_arg1) != PLUS
18274 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18276 fail_msg = "bad PRE_MODIFY";
18277 extra_cost = -1;
18280 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18282 extra_cost = 1;
18283 type = "update";
18285 break;
18287 /* Do we need to simulate AND -16 to clear the bottom address bits used
18288 in VMX load/stores? Only allow the AND for vector sizes. */
18289 case AND:
18290 and_arg = XEXP (addr, 0);
18291 if (GET_MODE_SIZE (mode) != 16
18292 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18293 || INTVAL (XEXP (addr, 1)) != -16)
18295 fail_msg = "bad Altivec AND #1";
18296 extra_cost = -1;
18299 if (rclass != ALTIVEC_REGS)
18301 if (legitimate_indirect_address_p (and_arg, false))
18302 extra_cost = 1;
18304 else if (legitimate_indexed_address_p (and_arg, false))
18305 extra_cost = 2;
18307 else
18309 fail_msg = "bad Altivec AND #2";
18310 extra_cost = -1;
18313 type = "and";
18315 break;
18317 /* If this is an indirect address, make sure it is a base register. */
18318 case REG:
18319 case SUBREG:
18320 if (!legitimate_indirect_address_p (addr, false))
18322 extra_cost = 1;
18323 type = "move";
18325 break;
18327 /* If this is an indexed address, make sure the register class can handle
18328 indexed addresses for this mode. */
18329 case PLUS:
18330 plus_arg0 = XEXP (addr, 0);
18331 plus_arg1 = XEXP (addr, 1);
18333 /* (plus (plus (reg) (constant)) (constant)) is generated during
18334 push_reload processing, so handle it now. */
18335 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18337 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18339 extra_cost = 1;
18340 type = "offset";
18344 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18345 push_reload processing, so handle it now. */
18346 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18348 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18350 extra_cost = 1;
18351 type = "indexed #2";
18355 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18357 fail_msg = "no base register #2";
18358 extra_cost = -1;
18361 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18363 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18364 || !legitimate_indexed_address_p (addr, false))
18366 extra_cost = 1;
18367 type = "indexed";
18371 /* Make sure the register class can handle offset addresses. */
18372 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18374 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18376 extra_cost = 1;
18377 type = "offset";
18381 else
18383 fail_msg = "bad PLUS";
18384 extra_cost = -1;
18387 break;
18389 case LO_SUM:
18390 if (!legitimate_lo_sum_address_p (mode, addr, false))
18392 fail_msg = "bad LO_SUM";
18393 extra_cost = -1;
18396 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18398 extra_cost = 1;
18399 type = "lo_sum";
18401 break;
18403 /* Static addresses need to create a TOC entry. */
18404 case CONST:
18405 case SYMBOL_REF:
18406 case LABEL_REF:
18407 type = "address";
18408 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18409 break;
18411 /* TOC references look like offsetable memory. */
18412 case UNSPEC:
18413 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18415 fail_msg = "bad UNSPEC";
18416 extra_cost = -1;
18419 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18421 extra_cost = 1;
18422 type = "toc reference";
18424 break;
18426 default:
18428 fail_msg = "bad address";
18429 extra_cost = -1;
18433 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18435 if (extra_cost < 0)
18436 fprintf (stderr,
18437 "rs6000_secondary_reload_memory error: mode = %s, "
18438 "class = %s, addr_mask = '%s', %s\n",
18439 GET_MODE_NAME (mode),
18440 reg_class_names[rclass],
18441 rs6000_debug_addr_mask (addr_mask, false),
18442 (fail_msg != NULL) ? fail_msg : "<bad address>");
18444 else
18445 fprintf (stderr,
18446 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18447 "addr_mask = '%s', extra cost = %d, %s\n",
18448 GET_MODE_NAME (mode),
18449 reg_class_names[rclass],
18450 rs6000_debug_addr_mask (addr_mask, false),
18451 extra_cost,
18452 (type) ? type : "<none>");
18454 debug_rtx (addr);
18457 return extra_cost;
18460 /* Helper function for rs6000_secondary_reload to return true if a move to a
18461 different register class is really a simple move. */
18463 static bool
18464 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18465 enum rs6000_reg_type from_type,
18466 machine_mode mode)
18468 int size;
18470 /* Add support for various direct moves available. In this function, we only
18471 look at cases where we don't need any extra registers, and one or more
18472 simple move insns are issued. At present, 32-bit integers are not allowed
18473 in FPR/VSX registers. Single precision binary floating is not a simple
18474 move because we need to convert to the single precision memory layout.
18475 The 4-byte SDmode can be moved. */
18476 size = GET_MODE_SIZE (mode);
18477 if (TARGET_DIRECT_MOVE
18478 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
18479 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18480 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18481 return true;
18483 else if (TARGET_DIRECT_MOVE_128 && size == 16
18484 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18485 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
18486 return true;
18488 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
18489 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
18490 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18491 return true;
18493 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18494 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18495 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18496 return true;
18498 return false;
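/* For instance, on a 64-bit ISA 2.07 target a DImode copy between a GPR
   and a VSX register matches the first case above and is a single mtvsrd
   or mfvsrd, so no scratch register or secondary memory is needed.  */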
18501 /* Direct move helper function for rs6000_secondary_reload, handle all of the
18502 special direct moves that involve allocating an extra register, return the
18503 insn code of the helper function if there is such a function or
18504 CODE_FOR_nothing if not. */
18506 static bool
18507 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18508 enum rs6000_reg_type from_type,
18509 machine_mode mode,
18510 secondary_reload_info *sri,
18511 bool altivec_p)
18513 bool ret = false;
18514 enum insn_code icode = CODE_FOR_nothing;
18515 int cost = 0;
18516 int size = GET_MODE_SIZE (mode);
18518 if (TARGET_POWERPC64)
18520 if (size == 16)
18522 /* Handle moving 128-bit values from GPRs to VSX registers on
18523 ISA 2.07 (power8, power9) when running in 64-bit mode using
18524 XXPERMDI to glue the two 64-bit values back together. */
18525 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18527 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18528 icode = reg_addr[mode].reload_vsx_gpr;
18531 /* Handle moving 128-bit values from VSX registers to GPRs on
18532 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18533 bottom 64-bit value. */
18534 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18536 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18537 icode = reg_addr[mode].reload_gpr_vsx;
18541 else if (mode == SFmode)
18543 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18545 cost = 3; /* xscvdpspn, mfvsrd, and. */
18546 icode = reg_addr[mode].reload_gpr_vsx;
18549 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18551 cost = 2; /* mtvsrz, xscvspdpn. */
18552 icode = reg_addr[mode].reload_vsx_gpr;
18557 if (TARGET_POWERPC64 && size == 16)
18559 /* Handle moving 128-bit values from GPRs to VSX registers on
18560 ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two
18561 64-bit values back together. */
18562 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18564 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18565 icode = reg_addr[mode].reload_vsx_gpr;
18568 /* Handle moving 128-bit values from VSX registers to GPRs on
18569 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18570 bottom 64-bit value. */
18571 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18573 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18574 icode = reg_addr[mode].reload_gpr_vsx;
18578 else if (!TARGET_POWERPC64 && size == 8)
18580 /* Handle moving 64-bit values from GPRs to floating point registers on
18581 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
18582 32-bit values back together. Altivec register classes must be handled
18583 specially since a different instruction is used, and the secondary
18584 reload support requires a single instruction class in the scratch
18585 register constraint. However, right now TFmode is not allowed in
18586 Altivec registers, so the pattern will never match. */
18587 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
18589 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
18590 icode = reg_addr[mode].reload_fpr_gpr;
18594 if (icode != CODE_FOR_nothing)
18596 ret = true;
18597 if (sri)
18599 sri->icode = icode;
18600 sri->extra_cost = cost;
18604 return ret;
18607 /* Return whether a move between two register classes can be done either
18608 directly (simple move) or via a pattern that uses a single extra temporary
18609 (using ISA 2.07's direct move in this case). */
18611 static bool
18612 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
18613 enum rs6000_reg_type from_type,
18614 machine_mode mode,
18615 secondary_reload_info *sri,
18616 bool altivec_p)
18618 /* Fall back to load/store reloads if either type is not a register. */
18619 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
18620 return false;
18622 /* If we haven't allocated registers yet, assume the move can be done for the
18623 standard register types. */
18624 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
18625 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
18626 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
18627 return true;
18629 /* A move within the same set of registers is a simple move for non-specialized
18630 registers. */
18631 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
18632 return true;
18634 /* Check whether a simple move can be done directly. */
18635 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
18637 if (sri)
18639 sri->icode = CODE_FOR_nothing;
18640 sri->extra_cost = 0;
18642 return true;
18645 /* Now check if we can do it in a few steps. */
18646 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
18647 altivec_p);
18650 /* Inform reload about cases where moving X with a mode MODE to a register in
18651 RCLASS requires an extra scratch or immediate register. Return the class
18652 needed for the immediate register.
18654 For VSX and Altivec, we may need a register to convert sp+offset into
18655 reg+sp.
18657 For misaligned 64-bit gpr loads and stores we need a register to
18658 convert an offset address to indirect. */
18660 static reg_class_t
18661 rs6000_secondary_reload (bool in_p,
18662 rtx x,
18663 reg_class_t rclass_i,
18664 machine_mode mode,
18665 secondary_reload_info *sri)
18667 enum reg_class rclass = (enum reg_class) rclass_i;
18668 reg_class_t ret = ALL_REGS;
18669 enum insn_code icode;
18670 bool default_p = false;
18671 bool done_p = false;
18673 /* Allow subreg of memory before/during reload. */
18674 bool memory_p = (MEM_P (x)
18675 || (!reload_completed && GET_CODE (x) == SUBREG
18676 && MEM_P (SUBREG_REG (x))));
18678 sri->icode = CODE_FOR_nothing;
18679 sri->extra_cost = 0;
18680 icode = ((in_p)
18681 ? reg_addr[mode].reload_load
18682 : reg_addr[mode].reload_store);
18684 if (REG_P (x) || register_operand (x, mode))
18686 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
18687 bool altivec_p = (rclass == ALTIVEC_REGS);
18688 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
18690 if (!in_p)
18692 enum rs6000_reg_type exchange = to_type;
18693 to_type = from_type;
18694 from_type = exchange;
18697 /* Can we do a direct move of some sort? */
18698 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
18699 altivec_p))
18701 icode = (enum insn_code)sri->icode;
18702 default_p = false;
18703 done_p = true;
18704 ret = NO_REGS;
18708 /* Make sure 0.0 is not reloaded or forced into memory. */
18709 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
18711 ret = NO_REGS;
18712 default_p = false;
18713 done_p = true;
18716 /* If this is a scalar floating point value and we want to load it into the
18717 traditional Altivec registers, do it via a move via a traditional floating
18718 point register, unless we have D-form addressing. Also make sure that
18719 non-zero constants use a FPR. */
18720 if (!done_p && reg_addr[mode].scalar_in_vmx_p
18721 && !mode_supports_vmx_dform (mode)
18722 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18723 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
18725 ret = FLOAT_REGS;
18726 default_p = false;
18727 done_p = true;
18730 /* Handle reload of load/stores if we have reload helper functions. */
18731 if (!done_p && icode != CODE_FOR_nothing && memory_p)
18733 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
18734 mode);
18736 if (extra_cost >= 0)
18738 done_p = true;
18739 ret = NO_REGS;
18740 if (extra_cost > 0)
18742 sri->extra_cost = extra_cost;
18743 sri->icode = icode;
18748 /* Handle unaligned loads and stores of integer registers. */
18749 if (!done_p && TARGET_POWERPC64
18750 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18751 && memory_p
18752 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
18754 rtx addr = XEXP (x, 0);
18755 rtx off = address_offset (addr);
18757 if (off != NULL_RTX)
18759 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18760 unsigned HOST_WIDE_INT offset = INTVAL (off);
18762 /* We need a secondary reload when our legitimate_address_p
18763 says the address is good (as otherwise the entire address
18764 will be reloaded), and the offset is not a multiple of
18765 four or we have an address wrap. Address wrap will only
18766 occur for LO_SUMs since legitimate_offset_address_p
18767 rejects addresses for 16-byte mems that will wrap. */
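/* The (offset & 0xffff) ^ 0x8000 expression below biases a signed 16-bit
   displacement into [0, 0xffff], so a single unsigned comparison against
   0x10000 - extra catches exactly the offsets whose last word would extend
   past 0x7fff, i.e. the wrap case described above.  */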
18768 if (GET_CODE (addr) == LO_SUM
18769 ? (1 /* legitimate_address_p allows any offset for lo_sum */
18770 && ((offset & 3) != 0
18771 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
18772 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
18773 && (offset & 3) != 0))
18775 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
18776 if (in_p)
18777 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
18778 : CODE_FOR_reload_di_load);
18779 else
18780 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
18781 : CODE_FOR_reload_di_store);
18782 sri->extra_cost = 2;
18783 ret = NO_REGS;
18784 done_p = true;
18786 else
18787 default_p = true;
18789 else
18790 default_p = true;
18793 if (!done_p && !TARGET_POWERPC64
18794 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18795 && memory_p
18796 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
18798 rtx addr = XEXP (x, 0);
18799 rtx off = address_offset (addr);
18801 if (off != NULL_RTX)
18803 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18804 unsigned HOST_WIDE_INT offset = INTVAL (off);
18806 /* We need a secondary reload when our legitimate_address_p
18807 says the address is good (as otherwise the entire address
18808 will be reloaded), and we have a wrap.
18810 legitimate_lo_sum_address_p allows LO_SUM addresses to
18811 have any offset so test for wrap in the low 16 bits.
18813 legitimate_offset_address_p checks for the range
18814 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
18815 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
18816 [0x7ff4,0x7fff] respectively, so test for the
18817 intersection of these ranges, [0x7ffc,0x7fff] and
18818 [0x7ff4,0x7ff7] respectively.
18820 Note that the address we see here may have been
18821 manipulated by legitimize_reload_address. */
18822 if (GET_CODE (addr) == LO_SUM
18823 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
18824 : offset - (0x8000 - extra) < UNITS_PER_WORD)
18826 if (in_p)
18827 sri->icode = CODE_FOR_reload_si_load;
18828 else
18829 sri->icode = CODE_FOR_reload_si_store;
18830 sri->extra_cost = 2;
18831 ret = NO_REGS;
18832 done_p = true;
18834 else
18835 default_p = true;
18837 else
18838 default_p = true;
18841 if (!done_p)
18842 default_p = true;
18844 if (default_p)
18845 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
18847 gcc_assert (ret != ALL_REGS);
18849 if (TARGET_DEBUG_ADDR)
18851 fprintf (stderr,
18852 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
18853 "mode = %s",
18854 reg_class_names[ret],
18855 in_p ? "true" : "false",
18856 reg_class_names[rclass],
18857 GET_MODE_NAME (mode));
18859 if (reload_completed)
18860 fputs (", after reload", stderr);
18862 if (!done_p)
18863 fputs (", done_p not set", stderr);
18865 if (default_p)
18866 fputs (", default secondary reload", stderr);
18868 if (sri->icode != CODE_FOR_nothing)
18869 fprintf (stderr, ", reload func = %s, extra cost = %d",
18870 insn_data[sri->icode].name, sri->extra_cost);
18872 else if (sri->extra_cost > 0)
18873 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
18875 fputs ("\n", stderr);
18876 debug_rtx (x);
18879 return ret;
18882 /* Better tracing for rs6000_secondary_reload_inner. */
18884 static void
18885 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
18886 bool store_p)
18888 rtx set, clobber;
18890 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
18892 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
18893 store_p ? "store" : "load");
18895 if (store_p)
18896 set = gen_rtx_SET (mem, reg);
18897 else
18898 set = gen_rtx_SET (reg, mem);
18900 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
18901 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
18904 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
18905 ATTRIBUTE_NORETURN;
18907 static void
18908 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
18909 bool store_p)
18911 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
18912 gcc_unreachable ();
18915 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
18916 reload helper functions. These were identified in
18917 rs6000_secondary_reload_memory, and if reload decided to use the secondary
18918 reload, it calls the insns:
18919 reload_<RELOAD:mode>_<P:mptrsize>_store
18920 reload_<RELOAD:mode>_<P:mptrsize>_load
18922 which in turn calls this function, to do whatever is necessary to create
18923 valid addresses. */
18925 void
18926 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
18928 int regno = true_regnum (reg);
18929 machine_mode mode = GET_MODE (reg);
18930 addr_mask_type addr_mask;
18931 rtx addr;
18932 rtx new_addr;
18933 rtx op_reg, op0, op1;
18934 rtx and_op;
18935 rtx cc_clobber;
18936 rtvec rv;
18938 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
18939 || !base_reg_operand (scratch, GET_MODE (scratch)))
18940 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18942 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
18943 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18945 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
18946 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18948 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
18949 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18951 else
18952 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18954 /* Make sure the mode is valid in this register class. */
18955 if ((addr_mask & RELOAD_REG_VALID) == 0)
18956 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18958 if (TARGET_DEBUG_ADDR)
18959 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
18961 new_addr = addr = XEXP (mem, 0);
18962 switch (GET_CODE (addr))
18964 /* Does the register class support auto update forms for this mode? If
18965 not, do the update now. We don't need a scratch register, since the
18966 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
18967 case PRE_INC:
18968 case PRE_DEC:
18969 op_reg = XEXP (addr, 0);
18970 if (!base_reg_operand (op_reg, Pmode))
18971 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18973 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18975 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
18976 new_addr = op_reg;
18978 break;
18980 case PRE_MODIFY:
18981 op0 = XEXP (addr, 0);
18982 op1 = XEXP (addr, 1);
18983 if (!base_reg_operand (op0, Pmode)
18984 || GET_CODE (op1) != PLUS
18985 || !rtx_equal_p (op0, XEXP (op1, 0)))
18986 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18988 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18990 emit_insn (gen_rtx_SET (op0, op1));
18991 new_addr = op0;
18993 break;
18995 /* Do we need to simulate AND -16 to clear the bottom address bits used
18996 in VMX load/stores? */
18997 case AND:
18998 op0 = XEXP (addr, 0);
18999 op1 = XEXP (addr, 1);
19000 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19002 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19003 op_reg = op0;
19005 else if (GET_CODE (op0) == PLUS)
19007 emit_insn (gen_rtx_SET (scratch, op0));
19008 op_reg = scratch;
19011 else
19012 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19014 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19015 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19016 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19017 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19018 new_addr = scratch;
19020 break;
19022 /* If this is an indirect address, make sure it is a base register. */
19023 case REG:
19024 case SUBREG:
19025 if (!base_reg_operand (addr, GET_MODE (addr)))
19027 emit_insn (gen_rtx_SET (scratch, addr));
19028 new_addr = scratch;
19030 break;
19032 /* If this is an indexed address, make sure the register class can handle
19033 indexed addresses for this mode. */
19034 case PLUS:
19035 op0 = XEXP (addr, 0);
19036 op1 = XEXP (addr, 1);
19037 if (!base_reg_operand (op0, Pmode))
19038 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19040 else if (int_reg_operand (op1, Pmode))
19042 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19044 emit_insn (gen_rtx_SET (scratch, addr));
19045 new_addr = scratch;
19049 /* Make sure the register class can handle offset addresses. */
19050 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19052 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19054 emit_insn (gen_rtx_SET (scratch, addr));
19055 new_addr = scratch;
19059 else
19060 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19062 break;
19064 case LO_SUM:
19065 op0 = XEXP (addr, 0);
19066 op1 = XEXP (addr, 1);
19067 if (!base_reg_operand (op0, Pmode))
19068 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19070 else if (int_reg_operand (op1, Pmode))
19072 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19074 emit_insn (gen_rtx_SET (scratch, addr));
19075 new_addr = scratch;
19079 /* Make sure the register class can handle offset addresses. */
19080 else if (legitimate_lo_sum_address_p (mode, addr, false))
19082 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19084 emit_insn (gen_rtx_SET (scratch, addr));
19085 new_addr = scratch;
19089 else
19090 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19092 break;
19094 case SYMBOL_REF:
19095 case CONST:
19096 case LABEL_REF:
19097 rs6000_emit_move (scratch, addr, Pmode);
19098 new_addr = scratch;
19099 break;
19101 default:
19102 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19105 /* Adjust the address if it changed. */
19106 if (addr != new_addr)
19108 mem = replace_equiv_address_nv (mem, new_addr);
19109 if (TARGET_DEBUG_ADDR)
19110 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19113 /* Now create the move. */
19114 if (store_p)
19115 emit_insn (gen_rtx_SET (mem, reg));
19116 else
19117 emit_insn (gen_rtx_SET (reg, mem));
19119 return;
19122 /* Convert reloads involving 64-bit gprs and misaligned offset
19123 addressing, or multiple 32-bit gprs and offsets that are too large,
19124 to use indirect addressing. */
19126 void
19127 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19129 int regno = true_regnum (reg);
19130 enum reg_class rclass;
19131 rtx addr;
19132 rtx scratch_or_premodify = scratch;
19134 if (TARGET_DEBUG_ADDR)
19136 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19137 store_p ? "store" : "load");
19138 fprintf (stderr, "reg:\n");
19139 debug_rtx (reg);
19140 fprintf (stderr, "mem:\n");
19141 debug_rtx (mem);
19142 fprintf (stderr, "scratch:\n");
19143 debug_rtx (scratch);
19146 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19147 gcc_assert (GET_CODE (mem) == MEM);
19148 rclass = REGNO_REG_CLASS (regno);
19149 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19150 addr = XEXP (mem, 0);
19152 if (GET_CODE (addr) == PRE_MODIFY)
19154 gcc_assert (REG_P (XEXP (addr, 0))
19155 && GET_CODE (XEXP (addr, 1)) == PLUS
19156 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19157 scratch_or_premodify = XEXP (addr, 0);
19158 if (!HARD_REGISTER_P (scratch_or_premodify))
19159 /* If we have a pseudo here then reload will have arranged
19160 to have it replaced, but only in the original insn.
19161 Use the replacement here too. */
19162 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19164 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19165 expressions from the original insn, without unsharing them.
19166 Any RTL that points into the original insn will of course
19167 have register replacements applied. That is why we don't
19168 need to look for replacements under the PLUS. */
19169 addr = XEXP (addr, 1);
19171 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19173 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19175 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19177 /* Now create the move. */
19178 if (store_p)
19179 emit_insn (gen_rtx_SET (mem, reg));
19180 else
19181 emit_insn (gen_rtx_SET (reg, mem));
19183 return;
19186 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
19187 this function has any SDmode references. If we are on a power7 or later, we
19188 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
19189 can load/store the value. */
19191 static void
19192 rs6000_alloc_sdmode_stack_slot (void)
19194 tree t;
19195 basic_block bb;
19196 gimple_stmt_iterator gsi;
19198 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
19199 /* We use a different approach for dealing with the secondary
19200 memory in LRA. */
19201 if (ira_use_lra_p)
19202 return;
19204 if (TARGET_NO_SDMODE_STACK)
19205 return;
19207 FOR_EACH_BB_FN (bb, cfun)
19208 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
19210 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
19211 if (ret)
19213 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19214 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19215 SDmode, 0);
19216 return;
19220 /* Check for any SDmode parameters of the function. */
19221 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
19223 if (TREE_TYPE (t) == error_mark_node)
19224 continue;
19226 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
19227 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
19229 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19230 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19231 SDmode, 0);
19232 return;
19237 static void
19238 rs6000_instantiate_decls (void)
19240 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
19241 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
19244 /* Given an rtx X being reloaded into a reg required to be
19245 in class CLASS, return the class of reg to actually use.
19246 In general this is just CLASS; but on some machines
19247 in some cases it is preferable to use a more restrictive class.
19249 On the RS/6000, we have to return NO_REGS when we want to reload a
19250 floating-point CONST_DOUBLE to force it to be copied to memory.
19252 We also don't want to reload integer values into floating-point
19253 registers if we can at all help it. In fact, this can
19254 cause reload to die, if it tries to generate a reload of CTR
19255 into a FP register and discovers it doesn't have the memory location
19256 required.
19258 ??? Would it be a good idea to have reload do the converse, that is
19259 try to reload floating modes into FP registers if possible? */
19262 static enum reg_class
19263 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19265 machine_mode mode = GET_MODE (x);
19266 bool is_constant = CONSTANT_P (x);
19268 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19269 reload class for it. */
19270 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19271 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19272 return NO_REGS;
19274 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19275 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19276 return NO_REGS;
19278 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19279 the reloading of address expressions using PLUS into floating point
19280 registers. */
19281 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19283 if (is_constant)
19285 /* Zero is always allowed in all VSX registers. */
19286 if (x == CONST0_RTX (mode))
19287 return rclass;
19289 /* If this is a vector constant that can be formed with a few Altivec
19290 instructions, we want altivec registers. */
19291 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19292 return ALTIVEC_REGS;
19294 /* Force constant to memory. */
19295 return NO_REGS;
19298 /* D-form addressing can easily reload the value. */
19299 if (mode_supports_vmx_dform (mode))
19300 return rclass;
19302 /* If this is a scalar floating point value and we don't have D-form
19303 addressing, prefer the traditional floating point registers so that we
19304 can use D-form (register+offset) addressing. */
19305 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
19306 return FLOAT_REGS;
19308 /* Prefer the Altivec registers if Altivec is handling the vector
19309 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19310 loads. */
19311 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19312 || mode == V1TImode)
19313 return ALTIVEC_REGS;
19315 return rclass;
19318 if (is_constant || GET_CODE (x) == PLUS)
19320 if (reg_class_subset_p (GENERAL_REGS, rclass))
19321 return GENERAL_REGS;
19322 if (reg_class_subset_p (BASE_REGS, rclass))
19323 return BASE_REGS;
19324 return NO_REGS;
19327 /* If we haven't picked a register class, and the type is a vector or
19328 floating point type, prefer to use the VSX, FPR, or Altivec register
19329 classes. */
19330 if (rclass == NO_REGS)
19332 if (TARGET_VSX && VECTOR_MEM_VSX_OR_P8_VECTOR_P (mode))
19333 return VSX_REGS;
19335 if (TARGET_ALTIVEC && VECTOR_MEM_ALTIVEC_P (mode))
19336 return ALTIVEC_REGS;
19338 if (DECIMAL_FLOAT_MODE_P (mode))
19339 return TARGET_DFP ? FLOAT_REGS : NO_REGS;
19341 if (TARGET_FPRS && TARGET_HARD_FLOAT && FLOAT_MODE_P (mode)
19342 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) != 0)
19343 return FLOAT_REGS;
19346 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
19347 return GENERAL_REGS;
19349 return rclass;
19352 /* Debug version of rs6000_preferred_reload_class. */
19353 static enum reg_class
19354 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19356 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19358 fprintf (stderr,
19359 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19360 "mode = %s, x:\n",
19361 reg_class_names[ret], reg_class_names[rclass],
19362 GET_MODE_NAME (GET_MODE (x)));
19363 debug_rtx (x);
19365 return ret;
19368 /* If we are copying between FP or AltiVec registers and anything else, we need
19369 a memory location. The exception is when we are targeting ppc64 and the
19370 move instructions between fprs and gprs are available. Also, under VSX, you
19371 can copy vector registers from the FP register set to the Altivec register
19372 set and vice versa. */
19374 static bool
19375 rs6000_secondary_memory_needed (enum reg_class from_class,
19376 enum reg_class to_class,
19377 machine_mode mode)
19379 enum rs6000_reg_type from_type, to_type;
19380 bool altivec_p = ((from_class == ALTIVEC_REGS)
19381 || (to_class == ALTIVEC_REGS));
19383 /* If a simple/direct move is available, we don't need secondary memory */
19384 from_type = reg_class_to_reg_type[(int)from_class];
19385 to_type = reg_class_to_reg_type[(int)to_class];
19387 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19388 (secondary_reload_info *)0, altivec_p))
19389 return false;
19391 /* If we have a floating point or vector register class, we need to use
19392 memory to transfer the data. */
19393 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19394 return true;
19396 return false;
19399 /* Debug version of rs6000_secondary_memory_needed. */
19400 static bool
19401 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
19402 enum reg_class to_class,
19403 machine_mode mode)
19405 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
19407 fprintf (stderr,
19408 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19409 "to_class = %s, mode = %s\n",
19410 ret ? "true" : "false",
19411 reg_class_names[from_class],
19412 reg_class_names[to_class],
19413 GET_MODE_NAME (mode));
19415 return ret;
19418 /* Return the register class of a scratch register needed to copy IN into
19419 or out of a register in RCLASS in MODE. If it can be done directly,
19420 NO_REGS is returned. */
19422 static enum reg_class
19423 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19424 rtx in)
19426 int regno;
19428 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19429 #if TARGET_MACHO
19430 && MACHOPIC_INDIRECT
19431 #endif
19434 /* We cannot copy a symbolic operand directly into anything
19435 other than BASE_REGS for TARGET_ELF. So indicate that a
19436 register from BASE_REGS is needed as an intermediate
19437 register.
19439 On Darwin, pic addresses require a load from memory, which
19440 needs a base register. */
19441 if (rclass != BASE_REGS
19442 && (GET_CODE (in) == SYMBOL_REF
19443 || GET_CODE (in) == HIGH
19444 || GET_CODE (in) == LABEL_REF
19445 || GET_CODE (in) == CONST))
19446 return BASE_REGS;
19449 if (GET_CODE (in) == REG)
19451 regno = REGNO (in);
19452 if (regno >= FIRST_PSEUDO_REGISTER)
19454 regno = true_regnum (in);
19455 if (regno >= FIRST_PSEUDO_REGISTER)
19456 regno = -1;
19459 else if (GET_CODE (in) == SUBREG)
19461 regno = true_regnum (in);
19462 if (regno >= FIRST_PSEUDO_REGISTER)
19463 regno = -1;
19465 else
19466 regno = -1;
19468 /* If we have VSX register moves, prefer moving scalar values between
19469 Altivec registers and GPR by going via an FPR (and then via memory)
19470 instead of reloading the secondary memory address for Altivec moves. */
19471 if (TARGET_VSX
19472 && GET_MODE_SIZE (mode) < 16
19473 && !mode_supports_vmx_dform (mode)
19474 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19475 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19476 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19477 && (regno >= 0 && INT_REGNO_P (regno)))))
19478 return FLOAT_REGS;
19480 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19481 into anything. */
19482 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19483 || (regno >= 0 && INT_REGNO_P (regno)))
19484 return NO_REGS;
19486 /* Constants, memory, and VSX registers can go into VSX registers (both the
19487 traditional floating point and the altivec registers). */
19488 if (rclass == VSX_REGS
19489 && (regno == -1 || VSX_REGNO_P (regno)))
19490 return NO_REGS;
19492 /* Constants, memory, and FP registers can go into FP registers. */
19493 if ((regno == -1 || FP_REGNO_P (regno))
19494 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
19495 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19497 /* Memory and AltiVec registers can go into AltiVec registers. */
19498 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19499 && rclass == ALTIVEC_REGS)
19500 return NO_REGS;
19502 /* We can copy among the CR registers. */
19503 if ((rclass == CR_REGS || rclass == CR0_REGS)
19504 && regno >= 0 && CR_REGNO_P (regno))
19505 return NO_REGS;
19507 /* Otherwise, we need GENERAL_REGS. */
19508 return GENERAL_REGS;
19511 /* Debug version of rs6000_secondary_reload_class. */
19512 static enum reg_class
19513 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19514 machine_mode mode, rtx in)
19516 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19517 fprintf (stderr,
19518 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19519 "mode = %s, input rtx:\n",
19520 reg_class_names[ret], reg_class_names[rclass],
19521 GET_MODE_NAME (mode));
19522 debug_rtx (in);
19524 return ret;
19527 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
19529 static bool
19530 rs6000_cannot_change_mode_class (machine_mode from,
19531 machine_mode to,
19532 enum reg_class rclass)
19534 unsigned from_size = GET_MODE_SIZE (from);
19535 unsigned to_size = GET_MODE_SIZE (to);
19537 if (from_size != to_size)
19539 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19541 if (reg_classes_intersect_p (xclass, rclass))
19543 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
19544 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
19545 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19546 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19548 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19549 single register under VSX because the scalar part of the register
19550 is in the upper 64-bits, and not the lower 64-bits. Types like
19551 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
19552 IEEE floating point can't overlap, and neither can small
19553 values. */
19555 if (to_float128_vector_p && from_float128_vector_p)
19556 return false;
19558 else if (to_float128_vector_p || from_float128_vector_p)
19559 return true;
19561 /* TDmode in floating-mode registers must always go into a register
19562 pair with the most significant word in the even-numbered register
19563 to match ISA requirements. In little-endian mode, this does not
19564 match subreg numbering, so we cannot allow subregs. */
19565 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19566 return true;
19568 if (from_size < 8 || to_size < 8)
19569 return true;
19571 if (from_size == 8 && (8 * to_nregs) != to_size)
19572 return true;
19574 if (to_size == 8 && (8 * from_nregs) != from_size)
19575 return true;
19577 return false;
19579 else
19580 return false;
19583 if (TARGET_E500_DOUBLE
19584 && ((((to) == DFmode) + ((from) == DFmode)) == 1
19585 || (((to) == TFmode) + ((from) == TFmode)) == 1
19586 || (((to) == IFmode) + ((from) == IFmode)) == 1
19587 || (((to) == KFmode) + ((from) == KFmode)) == 1
19588 || (((to) == DDmode) + ((from) == DDmode)) == 1
19589 || (((to) == TDmode) + ((from) == TDmode)) == 1
19590 || (((to) == DImode) + ((from) == DImode)) == 1))
19591 return true;
19593 /* Since the VSX register set includes traditional floating point registers
19594 and altivec registers, just check for the size being different instead of
19595 trying to check whether the modes are vector modes. Otherwise it won't
19596 allow say DF and DI to change classes. For types like TFmode and TDmode
19597 that take 2 64-bit registers, rather than a single 128-bit register, don't
19598 allow subregs of those types to other 128 bit types. */
19599 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19601 unsigned num_regs = (from_size + 15) / 16;
19602 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
19603 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
19604 return true;
19606 return (from_size != 8 && from_size != 16);
19609 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19610 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19611 return true;
19613 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
19614 && reg_classes_intersect_p (GENERAL_REGS, rclass))
19615 return true;
19617 return false;
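/* A standalone sketch of the VSX size check above, with explicit
   stand-ins for GET_MODE_SIZE and hard_regno_nregs (8 bytes per FPR);
   the mode pairs exercised in main are illustrative only.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
vsx_change_invalid (unsigned from_size, unsigned from_fprs,
		    unsigned to_fprs)
{
  unsigned num_regs = (from_size + 15) / 16;	/* 128-bit chunks.  */
  if (to_fprs > num_regs || from_fprs > num_regs)
    return true;
  return from_size != 8 && from_size != 16;
}

int
main (void)
{
  /* DFmode <-> DImode: 8 bytes, one FPR each: allowed (prints 0).  */
  printf ("%d\n", vsx_change_invalid (8, 1, 1));
  /* TFmode (two FPRs) <-> V2DImode: spans registers: rejected (1).  */
  printf ("%d\n", vsx_change_invalid (16, 2, 1));
  return 0;
}
#endif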
19620 /* Debug version of rs6000_cannot_change_mode_class. */
19621 static bool
19622 rs6000_debug_cannot_change_mode_class (machine_mode from,
19623 machine_mode to,
19624 enum reg_class rclass)
19626 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
19628 fprintf (stderr,
19629 "rs6000_cannot_change_mode_class, return %s, from = %s, "
19630 "to = %s, rclass = %s\n",
19631 ret ? "true" : "false",
19632 GET_MODE_NAME (from), GET_MODE_NAME (to),
19633 reg_class_names[rclass]);
19635 return ret;
19638 /* Return a string to do a move operation of 128 bits of data. */
19640 const char *
19641 rs6000_output_move_128bit (rtx operands[])
19643 rtx dest = operands[0];
19644 rtx src = operands[1];
19645 machine_mode mode = GET_MODE (dest);
19646 int dest_regno;
19647 int src_regno;
19648 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
19649 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
19651 if (REG_P (dest))
19653 dest_regno = REGNO (dest);
19654 dest_gpr_p = INT_REGNO_P (dest_regno);
19655 dest_fp_p = FP_REGNO_P (dest_regno);
19656 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
19657 dest_vsx_p = dest_fp_p | dest_vmx_p;
19659 else
19661 dest_regno = -1;
19662 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
19665 if (REG_P (src))
19667 src_regno = REGNO (src);
19668 src_gpr_p = INT_REGNO_P (src_regno);
19669 src_fp_p = FP_REGNO_P (src_regno);
19670 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
19671 src_vsx_p = src_fp_p | src_vmx_p;
19673 else
19675 src_regno = -1;
19676 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
19679 /* Register moves. */
19680 if (dest_regno >= 0 && src_regno >= 0)
19682 if (dest_gpr_p)
19684 if (src_gpr_p)
19685 return "#";
19687 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
19688 return (WORDS_BIG_ENDIAN
19689 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
19690 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
19692 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
19693 return "#";
19696 else if (TARGET_VSX && dest_vsx_p)
19698 if (src_vsx_p)
19699 return "xxlor %x0,%x1,%x1";
19701 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
19702 return (WORDS_BIG_ENDIAN
19703 ? "mtvsrdd %x0,%1,%L1"
19704 : "mtvsrdd %x0,%L1,%1");
19706 else if (TARGET_DIRECT_MOVE && src_gpr_p)
19707 return "#";
19710 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
19711 return "vor %0,%1,%1";
19713 else if (dest_fp_p && src_fp_p)
19714 return "#";
19717 /* Loads. */
19718 else if (dest_regno >= 0 && MEM_P (src))
19720 if (dest_gpr_p)
19722 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19723 return "lq %0,%1";
19724 else
19725 return "#";
19728 else if (TARGET_ALTIVEC && dest_vmx_p
19729 && altivec_indexed_or_indirect_operand (src, mode))
19730 return "lvx %0,%y1";
19732 else if (TARGET_VSX && dest_vsx_p)
19734 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19735 return "lxvw4x %x0,%y1";
19736 else
19737 return "lxvd2x %x0,%y1";
19740 else if (TARGET_ALTIVEC && dest_vmx_p)
19741 return "lvx %0,%y1";
19743 else if (dest_fp_p)
19744 return "#";
19747 /* Stores. */
19748 else if (src_regno >= 0 && MEM_P (dest))
19750 if (src_gpr_p)
19752 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19753 return "stq %1,%0";
19754 else
19755 return "#";
19758 else if (TARGET_ALTIVEC && src_vmx_p
19759 && altivec_indexed_or_indirect_operand (dest, mode))
19760 return "stvx %1,%y0";
19762 else if (TARGET_VSX && src_vsx_p)
19764 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19765 return "stxvw4x %x1,%y0";
19766 else
19767 return "stxvd2x %x1,%y0";
19770 else if (TARGET_ALTIVEC && src_vmx_p)
19771 return "stvx %1,%y0";
19773 else if (src_fp_p)
19774 return "#";
19777 /* Constants. */
19778 else if (dest_regno >= 0
19779 && (GET_CODE (src) == CONST_INT
19780 || GET_CODE (src) == CONST_WIDE_INT
19781 || GET_CODE (src) == CONST_DOUBLE
19782 || GET_CODE (src) == CONST_VECTOR))
19784 if (dest_gpr_p)
19785 return "#";
19787 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
19788 return "xxlxor %x0,%x0,%x0";
19790 else if (TARGET_ALTIVEC && dest_vmx_p)
19791 return output_vec_const_move (operands);
19794 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
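/* Examples of the templates selected above, with hypothetical
   operands: a V4SImode copy from v2 into v3 under -mvsx uses
   "xxlor %x0,%x1,%x1" and prints as
	xxlor 35,34,34
   (AltiVec register N is VSX register N + 32), while a GPR pair
   moved into a VSX register with ISA 3.0 direct moves uses
   "mtvsrdd %x0,%1,%L1".  */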
19797 /* Validate a 128-bit move. */
19798 bool
19799 rs6000_move_128bit_ok_p (rtx operands[])
19801 machine_mode mode = GET_MODE (operands[0]);
19802 return (gpc_reg_operand (operands[0], mode)
19803 || gpc_reg_operand (operands[1], mode));
19806 /* Return true if a 128-bit move needs to be split. */
19807 bool
19808 rs6000_split_128bit_ok_p (rtx operands[])
19810 if (!reload_completed)
19811 return false;
19813 if (!gpr_or_gpr_p (operands[0], operands[1]))
19814 return false;
19816 if (quad_load_store_p (operands[0], operands[1]))
19817 return false;
19819 return true;
19823 /* Given a comparison operation, return the bit number in CCR to test. We
19824 know this is a valid comparison.
19826 SCC_P is 1 if this is for an scc. That means that %D will have been
19827 used instead of %C, so the bits will be in different places.
19829 Return -1 if OP isn't a valid comparison for some reason. */
19831 int
19832 ccr_bit (rtx op, int scc_p)
19834 enum rtx_code code = GET_CODE (op);
19835 machine_mode cc_mode;
19836 int cc_regnum;
19837 int base_bit;
19838 rtx reg;
19840 if (!COMPARISON_P (op))
19841 return -1;
19843 reg = XEXP (op, 0);
19845 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
19847 cc_mode = GET_MODE (reg);
19848 cc_regnum = REGNO (reg);
19849 base_bit = 4 * (cc_regnum - CR0_REGNO);
19851 validate_condition_mode (code, cc_mode);
19853 /* When generating a sCOND operation, only positive conditions are
19854 allowed. */
19855 gcc_assert (!scc_p
19856 || code == EQ || code == GT || code == LT || code == UNORDERED
19857 || code == GTU || code == LTU);
19859 switch (code)
19861 case NE:
19862 return scc_p ? base_bit + 3 : base_bit + 2;
19863 case EQ:
19864 return base_bit + 2;
19865 case GT: case GTU: case UNLE:
19866 return base_bit + 1;
19867 case LT: case LTU: case UNGE:
19868 return base_bit;
19869 case ORDERED: case UNORDERED:
19870 return base_bit + 3;
19872 case GE: case GEU:
19873 /* If scc, we will have done a cror to put the bit in the
19874 unordered position. So test that bit. For integer, this is ! LT
19875 unless this is an scc insn. */
19876 return scc_p ? base_bit + 3 : base_bit;
19878 case LE: case LEU:
19879 return scc_p ? base_bit + 3 : base_bit + 1;
19881 default:
19882 gcc_unreachable ();
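/* A standalone sketch of the bit numbering used above: CR field N
   occupies bits 4*N .. 4*N+3, holding LT, GT, EQ and SO/UN in that
   order, so e.g. the EQ bit of CR2 is bit 10.  */
#if 0
#include <stdio.h>

static int
cr_eq_bit (int cr_field)
{
  int base_bit = 4 * cr_field;	/* mirrors 4 * (cc_regnum - CR0_REGNO) */
  return base_bit + 2;		/* EQ is the third bit of the field */
}

int
main (void)
{
  printf ("%d\n", cr_eq_bit (2));	/* prints 10 */
  return 0;
}
#endif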
19886 /* Return the GOT register. */
19888 rtx
19889 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
19891 /* The second flow pass currently (June 1999) can't update
19892 regs_ever_live without disturbing other parts of the compiler, so
19893 update it here to make the prolog/epilogue code happy. */
19894 if (!can_create_pseudo_p ()
19895 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
19896 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
19898 crtl->uses_pic_offset_table = 1;
19900 return pic_offset_table_rtx;
19903 static rs6000_stack_t stack_info;
19905 /* Function to init struct machine_function.
19906 This will be called, via a pointer variable,
19907 from push_function_context. */
19909 static struct machine_function *
19910 rs6000_init_machine_status (void)
19912 stack_info.reload_completed = 0;
19913 return ggc_cleared_alloc<machine_function> ();
19916 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
19918 /* Write out a function code label. */
19920 void
19921 rs6000_output_function_entry (FILE *file, const char *fname)
19923 if (fname[0] != '.')
19925 switch (DEFAULT_ABI)
19927 default:
19928 gcc_unreachable ();
19930 case ABI_AIX:
19931 if (DOT_SYMBOLS)
19932 putc ('.', file);
19933 else
19934 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
19935 break;
19937 case ABI_ELFv2:
19938 case ABI_V4:
19939 case ABI_DARWIN:
19940 break;
19944 RS6000_OUTPUT_BASENAME (file, fname);
19947 /* Print an operand. Recognize special options, documented below. */
19949 #if TARGET_ELF
19950 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
19951 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
19952 #else
19953 #define SMALL_DATA_RELOC "sda21"
19954 #define SMALL_DATA_REG 0
19955 #endif
19957 void
19958 print_operand (FILE *file, rtx x, int code)
19960 int i;
19961 unsigned HOST_WIDE_INT uval;
19963 switch (code)
19965 /* %a is output_address. */
19967 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
19968 output_operand. */
19970 case 'D':
19971 /* Like 'J' but get to the GT bit only. */
19972 gcc_assert (REG_P (x));
19974 /* Bit 1 is GT bit. */
19975 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
19977 /* Add one for shift count in rlinm for scc. */
19978 fprintf (file, "%d", i + 1);
19979 return;
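/* For instance, with a hypothetical cr0 operand the GT bit is bit 1
   and %D prints 2: the extra 1 is the rlinm shift count that rotates
   the tested bit into the low-order position for an scc sequence.  */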
19981 case 'e':
19982 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
19983 if (! INT_P (x))
19985 output_operand_lossage ("invalid %%e value");
19986 return;
19989 uval = INTVAL (x);
19990 if ((uval & 0xffff) == 0 && uval != 0)
19991 putc ('s', file);
19992 return;
19994 case 'E':
19995 /* X is a CR register. Print the number of the EQ bit of the CR. */
19996 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19997 output_operand_lossage ("invalid %%E value");
19998 else
19999 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20000 return;
20002 case 'f':
20003 /* X is a CR register. Print the shift count needed to move it
20004 to the high-order four bits. */
20005 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20006 output_operand_lossage ("invalid %%f value");
20007 else
20008 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20009 return;
20011 case 'F':
20012 /* Similar, but print the count for the rotate in the opposite
20013 direction. */
20014 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20015 output_operand_lossage ("invalid %%F value");
20016 else
20017 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20018 return;
20020 case 'G':
20021 /* X is a constant integer. If it is negative, print "m",
20022 otherwise print "z". This is to make an aze or ame insn. */
20023 if (GET_CODE (x) != CONST_INT)
20024 output_operand_lossage ("invalid %%G value");
20025 else if (INTVAL (x) >= 0)
20026 putc ('z', file);
20027 else
20028 putc ('m', file);
20029 return;
20031 case 'h':
20032 /* If constant, output low-order five bits. Otherwise, write
20033 normally. */
20034 if (INT_P (x))
20035 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20036 else
20037 print_operand (file, x, 0);
20038 return;
20040 case 'H':
20041 /* If constant, output low-order six bits. Otherwise, write
20042 normally. */
20043 if (INT_P (x))
20044 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20045 else
20046 print_operand (file, x, 0);
20047 return;
20049 case 'I':
20050 /* Print `i' if this is a constant, else nothing. */
20051 if (INT_P (x))
20052 putc ('i', file);
20053 return;
20055 case 'j':
20056 /* Write the bit number in CCR for jump. */
20057 i = ccr_bit (x, 0);
20058 if (i == -1)
20059 output_operand_lossage ("invalid %%j code");
20060 else
20061 fprintf (file, "%d", i);
20062 return;
20064 case 'J':
20065 /* Similar, but add one for shift count in rlinm for scc and pass
20066 scc flag to `ccr_bit'. */
20067 i = ccr_bit (x, 1);
20068 if (i == -1)
20069 output_operand_lossage ("invalid %%J code");
20070 else
20071 /* If we want bit 31, write a shift count of zero, not 32. */
20072 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20073 return;
20075 case 'k':
20076 /* X must be a constant. Write the 1's complement of the
20077 constant. */
20078 if (! INT_P (x))
20079 output_operand_lossage ("invalid %%k value");
20080 else
20081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20082 return;
20084 case 'K':
20085 /* X must be a symbolic constant on ELF. Write an
20086 expression suitable for an 'addi' that adds in the low 16
20087 bits of the MEM. */
20088 if (GET_CODE (x) == CONST)
20090 if (GET_CODE (XEXP (x, 0)) != PLUS
20091 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20092 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20093 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20094 output_operand_lossage ("invalid %%K value");
20096 print_operand_address (file, x);
20097 fputs ("@l", file);
20098 return;
20100 /* %l is output_asm_label. */
20102 case 'L':
20103 /* Write second word of DImode or DFmode reference. Works on register
20104 or non-indexed memory only. */
20105 if (REG_P (x))
20106 fputs (reg_names[REGNO (x) + 1], file);
20107 else if (MEM_P (x))
20109 machine_mode mode = GET_MODE (x);
20110 /* Handle possible auto-increment. Since it is pre-increment and
20111 we have already done it, we can just use an offset of word. */
20112 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20113 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20114 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20115 UNITS_PER_WORD));
20116 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20117 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20118 UNITS_PER_WORD));
20119 else
20120 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20121 UNITS_PER_WORD),
20122 0));
20124 if (small_data_operand (x, GET_MODE (x)))
20125 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20126 reg_names[SMALL_DATA_REG]);
20128 return;
20130 case 'N':
20131 /* Write the number of elements in the vector times 4. */
20132 if (GET_CODE (x) != PARALLEL)
20133 output_operand_lossage ("invalid %%N value");
20134 else
20135 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20136 return;
20138 case 'O':
20139 /* Similar, but subtract 1 first. */
20140 if (GET_CODE (x) != PARALLEL)
20141 output_operand_lossage ("invalid %%O value");
20142 else
20143 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20144 return;
20146 case 'p':
20147 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20148 if (! INT_P (x)
20149 || INTVAL (x) < 0
20150 || (i = exact_log2 (INTVAL (x))) < 0)
20151 output_operand_lossage ("invalid %%p value");
20152 else
20153 fprintf (file, "%d", i);
20154 return;
20156 case 'P':
20157 /* The operand must be an indirect memory reference. The result
20158 is the register name. */
20159 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20160 || REGNO (XEXP (x, 0)) >= 32)
20161 output_operand_lossage ("invalid %%P value");
20162 else
20163 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20164 return;
20166 case 'q':
20167 /* This outputs the logical code corresponding to a boolean
20168 expression. The expression may have one or both operands
20169 negated (if one, only the first one). For condition register
20170 logical operations, it will also treat the negated
20171 CR codes as NOTs, but not handle NOTs of them. */
20173 const char *const *t = 0;
20174 const char *s;
20175 enum rtx_code code = GET_CODE (x);
20176 static const char * const tbl[3][3] = {
20177 { "and", "andc", "nor" },
20178 { "or", "orc", "nand" },
20179 { "xor", "eqv", "xor" } };
20181 if (code == AND)
20182 t = tbl[0];
20183 else if (code == IOR)
20184 t = tbl[1];
20185 else if (code == XOR)
20186 t = tbl[2];
20187 else
20188 output_operand_lossage ("invalid %%q value");
20190 if (GET_CODE (XEXP (x, 0)) != NOT)
20191 s = t[0];
20192 else
20194 if (GET_CODE (XEXP (x, 1)) == NOT)
20195 s = t[2];
20196 else
20197 s = t[1];
20200 fputs (s, file);
20202 return;
20204 case 'Q':
20205 if (! TARGET_MFCRF)
20206 return;
20207 fputc (',', file);
20208 /* FALLTHRU */
20210 case 'R':
20211 /* X is a CR register. Print the mask for `mtcrf'. */
20212 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20213 output_operand_lossage ("invalid %%R value");
20214 else
20215 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20216 return;
20218 case 's':
20219 /* Low 5 bits of 32 - value. */
20220 if (! INT_P (x))
20221 output_operand_lossage ("invalid %%s value");
20222 else
20223 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20224 return;
20226 case 't':
20227 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20228 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20230 /* Bit 3 is OV bit. */
20231 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20233 /* If we want bit 31, write a shift count of zero, not 32. */
20234 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20235 return;
20237 case 'T':
20238 /* Print the symbolic name of a branch target register. */
20239 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20240 && REGNO (x) != CTR_REGNO))
20241 output_operand_lossage ("invalid %%T value");
20242 else if (REGNO (x) == LR_REGNO)
20243 fputs ("lr", file);
20244 else
20245 fputs ("ctr", file);
20246 return;
20248 case 'u':
20249 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20250 for use in unsigned operand. */
20251 if (! INT_P (x))
20253 output_operand_lossage ("invalid %%u value");
20254 return;
20257 uval = INTVAL (x);
20258 if ((uval & 0xffff) == 0)
20259 uval >>= 16;
20261 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20262 return;
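/* A standalone sketch of the halfword selection just performed: when
   the low-order 16 bits are zero, the high-order half is printed
   instead.  */
#if 0
#include <stdio.h>

static unsigned long
pick_halfword (unsigned long uval)
{
  if ((uval & 0xffff) == 0)
    uval >>= 16;
  return uval & 0xffff;
}

int
main (void)
{
  printf ("0x%lx\n", pick_halfword (0x12340000ul));	/* 0x1234 */
  printf ("0x%lx\n", pick_halfword (0x00005678ul));	/* 0x5678 */
  return 0;
}
#endif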
20264 case 'v':
20265 /* High-order 16 bits of constant for use in signed operand. */
20266 if (! INT_P (x))
20267 output_operand_lossage ("invalid %%v value");
20268 else
20269 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20270 (INTVAL (x) >> 16) & 0xffff);
20271 return;
20273 case 'U':
20274 /* Print `u' if this has an auto-increment or auto-decrement. */
20275 if (MEM_P (x)
20276 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20277 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20278 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20279 putc ('u', file);
20280 return;
20282 case 'V':
20283 /* Print the trap code for this operand. */
20284 switch (GET_CODE (x))
20286 case EQ:
20287 fputs ("eq", file); /* 4 */
20288 break;
20289 case NE:
20290 fputs ("ne", file); /* 24 */
20291 break;
20292 case LT:
20293 fputs ("lt", file); /* 16 */
20294 break;
20295 case LE:
20296 fputs ("le", file); /* 20 */
20297 break;
20298 case GT:
20299 fputs ("gt", file); /* 8 */
20300 break;
20301 case GE:
20302 fputs ("ge", file); /* 12 */
20303 break;
20304 case LTU:
20305 fputs ("llt", file); /* 2 */
20306 break;
20307 case LEU:
20308 fputs ("lle", file); /* 6 */
20309 break;
20310 case GTU:
20311 fputs ("lgt", file); /* 1 */
20312 break;
20313 case GEU:
20314 fputs ("lge", file); /* 5 */
20315 break;
20316 default:
20317 gcc_unreachable ();
20319 break;
20321 case 'w':
20322 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20323 normally. */
20324 if (INT_P (x))
20325 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20326 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20327 else
20328 print_operand (file, x, 0);
20329 return;
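/* The %w expression above sign-extends the low 16 bits without
   relying on implementation-defined shifts; a standalone sketch:  */
#if 0
#include <stdio.h>

static long
sext16 (long v)
{
  return ((v & 0xffff) ^ 0x8000) - 0x8000;
}

int
main (void)
{
  printf ("%ld\n", sext16 (0xffffL));	/* prints -1 */
  printf ("%ld\n", sext16 (0x7fffL));	/* prints 32767 */
  return 0;
}
#endif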
20331 case 'x':
20332 /* X is a FPR or Altivec register used in a VSX context. */
20333 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
20334 output_operand_lossage ("invalid %%x value");
20335 else
20337 int reg = REGNO (x);
20338 int vsx_reg = (FP_REGNO_P (reg)
20339 ? reg - 32
20340 : reg - FIRST_ALTIVEC_REGNO + 32);
20342 #ifdef TARGET_REGNAMES
20343 if (TARGET_REGNAMES)
20344 fprintf (file, "%%vs%d", vsx_reg);
20345 else
20346 #endif
20347 fprintf (file, "%d", vsx_reg);
20349 return;
20351 case 'X':
20352 if (MEM_P (x)
20353 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20354 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20355 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20356 putc ('x', file);
20357 return;
20359 case 'Y':
20360 /* Like 'L', for third word of TImode/PTImode. */
20361 if (REG_P (x))
20362 fputs (reg_names[REGNO (x) + 2], file);
20363 else if (MEM_P (x))
20365 machine_mode mode = GET_MODE (x);
20366 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20367 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20368 output_address (mode, plus_constant (Pmode,
20369 XEXP (XEXP (x, 0), 0), 8));
20370 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20371 output_address (mode, plus_constant (Pmode,
20372 XEXP (XEXP (x, 0), 0), 8));
20373 else
20374 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20375 if (small_data_operand (x, GET_MODE (x)))
20376 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20377 reg_names[SMALL_DATA_REG]);
20379 return;
20381 case 'z':
20382 /* X is a SYMBOL_REF. Write out the name preceded by a
20383 period and without any trailing data in brackets. Used for function
20384 names. If we are configured for System V (or the embedded ABI) on
20385 the PowerPC, do not emit the period, since those systems do not use
20386 TOCs and the like. */
20387 gcc_assert (GET_CODE (x) == SYMBOL_REF);
20389 /* For macho, check to see if we need a stub. */
20390 if (TARGET_MACHO)
20392 const char *name = XSTR (x, 0);
20393 #if TARGET_MACHO
20394 if (darwin_emit_branch_islands
20395 && MACHOPIC_INDIRECT
20396 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20397 name = machopic_indirection_name (x, /*stub_p=*/true);
20398 #endif
20399 assemble_name (file, name);
20401 else if (!DOT_SYMBOLS)
20402 assemble_name (file, XSTR (x, 0));
20403 else
20404 rs6000_output_function_entry (file, XSTR (x, 0));
20405 return;
20407 case 'Z':
20408 /* Like 'L', for last word of TImode/PTImode. */
20409 if (REG_P (x))
20410 fputs (reg_names[REGNO (x) + 3], file);
20411 else if (MEM_P (x))
20413 machine_mode mode = GET_MODE (x);
20414 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20415 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20416 output_address (mode, plus_constant (Pmode,
20417 XEXP (XEXP (x, 0), 0), 12));
20418 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20419 output_address (mode, plus_constant (Pmode,
20420 XEXP (XEXP (x, 0), 0), 12));
20421 else
20422 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20423 if (small_data_operand (x, GET_MODE (x)))
20424 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20425 reg_names[SMALL_DATA_REG]);
20427 return;
20429 /* Print AltiVec or SPE memory operand. */
20430 case 'y':
20432 rtx tmp;
20434 gcc_assert (MEM_P (x));
20436 tmp = XEXP (x, 0);
20438 /* Ugly hack because %y is overloaded. */
20439 if ((TARGET_SPE || TARGET_E500_DOUBLE)
20440 && (GET_MODE_SIZE (GET_MODE (x)) == 8
20441 || FLOAT128_2REG_P (GET_MODE (x))
20442 || GET_MODE (x) == TImode
20443 || GET_MODE (x) == PTImode))
20445 /* Handle [reg]. */
20446 if (REG_P (tmp))
20448 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
20449 break;
20451 /* Handle [reg+UIMM]. */
20452 else if (GET_CODE (tmp) == PLUS &&
20453 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
20455 int x;
20457 gcc_assert (REG_P (XEXP (tmp, 0)));
20459 x = INTVAL (XEXP (tmp, 1));
20460 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
20461 break;
20464 /* Fall through. Must be [reg+reg]. */
20466 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
20467 && GET_CODE (tmp) == AND
20468 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
20469 && INTVAL (XEXP (tmp, 1)) == -16)
20470 tmp = XEXP (tmp, 0);
20471 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20472 && GET_CODE (tmp) == PRE_MODIFY)
20473 tmp = XEXP (tmp, 1);
20474 if (REG_P (tmp))
20475 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20476 else
20478 if (GET_CODE (tmp) != PLUS
20479 || !REG_P (XEXP (tmp, 0))
20480 || !REG_P (XEXP (tmp, 1)))
20482 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20483 break;
20486 if (REGNO (XEXP (tmp, 0)) == 0)
20487 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20488 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20489 else
20490 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20491 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20493 break;
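/* Examples of the three shapes handled above, with hypothetical
   registers: an SPE [reg] operand prints as "0(9)", an SPE
   [reg+UIMM] operand as "16(9)", and an AltiVec/VSX [reg+reg]
   operand as "9,10", with r0 always printed second because the RA
   field of an indexed form reads as a literal zero.  */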
20496 case 0:
20497 if (REG_P (x))
20498 fprintf (file, "%s", reg_names[REGNO (x)]);
20499 else if (MEM_P (x))
20501 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20502 know the width from the mode. */
20503 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20504 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20505 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20506 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20507 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20508 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20509 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20510 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20511 else
20512 output_address (GET_MODE (x), XEXP (x, 0));
20514 else
20516 if (toc_relative_expr_p (x, false))
20517 /* This hack along with a corresponding hack in
20518 rs6000_output_addr_const_extra arranges to output addends
20519 where the assembler expects to find them. eg.
20520 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20521 without this hack would be output as "x@toc+4". We
20522 want "x+4@toc". */
20523 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20524 else
20525 output_addr_const (file, x);
20527 return;
20529 case '&':
20530 if (const char *name = get_some_local_dynamic_name ())
20531 assemble_name (file, name);
20532 else
20533 output_operand_lossage ("'%%&' used without any "
20534 "local dynamic TLS references");
20535 return;
20537 default:
20538 output_operand_lossage ("invalid %%xn code");
20542 /* Print the address of an operand. */
20544 void
20545 print_operand_address (FILE *file, rtx x)
20547 if (REG_P (x))
20548 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20549 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
20550 || GET_CODE (x) == LABEL_REF)
20552 output_addr_const (file, x);
20553 if (small_data_operand (x, GET_MODE (x)))
20554 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20555 reg_names[SMALL_DATA_REG]);
20556 else
20557 gcc_assert (!TARGET_TOC);
20559 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20560 && REG_P (XEXP (x, 1)))
20562 if (REGNO (XEXP (x, 0)) == 0)
20563 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20564 reg_names[ REGNO (XEXP (x, 0)) ]);
20565 else
20566 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20567 reg_names[ REGNO (XEXP (x, 1)) ]);
20569 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20570 && GET_CODE (XEXP (x, 1)) == CONST_INT)
20571 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20572 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20573 #if TARGET_MACHO
20574 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20575 && CONSTANT_P (XEXP (x, 1)))
20577 fprintf (file, "lo16(");
20578 output_addr_const (file, XEXP (x, 1));
20579 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20581 #endif
20582 #if TARGET_ELF
20583 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20584 && CONSTANT_P (XEXP (x, 1)))
20586 output_addr_const (file, XEXP (x, 1));
20587 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20589 #endif
20590 else if (toc_relative_expr_p (x, false))
20592 /* This hack along with a corresponding hack in
20593 rs6000_output_addr_const_extra arranges to output addends
20594 where the assembler expects to find them. eg.
20595 (lo_sum (reg 9)
20596 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20597 without this hack would be output as "x@toc+8@l(9)". We
20598 want "x+8@toc@l(9)". */
20599 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20600 if (GET_CODE (x) == LO_SUM)
20601 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20602 else
20603 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
20605 else
20606 gcc_unreachable ();
20609 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
20611 static bool
20612 rs6000_output_addr_const_extra (FILE *file, rtx x)
20614 if (GET_CODE (x) == UNSPEC)
20615 switch (XINT (x, 1))
20617 case UNSPEC_TOCREL:
20618 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
20619 && REG_P (XVECEXP (x, 0, 1))
20620 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
20621 output_addr_const (file, XVECEXP (x, 0, 0));
20622 if (x == tocrel_base && tocrel_offset != const0_rtx)
20624 if (INTVAL (tocrel_offset) >= 0)
20625 fprintf (file, "+");
20626 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
20628 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
20630 putc ('-', file);
20631 assemble_name (file, toc_label_name);
20632 need_toc_init = 1;
20634 else if (TARGET_ELF)
20635 fputs ("@toc", file);
20636 return true;
20638 #if TARGET_MACHO
20639 case UNSPEC_MACHOPIC_OFFSET:
20640 output_addr_const (file, XVECEXP (x, 0, 0));
20641 putc ('-', file);
20642 machopic_output_function_base_name (file);
20643 return true;
20644 #endif
20646 return false;
20649 /* Target hook for assembling integer objects. The PowerPC version has
20650 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
20651 is defined. It also needs to handle DI-mode objects on 64-bit
20652 targets. */
20654 static bool
20655 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
20657 #ifdef RELOCATABLE_NEEDS_FIXUP
20658 /* Special handling for SI values. */
20659 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
20661 static int recurse = 0;
20663 /* For -mrelocatable, we mark all addresses that need to be fixed up in
20664 the .fixup section. Since the TOC section is already relocated, we
20665 don't need to mark it here. We used to skip the text section, but it
20666 should never be valid for relocated addresses to be placed in the text
20667 section. */
20668 if (TARGET_RELOCATABLE
20669 && in_section != toc_section
20670 && !recurse
20671 && !CONST_SCALAR_INT_P (x)
20672 && CONSTANT_P (x))
20674 char buf[256];
20676 recurse = 1;
20677 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
20678 fixuplabelno++;
20679 ASM_OUTPUT_LABEL (asm_out_file, buf);
20680 fprintf (asm_out_file, "\t.long\t(");
20681 output_addr_const (asm_out_file, x);
20682 fprintf (asm_out_file, ")@fixup\n");
20683 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
20684 ASM_OUTPUT_ALIGN (asm_out_file, 2);
20685 fprintf (asm_out_file, "\t.long\t");
20686 assemble_name (asm_out_file, buf);
20687 fprintf (asm_out_file, "\n\t.previous\n");
20688 recurse = 0;
20689 return true;
20691 /* Remove initial .'s to turn a -mcall-aixdesc function
20692 address into the address of the descriptor, not the function
20693 itself. */
20694 else if (GET_CODE (x) == SYMBOL_REF
20695 && XSTR (x, 0)[0] == '.'
20696 && DEFAULT_ABI == ABI_AIX)
20698 const char *name = XSTR (x, 0);
20699 while (*name == '.')
20700 name++;
20702 fprintf (asm_out_file, "\t.long\t%s\n", name);
20703 return true;
20706 #endif /* RELOCATABLE_NEEDS_FIXUP */
20707 return default_assemble_integer (x, size, aligned_p);
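/* Under -mrelocatable the fixup path above emits a sequence along
   these lines (label number hypothetical):
	.LCP0:
		.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous
*/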
20710 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
20711 /* Emit an assembler directive to set symbol visibility for DECL to
20712 VISIBILITY_TYPE. */
20714 static void
20715 rs6000_assemble_visibility (tree decl, int vis)
20717 if (TARGET_XCOFF)
20718 return;
20720 /* Functions need to have their entry point symbol visibility set as
20721 well as their descriptor symbol visibility. */
20722 if (DEFAULT_ABI == ABI_AIX
20723 && DOT_SYMBOLS
20724 && TREE_CODE (decl) == FUNCTION_DECL)
20726 static const char * const visibility_types[] = {
20727 NULL, "internal", "hidden", "protected"
20730 const char *name, *type;
20732 name = ((* targetm.strip_name_encoding)
20733 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
20734 type = visibility_types[vis];
20736 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
20737 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
20739 else
20740 default_assemble_visibility (decl, vis);
20742 #endif
20744 enum rtx_code
20745 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
20747 /* Reversal of FP compares takes care -- an ordered compare
20748 becomes an unordered compare and vice versa. */
20749 if (mode == CCFPmode
20750 && (!flag_finite_math_only
20751 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
20752 || code == UNEQ || code == LTGT))
20753 return reverse_condition_maybe_unordered (code);
20754 else
20755 return reverse_condition (code);
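/* For example: a CCFPmode GE without -ffinite-math-only reverses to
   UNLT, so the unordered result lands on the "not GE" side, whereas
   an integer GE reverses to a plain LT.  */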
20758 /* Generate a compare for CODE. Return a brand-new rtx that
20759 represents the result of the compare. */
20761 static rtx
20762 rs6000_generate_compare (rtx cmp, machine_mode mode)
20764 machine_mode comp_mode;
20765 rtx compare_result;
20766 enum rtx_code code = GET_CODE (cmp);
20767 rtx op0 = XEXP (cmp, 0);
20768 rtx op1 = XEXP (cmp, 1);
20770 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20771 comp_mode = CCmode;
20772 else if (FLOAT_MODE_P (mode))
20773 comp_mode = CCFPmode;
20774 else if (code == GTU || code == LTU
20775 || code == GEU || code == LEU)
20776 comp_mode = CCUNSmode;
20777 else if ((code == EQ || code == NE)
20778 && unsigned_reg_p (op0)
20779 && (unsigned_reg_p (op1)
20780 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
20781 /* These are unsigned values; perhaps there will be a later
20782 ordering compare that can be shared with this one. */
20783 comp_mode = CCUNSmode;
20784 else
20785 comp_mode = CCmode;
20787 /* If we have an unsigned compare, make sure we don't have a signed value as
20788 an immediate. */
20789 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
20790 && INTVAL (op1) < 0)
20792 op0 = copy_rtx_if_shared (op0);
20793 op1 = force_reg (GET_MODE (op0), op1);
20794 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
20797 /* First, the compare. */
20798 compare_result = gen_reg_rtx (comp_mode);
20800 /* E500 FP compare instructions on the GPRs. Yuck! */
20801 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
20802 && FLOAT_MODE_P (mode))
20804 rtx cmp, or_result, compare_result2;
20805 machine_mode op_mode = GET_MODE (op0);
20806 bool reverse_p;
20808 if (op_mode == VOIDmode)
20809 op_mode = GET_MODE (op1);
20811 /* First reverse the condition codes that aren't directly supported. */
20812 switch (code)
20814 case NE:
20815 case UNLT:
20816 case UNLE:
20817 case UNGT:
20818 case UNGE:
20819 code = reverse_condition_maybe_unordered (code);
20820 reverse_p = true;
20821 break;
20823 case EQ:
20824 case LT:
20825 case LE:
20826 case GT:
20827 case GE:
20828 reverse_p = false;
20829 break;
20831 default:
20832 gcc_unreachable ();
20835 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
20836 This explains the following mess. */
20838 switch (code)
20840 case EQ:
20841 switch (op_mode)
20843 case SFmode:
20844 cmp = (flag_finite_math_only && !flag_trapping_math)
20845 ? gen_tstsfeq_gpr (compare_result, op0, op1)
20846 : gen_cmpsfeq_gpr (compare_result, op0, op1);
20847 break;
20849 case DFmode:
20850 cmp = (flag_finite_math_only && !flag_trapping_math)
20851 ? gen_tstdfeq_gpr (compare_result, op0, op1)
20852 : gen_cmpdfeq_gpr (compare_result, op0, op1);
20853 break;
20855 case TFmode:
20856 case IFmode:
20857 case KFmode:
20858 cmp = (flag_finite_math_only && !flag_trapping_math)
20859 ? gen_tsttfeq_gpr (compare_result, op0, op1)
20860 : gen_cmptfeq_gpr (compare_result, op0, op1);
20861 break;
20863 default:
20864 gcc_unreachable ();
20866 break;
20868 case GT:
20869 case GE:
20870 switch (op_mode)
20872 case SFmode:
20873 cmp = (flag_finite_math_only && !flag_trapping_math)
20874 ? gen_tstsfgt_gpr (compare_result, op0, op1)
20875 : gen_cmpsfgt_gpr (compare_result, op0, op1);
20876 break;
20878 case DFmode:
20879 cmp = (flag_finite_math_only && !flag_trapping_math)
20880 ? gen_tstdfgt_gpr (compare_result, op0, op1)
20881 : gen_cmpdfgt_gpr (compare_result, op0, op1);
20882 break;
20884 case TFmode:
20885 case IFmode:
20886 case KFmode:
20887 cmp = (flag_finite_math_only && !flag_trapping_math)
20888 ? gen_tsttfgt_gpr (compare_result, op0, op1)
20889 : gen_cmptfgt_gpr (compare_result, op0, op1);
20890 break;
20892 default:
20893 gcc_unreachable ();
20895 break;
20897 case LT:
20898 case LE:
20899 switch (op_mode)
20901 case SFmode:
20902 cmp = (flag_finite_math_only && !flag_trapping_math)
20903 ? gen_tstsflt_gpr (compare_result, op0, op1)
20904 : gen_cmpsflt_gpr (compare_result, op0, op1);
20905 break;
20907 case DFmode:
20908 cmp = (flag_finite_math_only && !flag_trapping_math)
20909 ? gen_tstdflt_gpr (compare_result, op0, op1)
20910 : gen_cmpdflt_gpr (compare_result, op0, op1);
20911 break;
20913 case TFmode:
20914 case IFmode:
20915 case KFmode:
20916 cmp = (flag_finite_math_only && !flag_trapping_math)
20917 ? gen_tsttflt_gpr (compare_result, op0, op1)
20918 : gen_cmptflt_gpr (compare_result, op0, op1);
20919 break;
20921 default:
20922 gcc_unreachable ();
20924 break;
20926 default:
20927 gcc_unreachable ();
20930 /* Synthesize LE and GE from LT/GT || EQ. */
20931 if (code == LE || code == GE)
20933 emit_insn (cmp);
20935 compare_result2 = gen_reg_rtx (CCFPmode);
20937 /* Do the EQ. */
20938 switch (op_mode)
20940 case SFmode:
20941 cmp = (flag_finite_math_only && !flag_trapping_math)
20942 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
20943 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
20944 break;
20946 case DFmode:
20947 cmp = (flag_finite_math_only && !flag_trapping_math)
20948 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
20949 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
20950 break;
20952 case TFmode:
20953 case IFmode:
20954 case KFmode:
20955 cmp = (flag_finite_math_only && !flag_trapping_math)
20956 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
20957 : gen_cmptfeq_gpr (compare_result2, op0, op1);
20958 break;
20960 default:
20961 gcc_unreachable ();
20964 emit_insn (cmp);
20966 /* OR them together. */
20967 or_result = gen_reg_rtx (CCFPmode);
20968 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
20969 compare_result2);
20970 compare_result = or_result;
20973 code = reverse_p ? NE : EQ;
20975 emit_insn (cmp);
20978 /* IEEE 128-bit support in VSX registers when we do not have hardware
20979 support. */
20980 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
20982 rtx libfunc = NULL_RTX;
20983 bool uneq_or_ltgt = false;
20984 rtx dest = gen_reg_rtx (SImode);
20986 switch (code)
20988 case EQ:
20989 case NE:
20990 libfunc = optab_libfunc (eq_optab, mode);
20991 break;
20993 case GT:
20994 case GE:
20995 libfunc = optab_libfunc (ge_optab, mode);
20996 break;
20998 case LT:
20999 case LE:
21000 libfunc = optab_libfunc (le_optab, mode);
21001 break;
21003 case UNORDERED:
21004 case ORDERED:
21005 libfunc = optab_libfunc (unord_optab, mode);
21006 code = (code == UNORDERED) ? NE : EQ;
21007 break;
21009 case UNGE:
21010 case UNGT:
21011 libfunc = optab_libfunc (le_optab, mode);
21012 code = (code == UNGE) ? GE : GT;
21013 break;
21015 case UNLE:
21016 case UNLT:
21017 libfunc = optab_libfunc (ge_optab, mode);
21018 code = (code == UNLE) ? LE : LT;
21019 break;
21021 case UNEQ:
21022 case LTGT:
21023 libfunc = optab_libfunc (le_optab, mode);
21024 uneq_or_ltgt = true;
21025 code = (code == UNEQ) ? NE : EQ;
21026 break;
21028 default:
21029 gcc_unreachable ();
21032 gcc_assert (libfunc);
21033 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21034 SImode, 2, op0, mode, op1, mode);
21036 /* If this is UNEQ or LTGT, we call __lekf2, which returns -1 for less
21037 than, 0 for equal, +1 for greater, and +2 for NaN. We add 1, giving
21038 a value of 0..3, and then do an AND immediate of 1 to isolate the
21039 bottom bit, which after the add is 1 for equal/NaN and 0 for less
21040 than/greater than. */
21041 if (uneq_or_ltgt)
21043 rtx add_result = gen_reg_rtx (SImode);
21044 rtx and_result = gen_reg_rtx (SImode);
21045 emit_insn (gen_addsi3 (add_result, dest, GEN_INT (1)));
21046 emit_insn (gen_andsi3 (and_result, add_result, GEN_INT (1)));
21047 dest = and_result;
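/* A standalone sketch of the bit test just computed, assuming the
   -1/0/+1/+2 libcall encoding described above.  */
#if 0
#include <stdio.h>

static int
eq_or_nan_bit (int lekf2_result)  /* -1 lt, 0 eq, +1 gt, +2 NaN */
{
  return (lekf2_result + 1) & 1;  /* 1 for eq/NaN, 0 for lt/gt */
}

int
main (void)
{
  printf ("%d %d %d %d\n", eq_or_nan_bit (-1), eq_or_nan_bit (0),
	  eq_or_nan_bit (1), eq_or_nan_bit (2));
  /* prints: 0 1 0 1 */
  return 0;
}
#endif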
21050 emit_insn (gen_rtx_SET (compare_result,
21051 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21054 else
21056 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21057 CLOBBERs to match cmptf_internal2 pattern. */
21058 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21059 && FLOAT128_IBM_P (GET_MODE (op0))
21060 && TARGET_HARD_FLOAT && TARGET_FPRS)
21061 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21062 gen_rtvec (10,
21063 gen_rtx_SET (compare_result,
21064 gen_rtx_COMPARE (comp_mode, op0, op1)),
21065 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21066 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21067 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21068 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21069 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21070 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21071 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21072 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21073 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21074 else if (GET_CODE (op1) == UNSPEC
21075 && XINT (op1, 1) == UNSPEC_SP_TEST)
21077 rtx op1b = XVECEXP (op1, 0, 0);
21078 comp_mode = CCEQmode;
21079 compare_result = gen_reg_rtx (CCEQmode);
21080 if (TARGET_64BIT)
21081 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21082 else
21083 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21085 else
21086 emit_insn (gen_rtx_SET (compare_result,
21087 gen_rtx_COMPARE (comp_mode, op0, op1)));
21090 /* Some kinds of FP comparisons need an OR operation;
21091 under flag_finite_math_only we don't bother. */
21092 if (FLOAT_MODE_P (mode)
21093 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21094 && !flag_finite_math_only
21095 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
21096 && (code == LE || code == GE
21097 || code == UNEQ || code == LTGT
21098 || code == UNGT || code == UNLT))
21100 enum rtx_code or1, or2;
21101 rtx or1_rtx, or2_rtx, compare2_rtx;
21102 rtx or_result = gen_reg_rtx (CCEQmode);
21104 switch (code)
21106 case LE: or1 = LT; or2 = EQ; break;
21107 case GE: or1 = GT; or2 = EQ; break;
21108 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21109 case LTGT: or1 = LT; or2 = GT; break;
21110 case UNGT: or1 = UNORDERED; or2 = GT; break;
21111 case UNLT: or1 = UNORDERED; or2 = LT; break;
21112 default: gcc_unreachable ();
21114 validate_condition_mode (or1, comp_mode);
21115 validate_condition_mode (or2, comp_mode);
21116 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21117 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21118 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21119 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21120 const_true_rtx);
21121 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21123 compare_result = or_result;
21124 code = EQ;
21127 validate_condition_mode (code, GET_MODE (compare_result));
21129 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21133 /* Return the diagnostic message string if the binary operation OP is
21134 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21136 static const char*
21137 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21138 const_tree type1,
21139 const_tree type2)
21141 enum machine_mode mode1 = TYPE_MODE (type1);
21142 enum machine_mode mode2 = TYPE_MODE (type2);
21144 /* For complex modes, use the inner type. */
21145 if (COMPLEX_MODE_P (mode1))
21146 mode1 = GET_MODE_INNER (mode1);
21148 if (COMPLEX_MODE_P (mode2))
21149 mode2 = GET_MODE_INNER (mode2);
21151 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21152 double to intermix unless -mfloat128-convert. */
21153 if (mode1 == mode2)
21154 return NULL;
21156 if (!TARGET_FLOAT128_CVT)
21158 if ((mode1 == KFmode && mode2 == IFmode)
21159 || (mode1 == IFmode && mode2 == KFmode))
21160 return N_("__float128 and __ibm128 cannot be used in the same "
21161 "expression");
21163 if (TARGET_IEEEQUAD
21164 && ((mode1 == IFmode && mode2 == TFmode)
21165 || (mode1 == TFmode && mode2 == IFmode)))
21166 return N_("__ibm128 and long double cannot be used in the same "
21167 "expression");
21169 if (!TARGET_IEEEQUAD
21170 && ((mode1 == KFmode && mode2 == TFmode)
21171 || (mode1 == TFmode && mode2 == KFmode)))
21172 return N_("__float128 and long double cannot be used in the same "
21173 "expression");
21176 return NULL;
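/* For example, without -mfloat128-convert a (hypothetical) expression
   mixing the two 128-bit formats, such as

	__float128 a;  __ibm128 b;  ... a + b ...

   is rejected with the first message above.  */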
21180 /* Expand floating point conversion to/from __float128 and __ibm128. */
21182 void
21183 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21185 machine_mode dest_mode = GET_MODE (dest);
21186 machine_mode src_mode = GET_MODE (src);
21187 convert_optab cvt = unknown_optab;
21188 bool do_move = false;
21189 rtx libfunc = NULL_RTX;
21190 rtx dest2;
21191 typedef rtx (*rtx_2func_t) (rtx, rtx);
21192 rtx_2func_t hw_convert = (rtx_2func_t)0;
21193 size_t kf_or_tf;
21195 struct hw_conv_t {
21196 rtx_2func_t from_df;
21197 rtx_2func_t from_sf;
21198 rtx_2func_t from_si_sign;
21199 rtx_2func_t from_si_uns;
21200 rtx_2func_t from_di_sign;
21201 rtx_2func_t from_di_uns;
21202 rtx_2func_t to_df;
21203 rtx_2func_t to_sf;
21204 rtx_2func_t to_si_sign;
21205 rtx_2func_t to_si_uns;
21206 rtx_2func_t to_di_sign;
21207 rtx_2func_t to_di_uns;
21208 } hw_conversions[2] = {
21209 /* conversions to/from KFmode */
21211 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21212 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21213 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21214 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21215 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21216 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21217 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21218 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21219 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21220 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21221 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21222 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21225 /* conversions to/from TFmode */
21227 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21228 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21229 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21230 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21231 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21232 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21233 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21234 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21235 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21236 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21237 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21238 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21242 if (dest_mode == src_mode)
21243 gcc_unreachable ();
21245 /* Eliminate memory operations. */
21246 if (MEM_P (src))
21247 src = force_reg (src_mode, src);
21249 if (MEM_P (dest))
21251 rtx tmp = gen_reg_rtx (dest_mode);
21252 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21253 rs6000_emit_move (dest, tmp, dest_mode);
21254 return;
21257 /* Convert to IEEE 128-bit floating point. */
21258 if (FLOAT128_IEEE_P (dest_mode))
21260 if (dest_mode == KFmode)
21261 kf_or_tf = 0;
21262 else if (dest_mode == TFmode)
21263 kf_or_tf = 1;
21264 else
21265 gcc_unreachable ();
21267 switch (src_mode)
21269 case DFmode:
21270 cvt = sext_optab;
21271 hw_convert = hw_conversions[kf_or_tf].from_df;
21272 break;
21274 case SFmode:
21275 cvt = sext_optab;
21276 hw_convert = hw_conversions[kf_or_tf].from_sf;
21277 break;
21279 case KFmode:
21280 case IFmode:
21281 case TFmode:
21282 if (FLOAT128_IBM_P (src_mode))
21283 cvt = sext_optab;
21284 else
21285 do_move = true;
21286 break;
21288 case SImode:
21289 if (unsigned_p)
21291 cvt = ufloat_optab;
21292 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21294 else
21296 cvt = sfloat_optab;
21297 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21299 break;
21301 case DImode:
21302 if (unsigned_p)
21304 cvt = ufloat_optab;
21305 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21307 else
21309 cvt = sfloat_optab;
21310 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21312 break;
21314 default:
21315 gcc_unreachable ();
21319 /* Convert from IEEE 128-bit floating point. */
21320 else if (FLOAT128_IEEE_P (src_mode))
21322 if (src_mode == KFmode)
21323 kf_or_tf = 0;
21324 else if (src_mode == TFmode)
21325 kf_or_tf = 1;
21326 else
21327 gcc_unreachable ();
21329 switch (dest_mode)
21331 case DFmode:
21332 cvt = trunc_optab;
21333 hw_convert = hw_conversions[kf_or_tf].to_df;
21334 break;
21336 case SFmode:
21337 cvt = trunc_optab;
21338 hw_convert = hw_conversions[kf_or_tf].to_sf;
21339 break;
21341 case KFmode:
21342 case IFmode:
21343 case TFmode:
21344 if (FLOAT128_IBM_P (dest_mode))
21345 cvt = trunc_optab;
21346 else
21347 do_move = true;
21348 break;
21350 case SImode:
21351 if (unsigned_p)
21353 cvt = ufix_optab;
21354 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21356 else
21358 cvt = sfix_optab;
21359 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
21361 break;
21363 case DImode:
21364 if (unsigned_p)
21366 cvt = ufix_optab;
21367 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
21369 else
21371 cvt = sfix_optab;
21372 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
21374 break;
21376 default:
21377 gcc_unreachable ();
21381 /* Both IBM format. */
21382 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
21383 do_move = true;
21385 else
21386 gcc_unreachable ();
21388 /* Handle conversion between TFmode/KFmode. */
21389 if (do_move)
21390 emit_move_insn (dest, gen_lowpart (dest_mode, src));
21392 /* Handle conversion if we have hardware support. */
21393 else if (TARGET_FLOAT128_HW && hw_convert)
21394 emit_insn ((hw_convert) (dest, src));
21396 /* Call an external function to do the conversion. */
21397 else if (cvt != unknown_optab)
21399 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
21400 gcc_assert (libfunc != NULL_RTX);
21402 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
21403 src_mode);
21405 gcc_assert (dest2 != NULL_RTX);
21406 if (!rtx_equal_p (dest, dest2))
21407 emit_move_insn (dest, dest2);
21410 else
21411 gcc_unreachable ();
21413 return;
21416 /* Split a conversion from __float128 to an integer type into separate insns.
21417 OPERANDS points to the destination, source, and V2DI temporary
21418 register. CODE is either FIX or UNSIGNED_FIX. */
21420 void
21421 convert_float128_to_int (rtx *operands, enum rtx_code code)
21423 rtx dest = operands[0];
21424 rtx src = operands[1];
21425 rtx tmp = operands[2];
21426 rtx cvt;
21427 rtvec cvt_vec;
21428 rtx cvt_unspec;
21429 rtvec move_vec;
21430 rtx move_unspec;
21432 if (GET_CODE (tmp) == SCRATCH)
21433 tmp = gen_reg_rtx (V2DImode);
21435 if (MEM_P (dest))
21436 dest = rs6000_address_for_fpconvert (dest);
21438 /* Generate the actual convert insn of the form:
21439 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
21440 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
21441 cvt_vec = gen_rtvec (1, cvt);
21442 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
21443 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
21445 /* Generate the move insn of the form:
21446 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
21447 move_vec = gen_rtvec (1, tmp);
21448 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
21449 emit_insn (gen_rtx_SET (dest, move_unspec));
21452 /* Split a conversion from an integer type to __float128 into separate insns.
21453 OPERANDS points to the destination, source, and V2DI temporary
21454 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
21456 void
21457 convert_int_to_float128 (rtx *operands, enum rtx_code code)
21459 rtx dest = operands[0];
21460 rtx src = operands[1];
21461 rtx tmp = operands[2];
21462 rtx cvt;
21463 rtvec cvt_vec;
21464 rtx cvt_unspec;
21465 rtvec move_vec;
21466 rtx move_unspec;
21467 rtx unsigned_flag;
21469 if (GET_CODE (tmp) == SCRATCH)
21470 tmp = gen_reg_rtx (V2DImode);
21472 if (MEM_P (src))
21473 src = rs6000_address_for_fpconvert (src);
21475 /* Generate the move of the integer into the Altivec register of the form:
21476 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
21477 (const_int 0)] UNSPEC_IEEE128_MOVE)).
21479 or:
21480 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
21482 if (GET_MODE (src) == SImode)
21484 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
21485 move_vec = gen_rtvec (2, src, unsigned_flag);
21487 else
21488 move_vec = gen_rtvec (1, src);
21490 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
21491 emit_insn (gen_rtx_SET (tmp, move_unspec));
21493 /* Generate the actual convert insn of the form:
21494 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
21495 UNSPEC_IEEE128_CONVERT))). */
21496 cvt_vec = gen_rtvec (1, tmp);
21497 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
21498 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
21499 emit_insn (gen_rtx_SET (dest, cvt));
21503 /* Emit the RTL for an sISEL pattern. */
21505 void
21506 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
21508 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
21511 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
21512 can be used as that dest register. Return the dest register. */
21514 rtx
21515 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21517 if (op2 == const0_rtx)
21518 return op1;
21520 if (GET_CODE (scratch) == SCRATCH)
21521 scratch = gen_reg_rtx (mode);
21523 if (logical_operand (op2, mode))
21524 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21525 else
21526 emit_insn (gen_rtx_SET (scratch,
21527 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21529 return scratch;
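/* A standalone analogue of the reduction above: either strategy
   leaves zero in the scratch exactly when the operands are equal;
   XOR is used when op2 fits a logical immediate, subtraction
   otherwise.  */
#if 0
#include <stdio.h>

static long
eqne_scratch (long op1, long op2, int op2_fits_logical_imm)
{
  return op2_fits_logical_imm ? (op1 ^ op2) : (op1 - op2);
}

int
main (void)
{
  printf ("%ld %ld\n", eqne_scratch (7, 7, 1), eqne_scratch (7, 9, 0));
  /* prints: 0 -2 */
  return 0;
}
#endif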
21532 void
21533 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21535 rtx condition_rtx;
21536 machine_mode op_mode;
21537 enum rtx_code cond_code;
21538 rtx result = operands[0];
21540 condition_rtx = rs6000_generate_compare (operands[1], mode);
21541 cond_code = GET_CODE (condition_rtx);
21543 if (FLOAT_MODE_P (mode)
21544 && !TARGET_FPRS && TARGET_HARD_FLOAT)
21546 rtx t;
21548 PUT_MODE (condition_rtx, SImode);
21549 t = XEXP (condition_rtx, 0);
21551 gcc_assert (cond_code == NE || cond_code == EQ);
21553 if (cond_code == NE)
21554 emit_insn (gen_e500_flip_gt_bit (t, t));
21556 emit_insn (gen_move_from_CR_gt_bit (result, t));
21557 return;
21560 if (cond_code == NE
21561 || cond_code == GE || cond_code == LE
21562 || cond_code == GEU || cond_code == LEU
21563 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21565 rtx not_result = gen_reg_rtx (CCEQmode);
21566 rtx not_op, rev_cond_rtx;
21567 machine_mode cc_mode;
21569 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21571 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21572 SImode, XEXP (condition_rtx, 0), const0_rtx);
21573 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21574 emit_insn (gen_rtx_SET (not_result, not_op));
21575 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21578 op_mode = GET_MODE (XEXP (operands[1], 0));
21579 if (op_mode == VOIDmode)
21580 op_mode = GET_MODE (XEXP (operands[1], 1));
21582 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21584 PUT_MODE (condition_rtx, DImode);
21585 convert_move (result, condition_rtx, 0);
21587 else
21589 PUT_MODE (condition_rtx, SImode);
21590 emit_insn (gen_rtx_SET (result, condition_rtx));
21596 /* Emit a conditional branch; operands[0] gives the comparison and
21597 operands[3] the target label. */
21596 void
21597 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21599 rtx condition_rtx, loc_ref;
21601 condition_rtx = rs6000_generate_compare (operands[0], mode);
21602 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21603 emit_jump_insn (gen_rtx_SET (pc_rtx,
21604 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21605 loc_ref, pc_rtx)));
21608 /* Return the string to output a conditional branch to LABEL, which is
21609 the operand template of the label, or NULL if the branch is really a
21610 conditional return.
21612 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21613 condition code register and its mode specifies what kind of
21614 comparison we made.
21616 REVERSED is nonzero if we should reverse the sense of the comparison.
21618 INSN is the insn. */
21620 char *
21621 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21623 static char string[64];
21624 enum rtx_code code = GET_CODE (op);
21625 rtx cc_reg = XEXP (op, 0);
21626 machine_mode mode = GET_MODE (cc_reg);
21627 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21628 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21629 int really_reversed = reversed ^ need_longbranch;
21630 char *s = string;
21631 const char *ccode;
21632 const char *pred;
21633 rtx note;
21635 validate_condition_mode (code, mode);
21637 /* Work out which way this really branches. We could always use
21638 reverse_condition_maybe_unordered here, but using plain
21639 reverse_condition for non-FP compares makes the resulting assembler clearer. */
21640 if (really_reversed)
21642 /* Reversal of FP compares needs care -- an ordered compare
21643 becomes an unordered compare and vice versa. */
21644 if (mode == CCFPmode)
21645 code = reverse_condition_maybe_unordered (code);
21646 else
21647 code = reverse_condition (code);
21650 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
21652 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
21653 to the GT bit. */
21654 switch (code)
21656 case EQ:
21657 /* Opposite of GT. */
21658 code = GT;
21659 break;
21661 case NE:
21662 code = UNLE;
21663 break;
21665 default:
21666 gcc_unreachable ();
21670 switch (code)
21672 /* Not all of these are actually distinct opcodes, but
21673 we distinguish them for clarity of the resulting assembler. */
21674 case NE: case LTGT:
21675 ccode = "ne"; break;
21676 case EQ: case UNEQ:
21677 ccode = "eq"; break;
21678 case GE: case GEU:
21679 ccode = "ge"; break;
21680 case GT: case GTU: case UNGT:
21681 ccode = "gt"; break;
21682 case LE: case LEU:
21683 ccode = "le"; break;
21684 case LT: case LTU: case UNLT:
21685 ccode = "lt"; break;
21686 case UNORDERED: ccode = "un"; break;
21687 case ORDERED: ccode = "nu"; break;
21688 case UNGE: ccode = "nl"; break;
21689 case UNLE: ccode = "ng"; break;
21690 default:
21691 gcc_unreachable ();
21694 /* Maybe we have a guess as to how likely the branch is. */
21695 pred = "";
21696 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21697 if (note != NULL_RTX)
21699 /* PROB is the difference from 50%. */
21700 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
21702 /* Only hint for highly probable/improbable branches on newer cpus when
21703 we have real profile data, as static prediction overrides processor
21704 dynamic prediction. For older cpus we may as well always hint, but
21705 assume not taken for branches that are very close to 50% as a
21706 mispredicted taken branch is more expensive than a
21707 mispredicted not-taken branch. */
21708 if (rs6000_always_hint
21709 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21710 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
21711 && br_prob_note_reliable_p (note)))
21713 if (abs (prob) > REG_BR_PROB_BASE / 20
21714 && ((prob > 0) ^ need_longbranch))
21715 pred = "+";
21716 else
21717 pred = "-";
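/* Worked numbers (assuming the standard REG_BR_PROB_BASE of 10000):
   PROB is the note value minus 5000, so the guard above requires
   abs (PROB) > 4800 -- a taken probability below 2% or above 98% --
   before hinting at all on newer CPUs.  The inner test then emits "+"
   only when abs (PROB) > 500 and the predicted direction (inverted
   for long branches) is "taken"; everything else gets "-", since a
   mispredicted taken branch is the more expensive outcome.  */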
21721 if (label == NULL)
21722 s += sprintf (s, "b%slr%s ", ccode, pred);
21723 else
21724 s += sprintf (s, "b%s%s ", ccode, pred);
21726 /* We need to escape any '%' characters in the reg_names string.
21727 Assume they'd only be the first character.... */
21728 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21729 *s++ = '%';
21730 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21732 if (label != NULL)
21734 /* If the branch distance was too far, we may have to use an
21735 unconditional branch to go the distance. */
21736 if (need_longbranch)
21737 s += sprintf (s, ",$+8\n\tb %s", label);
21738 else
21739 s += sprintf (s, ",%s", label);
21742 return string;
21745 /* Return the string to flip the GT bit on a CR. */
21746 char *
21747 output_e500_flip_gt_bit (rtx dst, rtx src)
21749 static char string[64];
21750 int a, b;
21752 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
21753 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
21755 /* GT bit. */
21756 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
21757 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
21759 sprintf (string, "crnot %d,%d", a, b);
21760 return string;
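/* Example (illustrative): with DST == SRC == cr3, both A and B are
   4 * 3 + 1 == 13, producing "crnot 13,13", which flips CR3's GT bit
   in place.  Each CR field holds 4 bits -- LT, GT, EQ, SO -- hence
   the scale factor of 4 and the offset of 1 for GT.  */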
21763 /* Return insn for VSX or Altivec comparisons. */
21765 static rtx
21766 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21768 rtx mask;
21769 machine_mode mode = GET_MODE (op0);
21771 switch (code)
21773 default:
21774 break;
21776 case GE:
21777 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21778 return NULL_RTX;
21780 case EQ:
21781 case GT:
21782 case GTU:
21783 case ORDERED:
21784 case UNORDERED:
21785 case UNEQ:
21786 case LTGT:
21787 mask = gen_reg_rtx (mode);
21788 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21789 return mask;
21792 return NULL_RTX;
21795 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21796 DMODE is expected destination mode. This is a recursive function. */
21798 static rtx
21799 rs6000_emit_vector_compare (enum rtx_code rcode,
21800 rtx op0, rtx op1,
21801 machine_mode dmode)
21803 rtx mask;
21804 bool swap_operands = false;
21805 bool try_again = false;
21807 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
21808 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
21810 /* See if the comparison works as is. */
21811 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21812 if (mask)
21813 return mask;
21815 switch (rcode)
21817 case LT:
21818 rcode = GT;
21819 swap_operands = true;
21820 try_again = true;
21821 break;
21822 case LTU:
21823 rcode = GTU;
21824 swap_operands = true;
21825 try_again = true;
21826 break;
21827 case NE:
21828 case UNLE:
21829 case UNLT:
21830 case UNGE:
21831 case UNGT:
21832 /* Invert condition and try again.
21833 e.g., A != B becomes ~(A==B). */
21835 enum rtx_code rev_code;
21836 enum insn_code nor_code;
21837 rtx mask2;
21839 rev_code = reverse_condition_maybe_unordered (rcode);
21840 if (rev_code == UNKNOWN)
21841 return NULL_RTX;
21843 nor_code = optab_handler (one_cmpl_optab, dmode);
21844 if (nor_code == CODE_FOR_nothing)
21845 return NULL_RTX;
21847 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
21848 if (!mask2)
21849 return NULL_RTX;
21851 mask = gen_reg_rtx (dmode);
21852 emit_insn (GEN_FCN (nor_code) (mask, mask2));
21853 return mask;
21855 break;
21856 case GE:
21857 case GEU:
21858 case LE:
21859 case LEU:
21860 /* Try GT/GTU/LT/LTU OR'd with EQ, e.g., A >= B becomes (A > B) | (A == B). */
21862 rtx c_rtx, eq_rtx;
21863 enum insn_code ior_code;
21864 enum rtx_code new_code;
21866 switch (rcode)
21868 case GE:
21869 new_code = GT;
21870 break;
21872 case GEU:
21873 new_code = GTU;
21874 break;
21876 case LE:
21877 new_code = LT;
21878 break;
21880 case LEU:
21881 new_code = LTU;
21882 break;
21884 default:
21885 gcc_unreachable ();
21888 ior_code = optab_handler (ior_optab, dmode);
21889 if (ior_code == CODE_FOR_nothing)
21890 return NULL_RTX;
21892 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
21893 if (!c_rtx)
21894 return NULL_RTX;
21896 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
21897 if (!eq_rtx)
21898 return NULL_RTX;
21900 mask = gen_reg_rtx (dmode);
21901 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
21902 return mask;
21904 break;
21905 default:
21906 return NULL_RTX;
21909 if (try_again)
21911 if (swap_operands)
21912 std::swap (op0, op1);
21914 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21915 if (mask)
21916 return mask;
21919 /* You only get two chances. */
21920 return NULL_RTX;
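/* Illustrative walk-through (not from the source): a V4SI "a < b" has
   no direct AltiVec/VSX compare, so the LT case rewrites it as
   "b > a" (swap_operands) and retries the inner emitter once -- the
   "two chances" above.  "a <= b" takes the LE case, OR'ing the masks
   of "a < b" and "a == b", while "a != b" computes the "a == b" mask
   and inverts it through the one_cmpl optab.  */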
21923 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
21924 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
21925 operands of the relational operation COND. */
21928 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
21929 rtx cond, rtx cc_op0, rtx cc_op1)
21931 machine_mode dest_mode = GET_MODE (dest);
21932 machine_mode mask_mode = GET_MODE (cc_op0);
21933 enum rtx_code rcode = GET_CODE (cond);
21934 machine_mode cc_mode = CCmode;
21935 rtx mask;
21936 rtx cond2;
21937 rtx tmp;
21938 bool invert_move = false;
21940 if (VECTOR_UNIT_NONE_P (dest_mode))
21941 return 0;
21943 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
21944 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
21946 switch (rcode)
21948 /* Handle conditions with no direct vector instruction by inverting
21949 the condition and swapping the selected move operands instead. */
21950 case NE:
21951 case UNLE:
21952 case UNLT:
21953 case UNGE:
21954 case UNGT:
21955 /* Invert condition and try again.
21956 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
21957 invert_move = true;
21958 rcode = reverse_condition_maybe_unordered (rcode);
21959 if (rcode == UNKNOWN)
21960 return 0;
21961 break;
21963 /* Mark unsigned tests with CCUNSmode. */
21964 case GTU:
21965 case GEU:
21966 case LTU:
21967 case LEU:
21968 cc_mode = CCUNSmode;
21969 break;
21971 default:
21972 break;
21975 /* Get the vector mask for the given relational operation. */
21976 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
21978 if (!mask)
21979 return 0;
21981 if (invert_move)
21983 tmp = op_true;
21984 op_true = op_false;
21985 op_false = tmp;
21988 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
21989 CONST0_RTX (dest_mode));
21990 emit_insn (gen_rtx_SET (dest,
21991 gen_rtx_IF_THEN_ELSE (dest_mode,
21992 cond2,
21993 op_true,
21994 op_false)));
21995 return 1;
21998 /* Emit a conditional move: move TRUE_COND to DEST if the comparison
21999 OP on its operands is nonzero/true, FALSE_COND if it is zero/false.
22000 Return 0 if the hardware has no such operation. */
22003 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22005 enum rtx_code code = GET_CODE (op);
22006 rtx op0 = XEXP (op, 0);
22007 rtx op1 = XEXP (op, 1);
22008 machine_mode compare_mode = GET_MODE (op0);
22009 machine_mode result_mode = GET_MODE (dest);
22010 rtx temp;
22011 bool is_against_zero;
22013 /* These modes should always match. */
22014 if (GET_MODE (op1) != compare_mode
22015 /* In the isel case however, we can use a compare immediate, so
22016 op1 may be a small constant. */
22017 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22018 return 0;
22019 if (GET_MODE (true_cond) != result_mode)
22020 return 0;
22021 if (GET_MODE (false_cond) != result_mode)
22022 return 0;
22024 /* Don't allow using floating point comparisons for integer results for
22025 now. */
22026 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22027 return 0;
22029 /* First, work out if the hardware can do this at all, or
22030 if it's too slow.... */
22031 if (!FLOAT_MODE_P (compare_mode))
22033 if (TARGET_ISEL)
22034 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22035 return 0;
22037 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
22038 && SCALAR_FLOAT_MODE_P (compare_mode))
22039 return 0;
22041 is_against_zero = op1 == CONST0_RTX (compare_mode);
22043 /* A floating-point subtract might overflow, underflow, or produce
22044 an inexact result, thus changing the floating-point flags, so it
22045 can't be generated if we care about that. It's safe if one side
22046 of the construct is zero, since then no subtract will be
22047 generated. */
22048 if (SCALAR_FLOAT_MODE_P (compare_mode)
22049 && flag_trapping_math && ! is_against_zero)
22050 return 0;
22052 /* Eliminate half of the comparisons by switching operands, this
22053 makes the remaining code simpler. */
22054 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22055 || code == LTGT || code == LT || code == UNLE)
22057 code = reverse_condition_maybe_unordered (code);
22058 temp = true_cond;
22059 true_cond = false_cond;
22060 false_cond = temp;
22063 /* UNEQ and LTGT take four instructions for a comparison with zero,
22064 so it'll probably be faster to use a branch here too. */
22065 if (code == UNEQ && HONOR_NANS (compare_mode))
22066 return 0;
22068 /* We're going to try to implement comparisons by performing
22069 a subtract, then comparing against zero. Unfortunately,
22070 Inf - Inf is NaN which is not zero, and so if we don't
22071 know that the operand is finite and the comparison
22072 would treat EQ differently from UNORDERED, we can't do it. */
22073 if (HONOR_INFINITIES (compare_mode)
22074 && code != GT && code != UNGE
22075 && (GET_CODE (op1) != CONST_DOUBLE
22076 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22077 /* Constructs of the form (a OP b ? a : b) are safe. */
22078 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22079 || (! rtx_equal_p (op0, true_cond)
22080 && ! rtx_equal_p (op1, true_cond))))
22081 return 0;
22083 /* At this point we know we can use fsel. */
22085 /* Reduce the comparison to a comparison against zero. */
22086 if (! is_against_zero)
22088 temp = gen_reg_rtx (compare_mode);
22089 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22090 op0 = temp;
22091 op1 = CONST0_RTX (compare_mode);
22094 /* If we don't care about NaNs we can reduce some of the comparisons
22095 down to faster ones. */
22096 if (! HONOR_NANS (compare_mode))
22097 switch (code)
22099 case GT:
22100 code = LE;
22101 temp = true_cond;
22102 true_cond = false_cond;
22103 false_cond = temp;
22104 break;
22105 case UNGE:
22106 code = GE;
22107 break;
22108 case UNEQ:
22109 code = EQ;
22110 break;
22111 default:
22112 break;
22115 /* Now, reduce everything down to a GE. */
22116 switch (code)
22118 case GE:
22119 break;
22121 case LE:
22122 temp = gen_reg_rtx (compare_mode);
22123 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22124 op0 = temp;
22125 break;
22127 case ORDERED:
22128 temp = gen_reg_rtx (compare_mode);
22129 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22130 op0 = temp;
22131 break;
22133 case EQ:
22134 temp = gen_reg_rtx (compare_mode);
22135 emit_insn (gen_rtx_SET (temp,
22136 gen_rtx_NEG (compare_mode,
22137 gen_rtx_ABS (compare_mode, op0))));
22138 op0 = temp;
22139 break;
22141 case UNGE:
22142 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22143 temp = gen_reg_rtx (result_mode);
22144 emit_insn (gen_rtx_SET (temp,
22145 gen_rtx_IF_THEN_ELSE (result_mode,
22146 gen_rtx_GE (VOIDmode,
22147 op0, op1),
22148 true_cond, false_cond)));
22149 false_cond = true_cond;
22150 true_cond = temp;
22152 temp = gen_reg_rtx (compare_mode);
22153 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22154 op0 = temp;
22155 break;
22157 case GT:
22158 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22159 temp = gen_reg_rtx (result_mode);
22160 emit_insn (gen_rtx_SET (temp,
22161 gen_rtx_IF_THEN_ELSE (result_mode,
22162 gen_rtx_GE (VOIDmode,
22163 op0, op1),
22164 true_cond, false_cond)));
22165 true_cond = false_cond;
22166 false_cond = temp;
22168 temp = gen_reg_rtx (compare_mode);
22169 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22170 op0 = temp;
22171 break;
22173 default:
22174 gcc_unreachable ();
22177 emit_insn (gen_rtx_SET (dest,
22178 gen_rtx_IF_THEN_ELSE (result_mode,
22179 gen_rtx_GE (VOIDmode,
22180 op0, op1),
22181 true_cond, false_cond)));
22182 return 1;
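/* Sketch of the net effect (assuming the usual fsel semantics,
   "fsel d,a,b,c: d = (a >= 0.0) ? b : c"): a source-level
   "x <= 0.0 ? t : f" is reduced by the LE case above to

       tmp  = -x
       dest = fsel (tmp, t, f)

   i.e. "-x >= 0 ? t : f", and the EQ case tests -fabs (x) >= 0.0,
   which holds exactly for x == 0 when NaNs are not in play.  */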
22185 /* Same as above, but for ints (isel). */
22187 static int
22188 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22190 rtx condition_rtx, cr;
22191 machine_mode mode = GET_MODE (dest);
22192 enum rtx_code cond_code;
22193 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22194 bool signedp;
22196 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22197 return 0;
22199 /* We still have to do the compare, because isel doesn't do a
22200 compare, it just looks at the CRx bits set by a previous compare
22201 instruction. */
22202 condition_rtx = rs6000_generate_compare (op, mode);
22203 cond_code = GET_CODE (condition_rtx);
22204 cr = XEXP (condition_rtx, 0);
22205 signedp = GET_MODE (cr) == CCmode;
22207 isel_func = (mode == SImode
22208 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22209 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22211 switch (cond_code)
22213 case LT: case GT: case LTU: case GTU: case EQ:
22214 /* isel handles these directly. */
22215 break;
22217 default:
22218 /* We need to swap the sense of the comparison. */
22220 std::swap (false_cond, true_cond);
22221 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22223 break;
22226 false_cond = force_reg (mode, false_cond);
22227 if (true_cond != const0_rtx)
22228 true_cond = force_reg (mode, true_cond);
22230 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22232 return 1;
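/* Illustrative emitted sequence (not verbatim): for
   "d = (a < b) ? x : y" with signed SImode operands this expands to
   one compare plus one isel, roughly

       cmpw  cr0, a, b
       isel  d, x, y, 0        (d = CR0.LT ? x : y)

   with the default case above reversing the condition and swapping
   X and Y for codes isel cannot test directly (GE, LE, NE, ...).  */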
22235 const char *
22236 output_isel (rtx *operands)
22238 enum rtx_code code;
22240 code = GET_CODE (operands[1]);
22242 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
22244 gcc_assert (GET_CODE (operands[2]) == REG
22245 && GET_CODE (operands[3]) == REG);
22246 PUT_CODE (operands[1], reverse_condition (code));
22247 return "isel %0,%3,%2,%j1";
22250 return "isel %0,%2,%3,%j1";
22253 void
22254 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22256 machine_mode mode = GET_MODE (op0);
22257 enum rtx_code c;
22258 rtx target;
22260 /* VSX/altivec have direct min/max insns. */
22261 if ((code == SMAX || code == SMIN)
22262 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22263 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22265 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22266 return;
22269 if (code == SMAX || code == SMIN)
22270 c = GE;
22271 else
22272 c = GEU;
22274 if (code == SMAX || code == UMAX)
22275 target = emit_conditional_move (dest, c, op0, op1, mode,
22276 op0, op1, mode, 0);
22277 else
22278 target = emit_conditional_move (dest, c, op0, op1, mode,
22279 op1, op0, mode, 0);
22280 gcc_assert (target);
22281 if (target != dest)
22282 emit_move_insn (dest, target);
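/* Sketch (illustrative): when no direct min/max instruction exists,
   "smax (a, b)" becomes a conditional move "dest = (a >= b) ? a : b",
   while the min variants simply pass the move operands in the
   opposite order, giving "dest = (a >= b) ? b : a".  */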
22285 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22286 COND is true. Mark the jump as unlikely to be taken. */
22288 static void
22289 emit_unlikely_jump (rtx cond, rtx label)
22291 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
22292 rtx x;
22294 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22295 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22296 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
22299 /* A subroutine of the atomic operation splitters. Emit a load-locked
22300 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22301 the zero_extend operation. */
22303 static void
22304 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22306 rtx (*fn) (rtx, rtx) = NULL;
22308 switch (mode)
22310 case QImode:
22311 fn = gen_load_lockedqi;
22312 break;
22313 case HImode:
22314 fn = gen_load_lockedhi;
22315 break;
22316 case SImode:
22317 if (GET_MODE (mem) == QImode)
22318 fn = gen_load_lockedqi_si;
22319 else if (GET_MODE (mem) == HImode)
22320 fn = gen_load_lockedhi_si;
22321 else
22322 fn = gen_load_lockedsi;
22323 break;
22324 case DImode:
22325 fn = gen_load_lockeddi;
22326 break;
22327 case TImode:
22328 fn = gen_load_lockedti;
22329 break;
22330 default:
22331 gcc_unreachable ();
22333 emit_insn (fn (reg, mem));
22336 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22337 instruction in MODE. */
22339 static void
22340 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22342 rtx (*fn) (rtx, rtx, rtx) = NULL;
22344 switch (mode)
22346 case QImode:
22347 fn = gen_store_conditionalqi;
22348 break;
22349 case HImode:
22350 fn = gen_store_conditionalhi;
22351 break;
22352 case SImode:
22353 fn = gen_store_conditionalsi;
22354 break;
22355 case DImode:
22356 fn = gen_store_conditionaldi;
22357 break;
22358 case TImode:
22359 fn = gen_store_conditionalti;
22360 break;
22361 default:
22362 gcc_unreachable ();
22365 /* Emit sync before stwcx. to address PPC405 Erratum. */
22366 if (PPC405_ERRATUM77)
22367 emit_insn (gen_hwsync ());
22369 emit_insn (fn (res, mem, val));
22372 /* Expand barriers before and after a load_locked/store_cond sequence. */
22374 static rtx
22375 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
22377 rtx addr = XEXP (mem, 0);
22378 int strict_p = (reload_in_progress || reload_completed);
22380 if (!legitimate_indirect_address_p (addr, strict_p)
22381 && !legitimate_indexed_address_p (addr, strict_p))
22383 addr = force_reg (Pmode, addr);
22384 mem = replace_equiv_address_nv (mem, addr);
22387 switch (model)
22389 case MEMMODEL_RELAXED:
22390 case MEMMODEL_CONSUME:
22391 case MEMMODEL_ACQUIRE:
22392 break;
22393 case MEMMODEL_RELEASE:
22394 case MEMMODEL_ACQ_REL:
22395 emit_insn (gen_lwsync ());
22396 break;
22397 case MEMMODEL_SEQ_CST:
22398 emit_insn (gen_hwsync ());
22399 break;
22400 default:
22401 gcc_unreachable ();
22403 return mem;
22406 static void
22407 rs6000_post_atomic_barrier (enum memmodel model)
22409 switch (model)
22411 case MEMMODEL_RELAXED:
22412 case MEMMODEL_CONSUME:
22413 case MEMMODEL_RELEASE:
22414 break;
22415 case MEMMODEL_ACQUIRE:
22416 case MEMMODEL_ACQ_REL:
22417 case MEMMODEL_SEQ_CST:
22418 emit_insn (gen_isync ());
22419 break;
22420 default:
22421 gcc_unreachable ();
22425 /* A subroutine of the various atomic expanders. For sub-word operations,
22426 we must adjust things to operate on SImode. Given the original MEM,
22427 return a new aligned MEM. Also build and return the quantities by
22428 which to shift and mask. */
22430 static rtx
22431 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
22433 rtx addr, align, shift, mask, mem;
22434 HOST_WIDE_INT shift_mask;
22435 machine_mode mode = GET_MODE (orig_mem);
22437 /* For smaller modes, we have to implement this via SImode. */
22438 shift_mask = (mode == QImode ? 0x18 : 0x10);
22440 addr = XEXP (orig_mem, 0);
22441 addr = force_reg (GET_MODE (addr), addr);
22443 /* Aligned memory containing subword. Generate a new memory. We
22444 do not want any of the existing MEM_ATTR data, as we're now
22445 accessing memory outside the original object. */
22446 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
22447 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22448 mem = gen_rtx_MEM (SImode, align);
22449 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22450 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22451 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22453 /* Shift amount for subword relative to aligned word. */
22454 shift = gen_reg_rtx (SImode);
22455 addr = gen_lowpart (SImode, addr);
22456 rtx tmp = gen_reg_rtx (SImode);
22457 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22458 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
22459 if (BYTES_BIG_ENDIAN)
22460 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22461 shift, 1, OPTAB_LIB_WIDEN);
22462 *pshift = shift;
22464 /* Mask for insertion. */
22465 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22466 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22467 *pmask = mask;
22469 return mem;
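/* Worked example (assuming a little-endian target and a QImode access
   at address A with A % 4 == 2): ALIGN is A & -4, SHIFT is
   (A * 8) & 0x18 == 16, and MASK is 0xFF << 16 == 0xFF0000, so the
   byte occupies bits 16..23 of the aligned SImode word.  On a
   big-endian target the XOR with 0x18 turns that shift into 8,
   matching the byte's position from the most significant end.  */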
22472 /* A subroutine of the various atomic expanders. For sub-word operands,
22473 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
22475 static rtx
22476 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22478 rtx x;
22480 x = gen_reg_rtx (SImode);
22481 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22482 gen_rtx_NOT (SImode, mask),
22483 oldval)));
22485 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22487 return x;
22490 /* A subroutine of the various atomic expanders. For sub-word operands,
22491 extract WIDE to NARROW via SHIFT. */
22493 static void
22494 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22496 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22497 wide, 1, OPTAB_LIB_WIDEN);
22498 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22501 /* Expand an atomic compare and swap operation. */
22503 void
22504 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22506 rtx boolval, retval, mem, oldval, newval, cond;
22507 rtx label1, label2, x, mask, shift;
22508 machine_mode mode, orig_mode;
22509 enum memmodel mod_s, mod_f;
22510 bool is_weak;
22512 boolval = operands[0];
22513 retval = operands[1];
22514 mem = operands[2];
22515 oldval = operands[3];
22516 newval = operands[4];
22517 is_weak = (INTVAL (operands[5]) != 0);
22518 mod_s = memmodel_base (INTVAL (operands[6]));
22519 mod_f = memmodel_base (INTVAL (operands[7]));
22520 orig_mode = mode = GET_MODE (mem);
22522 mask = shift = NULL_RTX;
22523 if (mode == QImode || mode == HImode)
22525 /* Before power8, we didn't have access to lbarx/lharx, so generate a
22526 lwarx plus shift/mask operations. With power8, we need to do the
22527 comparison in SImode, but the store is still done in QI/HImode. */
22528 oldval = convert_modes (SImode, mode, oldval, 1);
22530 if (!TARGET_SYNC_HI_QI)
22532 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22534 /* Shift and mask OLDVAL into position within the word. */
22535 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22536 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22538 /* Shift and mask NEWVAL into position within the word. */
22539 newval = convert_modes (SImode, mode, newval, 1);
22540 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22544 /* Prepare to adjust the return value. */
22545 retval = gen_reg_rtx (SImode);
22546 mode = SImode;
22548 else if (reg_overlap_mentioned_p (retval, oldval))
22549 oldval = copy_to_reg (oldval);
22551 if (mode != TImode && !reg_or_short_operand (oldval, mode))
22552 oldval = copy_to_mode_reg (mode, oldval);
22554 if (reg_overlap_mentioned_p (retval, newval))
22555 newval = copy_to_reg (newval);
22557 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22559 label1 = NULL_RTX;
22560 if (!is_weak)
22562 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22563 emit_label (XEXP (label1, 0));
22565 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22567 emit_load_locked (mode, retval, mem);
22569 x = retval;
22570 if (mask)
22571 x = expand_simple_binop (SImode, AND, retval, mask,
22572 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22574 cond = gen_reg_rtx (CCmode);
22575 /* If we have TImode, synthesize a comparison. */
22576 if (mode != TImode)
22577 x = gen_rtx_COMPARE (CCmode, x, oldval);
22578 else
22580 rtx xor1_result = gen_reg_rtx (DImode);
22581 rtx xor2_result = gen_reg_rtx (DImode);
22582 rtx or_result = gen_reg_rtx (DImode);
22583 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22584 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22585 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22586 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22588 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22589 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22590 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22591 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22594 emit_insn (gen_rtx_SET (cond, x));
22596 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22597 emit_unlikely_jump (x, label2);
22599 x = newval;
22600 if (mask)
22601 x = rs6000_mask_atomic_subword (retval, newval, mask);
22603 emit_store_conditional (orig_mode, cond, mem, x);
22605 if (!is_weak)
22607 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22608 emit_unlikely_jump (x, label1);
22611 if (!is_mm_relaxed (mod_f))
22612 emit_label (XEXP (label2, 0));
22614 rs6000_post_atomic_barrier (mod_s);
22616 if (is_mm_relaxed (mod_f))
22617 emit_label (XEXP (label2, 0));
22619 if (shift)
22620 rs6000_finish_atomic_subword (operands[1], retval, shift);
22621 else if (mode != GET_MODE (operands[1]))
22622 convert_move (operands[1], retval, 1);
22624 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22625 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22626 emit_insn (gen_rtx_SET (boolval, x));
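/* Shape of the emitted sequence (a sketch, not verbatim output) for a
   strong SImode compare-and-swap with seq_cst ordering:

       hwsync
   .L1: lwarx   ret, 0, mem        <- load-locked
        cmpw    cr0, ret, oldval
        bne-    cr0, .L2           <- values differ: fail
        stwcx.  newval, 0, mem     <- store-conditional
        bne-    cr0, .L1           <- lost reservation: retry
   .L2: isync
        ... boolval = (CR0 is EQ)

   The weak form omits the retry branch to .L1, and QI/HImode without
   lbarx/lharx runs the same loop on the containing aligned word.  */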
22629 /* Expand an atomic exchange operation. */
22631 void
22632 rs6000_expand_atomic_exchange (rtx operands[])
22634 rtx retval, mem, val, cond;
22635 machine_mode mode;
22636 enum memmodel model;
22637 rtx label, x, mask, shift;
22639 retval = operands[0];
22640 mem = operands[1];
22641 val = operands[2];
22642 model = memmodel_base (INTVAL (operands[3]));
22643 mode = GET_MODE (mem);
22645 mask = shift = NULL_RTX;
22646 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22648 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22650 /* Shift and mask VAL into position within the word. */
22651 val = convert_modes (SImode, mode, val, 1);
22652 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22655 /* Prepare to adjust the return value. */
22656 retval = gen_reg_rtx (SImode);
22657 mode = SImode;
22660 mem = rs6000_pre_atomic_barrier (mem, model);
22662 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22663 emit_label (XEXP (label, 0));
22665 emit_load_locked (mode, retval, mem);
22667 x = val;
22668 if (mask)
22669 x = rs6000_mask_atomic_subword (retval, val, mask);
22671 cond = gen_reg_rtx (CCmode);
22672 emit_store_conditional (mode, cond, mem, x);
22674 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22675 emit_unlikely_jump (x, label);
22677 rs6000_post_atomic_barrier (model);
22679 if (shift)
22680 rs6000_finish_atomic_subword (operands[0], retval, shift);
22683 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
22684 to perform. MEM is the memory on which to operate. VAL is the second
22685 operand of the binary operator. BEFORE and AFTER are optional locations to
22686 return the value of MEM either before or after the operation. MODEL_RTX
22687 is a CONST_INT containing the memory model to use. */
22689 void
22690 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
22691 rtx orig_before, rtx orig_after, rtx model_rtx)
22693 enum memmodel model = memmodel_base (INTVAL (model_rtx));
22694 machine_mode mode = GET_MODE (mem);
22695 machine_mode store_mode = mode;
22696 rtx label, x, cond, mask, shift;
22697 rtx before = orig_before, after = orig_after;
22699 mask = shift = NULL_RTX;
22700 /* On power8, we want to use SImode for the operation. On previous systems,
22701 do the operation on a full word and shift/mask to get the proper byte or
22702 halfword. */
22703 if (mode == QImode || mode == HImode)
22705 if (TARGET_SYNC_HI_QI)
22707 val = convert_modes (SImode, mode, val, 1);
22709 /* Prepare to adjust the return value. */
22710 before = gen_reg_rtx (SImode);
22711 if (after)
22712 after = gen_reg_rtx (SImode);
22713 mode = SImode;
22715 else
22717 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22719 /* Shift and mask VAL into position within the word. */
22720 val = convert_modes (SImode, mode, val, 1);
22721 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22724 switch (code)
22726 case IOR:
22727 case XOR:
22728 /* We've already zero-extended VAL. That is sufficient to
22729 make certain that it does not affect other bits. */
22730 mask = NULL;
22731 break;
22733 case AND:
22734 /* If we make certain that all of the other bits in VAL are
22735 set, that will be sufficient to not affect other bits. */
22736 x = gen_rtx_NOT (SImode, mask);
22737 x = gen_rtx_IOR (SImode, x, val);
22738 emit_insn (gen_rtx_SET (val, x));
22739 mask = NULL;
22740 break;
22742 case NOT:
22743 case PLUS:
22744 case MINUS:
22745 /* These will all affect bits outside the field and need
22746 adjustment via MASK within the loop. */
22747 break;
22749 default:
22750 gcc_unreachable ();
22753 /* Prepare to adjust the return value. */
22754 before = gen_reg_rtx (SImode);
22755 if (after)
22756 after = gen_reg_rtx (SImode);
22757 store_mode = mode = SImode;
22761 mem = rs6000_pre_atomic_barrier (mem, model);
22763 label = gen_label_rtx ();
22764 emit_label (label);
22765 label = gen_rtx_LABEL_REF (VOIDmode, label);
22767 if (before == NULL_RTX)
22768 before = gen_reg_rtx (mode);
22770 emit_load_locked (mode, before, mem);
22772 if (code == NOT)
22774 x = expand_simple_binop (mode, AND, before, val,
22775 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22776 after = expand_simple_unop (mode, NOT, x, after, 1);
22778 else
22780 after = expand_simple_binop (mode, code, before, val,
22781 after, 1, OPTAB_LIB_WIDEN);
22784 x = after;
22785 if (mask)
22787 x = expand_simple_binop (SImode, AND, after, mask,
22788 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22789 x = rs6000_mask_atomic_subword (before, x, mask);
22791 else if (store_mode != mode)
22792 x = convert_modes (store_mode, mode, x, 1);
22794 cond = gen_reg_rtx (CCmode);
22795 emit_store_conditional (store_mode, cond, mem, x);
22797 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22798 emit_unlikely_jump (x, label);
22800 rs6000_post_atomic_barrier (model);
22802 if (shift)
22804 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22805 then do the calculations in a SImode register. */
22806 if (orig_before)
22807 rs6000_finish_atomic_subword (orig_before, before, shift);
22808 if (orig_after)
22809 rs6000_finish_atomic_subword (orig_after, after, shift);
22811 else if (store_mode != mode)
22813 /* QImode/HImode on machines with lbarx/lharx where we do the native
22814 operation and then do the calculations in a SImode register. */
22815 if (orig_before)
22816 convert_move (orig_before, before, 1);
22817 if (orig_after)
22818 convert_move (orig_after, after, 1);
22820 else if (orig_after && after != orig_after)
22821 emit_move_insn (orig_after, after);
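/* Sketch of an SImode relaxed __atomic_fetch_add produced here
   (illustrative, not verbatim):

   .L1: lwarx   before, 0, mem
        add     after, before, val
        stwcx.  after, 0, mem
        bne-    cr0, .L1

   NOT is the one special case above: it expands as
   after = ~(before & val), i.e. a NAND, which is what the
   __atomic_fetch_nand family requires.  */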
22824 /* Emit instructions to move SRC to DST. Called by splitters for
22825 multi-register moves. It will emit at most one instruction for
22826 each register that is accessed; that is, it won't emit li/lis pairs
22827 (or equivalent for 64-bit code). One of SRC or DST must be a hard
22828 register. */
22830 void
22831 rs6000_split_multireg_move (rtx dst, rtx src)
22833 /* The register number of the first register being moved. */
22834 int reg;
22835 /* The mode that is to be moved. */
22836 machine_mode mode;
22837 /* The mode that the move is being done in, and its size. */
22838 machine_mode reg_mode;
22839 int reg_mode_size;
22840 /* The number of registers that will be moved. */
22841 int nregs;
22843 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
22844 mode = GET_MODE (dst);
22845 nregs = hard_regno_nregs[reg][mode];
22846 if (FP_REGNO_P (reg))
22847 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
22848 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
22849 else if (ALTIVEC_REGNO_P (reg))
22850 reg_mode = V16QImode;
22851 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
22852 reg_mode = DFmode;
22853 else
22854 reg_mode = word_mode;
22855 reg_mode_size = GET_MODE_SIZE (reg_mode);
22857 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22859 /* TDmode residing in FP registers is special, since the ISA requires that
22860 the lower-numbered word of a register pair is always the most significant
22861 word, even in little-endian mode. This does not match the usual subreg
22862 semantics, so we cannot use simplify_gen_subreg in those cases. Access
22863 the appropriate constituent registers "by hand" in little-endian mode.
22865 Note we do not need to check for destructive overlap here since TDmode
22866 can only reside in even/odd register pairs. */
22867 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22869 rtx p_src, p_dst;
22870 int i;
22872 for (i = 0; i < nregs; i++)
22874 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22875 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22876 else
22877 p_src = simplify_gen_subreg (reg_mode, src, mode,
22878 i * reg_mode_size);
22880 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22881 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22882 else
22883 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22884 i * reg_mode_size);
22886 emit_insn (gen_rtx_SET (p_dst, p_src));
22889 return;
22892 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22894 /* Move register range backwards, if we might have destructive
22895 overlap. */
22896 int i;
22897 for (i = nregs - 1; i >= 0; i--)
22898 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22899 i * reg_mode_size),
22900 simplify_gen_subreg (reg_mode, src, mode,
22901 i * reg_mode_size)));
22903 else
22905 int i;
22906 int j = -1;
22907 bool used_update = false;
22908 rtx restore_basereg = NULL_RTX;
22910 if (MEM_P (src) && INT_REGNO_P (reg))
22912 rtx breg;
22914 if (GET_CODE (XEXP (src, 0)) == PRE_INC
22915 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
22917 rtx delta_rtx;
22918 breg = XEXP (XEXP (src, 0), 0);
22919 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
22920 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
22921 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
22922 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22923 src = replace_equiv_address (src, breg);
22925 else if (! rs6000_offsettable_memref_p (src, reg_mode))
22927 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
22929 rtx basereg = XEXP (XEXP (src, 0), 0);
22930 if (TARGET_UPDATE)
22932 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
22933 emit_insn (gen_rtx_SET (ndst,
22934 gen_rtx_MEM (reg_mode,
22935 XEXP (src, 0))));
22936 used_update = true;
22938 else
22939 emit_insn (gen_rtx_SET (basereg,
22940 XEXP (XEXP (src, 0), 1)));
22941 src = replace_equiv_address (src, basereg);
22943 else
22945 rtx basereg = gen_rtx_REG (Pmode, reg);
22946 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
22947 src = replace_equiv_address (src, basereg);
22951 breg = XEXP (src, 0);
22952 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
22953 breg = XEXP (breg, 0);
22955 /* If the base register we are using to address memory is
22956 also a destination reg, then change that register last. */
22957 if (REG_P (breg)
22958 && REGNO (breg) >= REGNO (dst)
22959 && REGNO (breg) < REGNO (dst) + nregs)
22960 j = REGNO (breg) - REGNO (dst);
22962 else if (MEM_P (dst) && INT_REGNO_P (reg))
22964 rtx breg;
22966 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
22967 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
22969 rtx delta_rtx;
22970 breg = XEXP (XEXP (dst, 0), 0);
22971 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
22972 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
22973 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
22975 /* We have to update the breg before doing the store.
22976 Use store with update, if available. */
22978 if (TARGET_UPDATE)
22980 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22981 emit_insn (TARGET_32BIT
22982 ? (TARGET_POWERPC64
22983 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
22984 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
22985 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
22986 used_update = true;
22988 else
22989 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22990 dst = replace_equiv_address (dst, breg);
22992 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
22993 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
22995 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
22997 rtx basereg = XEXP (XEXP (dst, 0), 0);
22998 if (TARGET_UPDATE)
23000 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23001 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23002 XEXP (dst, 0)),
23003 nsrc));
23004 used_update = true;
23006 else
23007 emit_insn (gen_rtx_SET (basereg,
23008 XEXP (XEXP (dst, 0), 1)));
23009 dst = replace_equiv_address (dst, basereg);
23011 else
23013 rtx basereg = XEXP (XEXP (dst, 0), 0);
23014 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23015 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23016 && REG_P (basereg)
23017 && REG_P (offsetreg)
23018 && REGNO (basereg) != REGNO (offsetreg));
23019 if (REGNO (basereg) == 0)
23021 rtx tmp = offsetreg;
23022 offsetreg = basereg;
23023 basereg = tmp;
23025 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23026 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23027 dst = replace_equiv_address (dst, basereg);
23030 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23031 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
23034 for (i = 0; i < nregs; i++)
23036 /* Calculate index to next subword. */
23037 ++j;
23038 if (j == nregs)
23039 j = 0;
23041 /* If compiler already emitted move of first word by
23042 store with update, no need to do anything. */
23043 if (j == 0 && used_update)
23044 continue;
23046 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23047 j * reg_mode_size),
23048 simplify_gen_subreg (reg_mode, src, mode,
23049 j * reg_mode_size)));
23051 if (restore_basereg != NULL_RTX)
23052 emit_insn (restore_basereg);
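/* Illustrative case: a TImode GPR-to-GPR move on a 32-bit target is
   four SImode moves.  With overlapping ranges such as r4..r7 ->
   r5..r8, a forward copy would read r5 after clobbering it, so the
   backwards loop above copies r8 <- r7 first and walks down.  */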
23057 /* This page contains routines that are used to determine what the
23058 function prologue and epilogue code will do and write them out. */
23060 static inline bool
23061 save_reg_p (int r)
23063 return !call_used_regs[r] && df_regs_ever_live_p (r);
23066 /* Determine whether the gp REG is really used. */
23068 static bool
23069 rs6000_reg_live_or_pic_offset_p (int reg)
23071 /* We need to mark the PIC offset register live under the same conditions
23072 as it is set up; otherwise it won't be saved before we clobber it. */
23074 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23076 if (TARGET_TOC && TARGET_MINIMAL_TOC
23077 && (crtl->calls_eh_return
23078 || df_regs_ever_live_p (reg)
23079 || get_pool_size ()))
23080 return true;
23082 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23083 && flag_pic)
23084 return true;
23087 /* If the function calls eh_return, treat all the registers that would
23088 otherwise be checked for liveness as used. */
23090 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23091 && !call_used_regs[reg]);
23094 /* Return the first fixed-point register that is required to be
23095 saved. 32 if none. */
23098 first_reg_to_save (void)
23100 int first_reg;
23102 /* Find lowest numbered live register. */
23103 for (first_reg = 13; first_reg <= 31; first_reg++)
23104 if (save_reg_p (first_reg))
23105 break;
23107 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
23108 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23109 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23110 || (TARGET_TOC && TARGET_MINIMAL_TOC))
23111 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23112 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
23114 #if TARGET_MACHO
23115 if (flag_pic
23116 && crtl->uses_pic_offset_table
23117 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
23118 return RS6000_PIC_OFFSET_TABLE_REGNUM;
23119 #endif
23121 return first_reg;
23124 /* Similar, for FP regs. */
23127 first_fp_reg_to_save (void)
23129 int first_reg;
23131 /* Find lowest numbered live register. */
23132 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
23133 if (save_reg_p (first_reg))
23134 break;
23136 return first_reg;
23139 /* Similar, for AltiVec regs. */
23141 static int
23142 first_altivec_reg_to_save (void)
23144 int i;
23146 /* Stack frame remains as is unless we are using the AltiVec ABI. */
23147 if (! TARGET_ALTIVEC_ABI)
23148 return LAST_ALTIVEC_REGNO + 1;
23150 /* On Darwin, the unwind routines are compiled without
23151 TARGET_ALTIVEC, and use save_world to save/restore the
23152 altivec registers when necessary. */
23153 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23154 && ! TARGET_ALTIVEC)
23155 return FIRST_ALTIVEC_REGNO + 20;
23157 /* Find lowest numbered live register. */
23158 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
23159 if (save_reg_p (i))
23160 break;
23162 return i;
23165 /* Return a 32-bit mask of the AltiVec registers we need to set in
23166 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
23167 the 32-bit word is 0. */
23169 static unsigned int
23170 compute_vrsave_mask (void)
23172 unsigned int i, mask = 0;
23174 /* On Darwin, the unwind routines are compiled without
23175 TARGET_ALTIVEC, and use save_world to save/restore the
23176 call-saved altivec registers when necessary. */
23177 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23178 && ! TARGET_ALTIVEC)
23179 mask |= 0xFFF;
23181 /* First, find out if we use _any_ altivec registers. */
23182 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23183 if (df_regs_ever_live_p (i))
23184 mask |= ALTIVEC_REG_BIT (i);
23186 if (mask == 0)
23187 return mask;
23189 /* Next, remove the argument registers from the set. These must
23190 be in the VRSAVE mask set by the caller, so we don't need to add
23191 them in again. More importantly, the mask we compute here is
23192 used to generate CLOBBERs in the set_vrsave insn, and we do not
23193 wish the argument registers to die. */
23194 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
23195 mask &= ~ALTIVEC_REG_BIT (i);
23197 /* Similarly, remove the return value from the set. */
23199 bool yes = false;
23200 diddle_return_value (is_altivec_return_reg, &yes);
23201 if (yes)
23202 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
23205 return mask;
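/* Example (assuming the usual ALTIVEC_REG_BIT definition,
   0x80000000 >> (REGNO - FIRST_ALTIVEC_REGNO)): if only V20 and V21
   are live, and neither is an argument or return-value register, the
   mask is (0x80000000 >> 20) | (0x80000000 >> 21) == 0x00000c00.  */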
23208 /* For a very restricted set of circumstances, we can cut down the
23209 size of prologues/epilogues by calling our own save/restore-the-world
23210 routines. */
23212 static void
23213 compute_save_world_info (rs6000_stack_t *info)
23215 info->world_save_p = 1;
23216 info->world_save_p
23217 = (WORLD_SAVE_P (info)
23218 && DEFAULT_ABI == ABI_DARWIN
23219 && !cfun->has_nonlocal_label
23220 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
23221 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
23222 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
23223 && info->cr_save_p);
23225 /* This will not work in conjunction with sibcalls. Make sure there
23226 are none. (This check is expensive, but seldom executed.) */
23227 if (WORLD_SAVE_P (info))
23229 rtx_insn *insn;
23230 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
23231 if (CALL_P (insn) && SIBLING_CALL_P (insn))
23233 info->world_save_p = 0;
23234 break;
23238 if (WORLD_SAVE_P (info))
23240 /* Even if we're not touching VRsave, make sure there's room on the
23241 stack for it, if it looks like we're calling SAVE_WORLD, which
23242 will attempt to save it. */
23243 info->vrsave_size = 4;
23245 /* If we are going to save the world, we need to save the link register too. */
23246 info->lr_save_p = 1;
23248 /* "Save" the VRsave register too if we're saving the world. */
23249 if (info->vrsave_mask == 0)
23250 info->vrsave_mask = compute_vrsave_mask ();
23252 /* Because the Darwin register save/restore routines only handle
23253 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
23254 check. */
23255 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
23256 && (info->first_altivec_reg_save
23257 >= FIRST_SAVED_ALTIVEC_REGNO));
23260 return;
23264 static void
23265 is_altivec_return_reg (rtx reg, void *xyes)
23267 bool *yes = (bool *) xyes;
23268 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
23269 *yes = true;
23273 /* Return whether REG is a global user reg or has been specified by
23274 -ffixed-REG. We should not restore these, and so cannot use
23275 lmw or out-of-line restore functions if there are any. We also
23276 can't save them (well, emit frame notes for them), because frame
23277 unwinding during exception handling will restore saved registers. */
23279 static bool
23280 fixed_reg_p (int reg)
23282 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
23283 backend sets it, overriding anything the user might have given. */
23284 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23285 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
23286 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23287 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
23288 return false;
23290 return fixed_regs[reg];
23293 /* Determine the strategy for savings/restoring registers. */
23295 enum {
23296 SAVE_MULTIPLE = 0x1,
23297 SAVE_INLINE_GPRS = 0x2,
23298 SAVE_INLINE_FPRS = 0x4,
23299 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
23300 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
23301 SAVE_INLINE_VRS = 0x20,
23302 REST_MULTIPLE = 0x100,
23303 REST_INLINE_GPRS = 0x200,
23304 REST_INLINE_FPRS = 0x400,
23305 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
23306 REST_INLINE_VRS = 0x1000
23309 static int
23310 rs6000_savres_strategy (rs6000_stack_t *info,
23311 bool using_static_chain_p)
23313 int strategy = 0;
23315 /* Select between in-line and out-of-line save and restore of regs.
23316 First, all the obvious cases where we don't use out-of-line. */
23317 if (crtl->calls_eh_return
23318 || cfun->machine->ra_need_lr)
23319 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
23320 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
23321 | SAVE_INLINE_VRS | REST_INLINE_VRS);
23323 if (info->first_gp_reg_save == 32)
23324 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23326 if (info->first_fp_reg_save == 64
23327 /* The out-of-line FP routines use double-precision stores;
23328 we can't use those routines if we don't have such stores. */
23329 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
23330 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23332 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
23333 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23335 /* Define cutoff for using out-of-line functions to save registers. */
23336 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
23338 if (!optimize_size)
23340 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23341 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23342 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23344 else
23346 /* Prefer out-of-line restore if it will exit. */
23347 if (info->first_fp_reg_save > 61)
23348 strategy |= SAVE_INLINE_FPRS;
23349 if (info->first_gp_reg_save > 29)
23351 if (info->first_fp_reg_save == 64)
23352 strategy |= SAVE_INLINE_GPRS;
23353 else
23354 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23356 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
23357 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23360 else if (DEFAULT_ABI == ABI_DARWIN)
23362 if (info->first_fp_reg_save > 60)
23363 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23364 if (info->first_gp_reg_save > 29)
23365 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23366 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23368 else
23370 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23371 if (info->first_fp_reg_save > 61)
23372 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23373 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23374 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23377 /* Don't bother to try to save things out-of-line if r11 is occupied
23378 by the static chain. It would require too much fiddling and the
23379 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
23380 pointer on Darwin, and AIX uses r1 or r12. */
23381 if (using_static_chain_p
23382 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
23383 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
23384 | SAVE_INLINE_GPRS
23385 | SAVE_INLINE_VRS);
23387 /* Saving CR interferes with the exit routines used on the SPE, so
23388 just punt here. */
23389 if (TARGET_SPE_ABI
23390 && info->spe_64bit_regs_used
23391 && info->cr_save_p)
23392 strategy |= REST_INLINE_GPRS;
23394 /* We can only use the out-of-line routines to restore fprs if we've
23395 saved all the registers from first_fp_reg_save in the prologue.
23396 Otherwise, we risk loading garbage. Of course, if we have saved
23397 out-of-line then we know we haven't skipped any fprs. */
23398 if ((strategy & SAVE_INLINE_FPRS)
23399 && !(strategy & REST_INLINE_FPRS))
23401 int i;
23403 for (i = info->first_fp_reg_save; i < 64; i++)
23404 if (fixed_regs[i] || !save_reg_p (i))
23406 strategy |= REST_INLINE_FPRS;
23407 break;
23411 /* Similarly, for altivec regs. */
23412 if ((strategy & SAVE_INLINE_VRS)
23413 && !(strategy & REST_INLINE_VRS))
23415 int i;
23417 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23418 if (fixed_regs[i] || !save_reg_p (i))
23420 strategy |= REST_INLINE_VRS;
23421 break;
23425 /* info->lr_save_p isn't yet set if the only reason lr needs to be
23426 saved is an out-of-line save or restore. Set up the value for
23427 the next test (excluding out-of-line gprs). */
23428 bool lr_save_p = (info->lr_save_p
23429 || !(strategy & SAVE_INLINE_FPRS)
23430 || !(strategy & SAVE_INLINE_VRS)
23431 || !(strategy & REST_INLINE_FPRS)
23432 || !(strategy & REST_INLINE_VRS));
23434 if (TARGET_MULTIPLE
23435 && !TARGET_POWERPC64
23436 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
23437 && info->first_gp_reg_save < 31)
23439 /* Prefer store multiple for saves over out-of-line routines,
23440 since the store-multiple instruction will always be smaller. */
23441 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
23443 /* The situation is more complicated with load multiple. We'd
23444 prefer to use the out-of-line routines for restores, since the
23445 "exit" out-of-line routines can handle the restore of LR and the
23446 frame teardown. However, it doesn't make sense to use the
23447 out-of-line routine if that is the only reason we'd need to save
23448 LR, and we can't use the "exit" out-of-line gpr restore if we
23449 have saved some fprs. In those cases it is advantageous to use
23450 load multiple when available. */
23451 if (info->first_fp_reg_save != 64 || !lr_save_p)
23452 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
23455 /* Using the "exit" out-of-line routine does not improve code size
23456 if using it would require lr to be saved and if only saving one
23457 or two gprs. */
23458 else if (!lr_save_p && info->first_gp_reg_save > 29)
23459 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23461 /* We can only use load multiple or the out-of-line routines to
23462 restore gprs if we've saved all the registers from
23463 first_gp_reg_save. Otherwise, we risk loading garbage.
23464 Of course, if we have saved out-of-line or used stmw then we know
23465 we haven't skipped any gprs. */
23466 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
23467 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23469 int i;
23471 for (i = info->first_gp_reg_save; i < 32; i++)
23472 if (fixed_reg_p (i) || !save_reg_p (i))
23474 strategy |= REST_INLINE_GPRS;
23475 strategy &= ~REST_MULTIPLE;
23476 break;
23480 if (TARGET_ELF && TARGET_64BIT)
23482 if (!(strategy & SAVE_INLINE_FPRS))
23483 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23484 else if (!(strategy & SAVE_INLINE_GPRS)
23485 && info->first_fp_reg_save == 64)
23486 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23488 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23489 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23491 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23492 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23494 return strategy;
23497 /* Calculate the stack information for the current function. This is
23498 complicated by having two separate calling sequences, the AIX calling
23499 sequence and the V.4 calling sequence.
23501 AIX (and Darwin/Mac OS X) stack frames look like:
23502 32-bit 64-bit
23503 SP----> +---------------------------------------+
23504 | back chain to caller | 0 0
23505 +---------------------------------------+
23506 | saved CR | 4 8 (8-11)
23507 +---------------------------------------+
23508 | saved LR | 8 16
23509 +---------------------------------------+
23510 | reserved for compilers | 12 24
23511 +---------------------------------------+
23512 | reserved for binders | 16 32
23513 +---------------------------------------+
23514 | saved TOC pointer | 20 40
23515 +---------------------------------------+
23516 | Parameter save area (P) | 24 48
23517 +---------------------------------------+
23518 | Alloca space (A) | 24+P etc.
23519 +---------------------------------------+
23520 | Local variable space (L) | 24+P+A
23521 +---------------------------------------+
23522 | Float/int conversion temporary (X) | 24+P+A+L
23523 +---------------------------------------+
23524 | Save area for AltiVec registers (W) | 24+P+A+L+X
23525 +---------------------------------------+
23526 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23527 +---------------------------------------+
23528 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23529 +---------------------------------------+
23530 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23531 +---------------------------------------+
23532 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23533 +---------------------------------------+
23534 old SP->| back chain to caller's caller |
23535 +---------------------------------------+
23537 The required alignment for AIX configurations is two words (i.e., 8
23538 or 16 bytes).
23540 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23542 SP----> +---------------------------------------+
23543 | Back chain to caller | 0
23544 +---------------------------------------+
23545 | Save area for CR | 8
23546 +---------------------------------------+
23547 | Saved LR | 16
23548 +---------------------------------------+
23549 | Saved TOC pointer | 24
23550 +---------------------------------------+
23551 | Parameter save area (P) | 32
23552 +---------------------------------------+
23553 | Alloca space (A) | 32+P
23554 +---------------------------------------+
23555 | Local variable space (L) | 32+P+A
23556 +---------------------------------------+
23557 | Save area for AltiVec registers (W) | 32+P+A+L
23558 +---------------------------------------+
23559 | AltiVec alignment padding (Y) | 32+P+A+L+W
23560 +---------------------------------------+
23561 | Save area for GP registers (G) | 32+P+A+L+W+Y
23562 +---------------------------------------+
23563 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23564 +---------------------------------------+
23565 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23566 +---------------------------------------+
23569 V.4 stack frames look like:
23571 SP----> +---------------------------------------+
23572 | back chain to caller | 0
23573 +---------------------------------------+
23574 | caller's saved LR | 4
23575 +---------------------------------------+
23576 | Parameter save area (P) | 8
23577 +---------------------------------------+
23578 | Alloca space (A) | 8+P
23579 +---------------------------------------+
23580 | Varargs save area (V) | 8+P+A
23581 +---------------------------------------+
23582 | Local variable space (L) | 8+P+A+V
23583 +---------------------------------------+
23584 | Float/int conversion temporary (X) | 8+P+A+V+L
23585 +---------------------------------------+
23586 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23587 +---------------------------------------+
23588 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23589 +---------------------------------------+
23590 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23591 +---------------------------------------+
23592 | SPE: area for 64-bit GP registers |
23593 +---------------------------------------+
23594 | SPE alignment padding |
23595 +---------------------------------------+
23596 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23597 +---------------------------------------+
23598 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23599 +---------------------------------------+
23600 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23601 +---------------------------------------+
23602 old SP->| back chain to caller's caller |
23603 +---------------------------------------+
23605 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23606 given. (But note below and in sysv4.h that we require only 8 and
23607 may round up the size of our stack frame anyway. The historical
23608 reason is early versions of powerpc-linux which didn't properly
23609 align the stack at program startup. A happy side-effect is that
23610 -mno-eabi libraries can be used with -meabi programs.)
23612 The EABI configuration defaults to the V.4 layout. However,
23613 the stack alignment requirements may differ. If -mno-eabi is not
23614 given, the required stack alignment is 8 bytes; if -mno-eabi is
23615 given, the required alignment is 16 bytes. (But see V.4 comment
23616 above.) */
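/* A worked example of the layout above, with hypothetical numbers
   rather than output from any particular build: under ELFv2 with
   P = 64 bytes of parameter save area, no alloca (A = 0), L = 32
   bytes of locals and no vector state (W = Y = 0), the GP save area
   starts at 32+64+0+32 = 128 from the new SP, and if four GPRs are
   saved (G = 4*8 = 32) the FP save area starts at 128+32 = 160.  */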
23618 #ifndef ABI_STACK_BOUNDARY
23619 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
23620 #endif
23622 static rs6000_stack_t *
23623 rs6000_stack_info (void)
23625 /* We should never be called for thunks; we are not set up for that. */
23626 gcc_assert (!cfun->is_thunk);
23628 rs6000_stack_t *info = &stack_info;
23629 int reg_size = TARGET_32BIT ? 4 : 8;
23630 int ehrd_size;
23631 int ehcr_size;
23632 int save_align;
23633 int first_gp;
23634 HOST_WIDE_INT non_fixed_size;
23635 bool using_static_chain_p;
23637 if (reload_completed && info->reload_completed)
23638 return info;
23640 memset (info, 0, sizeof (*info));
23641 info->reload_completed = reload_completed;
23643 if (TARGET_SPE)
23645 /* Cache value so we don't rescan instruction chain over and over. */
23646 if (cfun->machine->spe_insn_chain_scanned_p == 0)
23647 cfun->machine->spe_insn_chain_scanned_p
23648 = spe_func_has_64bit_regs_p () + 1;
23649 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
23652 /* Select which calling sequence. */
23653 info->abi = DEFAULT_ABI;
23655 /* Calculate which registers need to be saved & save area size. */
23656 info->first_gp_reg_save = first_reg_to_save ();
23657 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
23658 even if it currently looks like we won't. Reload may need it to
23659 get at a constant; if so, it will have already created a constant
23660 pool entry for it. */
23661 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
23662 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
23663 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
23664 && crtl->uses_const_pool
23665 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
23666 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
23667 else
23668 first_gp = info->first_gp_reg_save;
23670 info->gp_size = reg_size * (32 - first_gp);
23672 /* For the SPE, we have an additional upper 32-bits on each GPR.
23673 Ideally we should save the entire 64-bits only when the upper
23674 half is used in SIMD instructions. Since we only record
23675 registers live (not the size they are used in), this proves
23676 difficult because we'd have to traverse the instruction chain at
23677 the right time, taking reload into account. This is a real pain,
23678 so we opt to always save the GPRs in 64-bits if even one register
23679 gets used in 64-bits. Otherwise, all the registers in the frame
23680 get saved in 32-bits.
23682 So, since we save all GPRs (except the SP) in 64-bits, the
23683 traditional GP save area will be empty. */
23684 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23685 info->gp_size = 0;
23687 info->first_fp_reg_save = first_fp_reg_to_save ();
23688 info->fp_size = 8 * (64 - info->first_fp_reg_save);
23690 info->first_altivec_reg_save = first_altivec_reg_to_save ();
23691 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
23692 - info->first_altivec_reg_save);
23694 /* Does this function call anything? */
23695 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
23697 /* Determine if we need to save the condition code registers. */
23698 if (save_reg_p (CR2_REGNO)
23699 || save_reg_p (CR3_REGNO)
23700 || save_reg_p (CR4_REGNO))
23702 info->cr_save_p = 1;
23703 if (DEFAULT_ABI == ABI_V4)
23704 info->cr_size = reg_size;
23707 /* If the current function calls __builtin_eh_return, then we need
23708 to allocate stack space for registers that will hold data for
23709 the exception handler. */
23710 if (crtl->calls_eh_return)
23712 unsigned int i;
23713 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
23714 continue;
23716 /* SPE saves EH registers in 64-bits. */
23717 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
23718 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
23720 else
23721 ehrd_size = 0;
23723 /* In the ELFv2 ABI, we also need to allocate space for separate
23724 CR field save areas if the function calls __builtin_eh_return. */
23725 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23727 /* This hard-codes that we have three call-saved CR fields. */
23728 ehcr_size = 3 * reg_size;
23729 /* We do *not* use the regular CR save mechanism. */
23730 info->cr_save_p = 0;
23732 else
23733 ehcr_size = 0;
23735 /* Determine various sizes. */
23736 info->reg_size = reg_size;
23737 info->fixed_size = RS6000_SAVE_AREA;
23738 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
23739 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
23740 TARGET_ALTIVEC ? 16 : 8);
23741 if (FRAME_GROWS_DOWNWARD)
23742 info->vars_size
23743 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
23744 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
23745 - (info->fixed_size + info->vars_size + info->parm_size);
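/* A sketch of the rounding above, with made-up sizes: if fixed_size
   is 112, vars_size is 20 and parm_size is 0 with a 16-byte
   ABI_STACK_BOUNDARY, then RS6000_ALIGN (132, 16) = 144, so
   vars_size absorbs the 12 bytes of padding and grows to 32.  */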
23747 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23748 info->spe_gp_size = 8 * (32 - first_gp);
23750 if (TARGET_ALTIVEC_ABI)
23751 info->vrsave_mask = compute_vrsave_mask ();
23753 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
23754 info->vrsave_size = 4;
23756 compute_save_world_info (info);
23758 /* Calculate the offsets. */
23759 switch (DEFAULT_ABI)
23761 case ABI_NONE:
23762 default:
23763 gcc_unreachable ();
23765 case ABI_AIX:
23766 case ABI_ELFv2:
23767 case ABI_DARWIN:
23768 info->fp_save_offset = -info->fp_size;
23769 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23771 if (TARGET_ALTIVEC_ABI)
23773 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
23775 /* Align stack so vector save area is on a quadword boundary.
23776 The padding goes above the vectors. */
23777 if (info->altivec_size != 0)
23778 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
23780 info->altivec_save_offset = info->vrsave_save_offset
23781 - info->altivec_padding_size
23782 - info->altivec_size;
23783 gcc_assert (info->altivec_size == 0
23784 || info->altivec_save_offset % 16 == 0);
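/* An example of the masking above, with hypothetical offsets: if
   vrsave_save_offset is -20, then -20 & 0xF = 12 bytes of padding,
   so with a 32-byte AltiVec save area altivec_save_offset becomes
   -20 - 12 - 32 = -64, a multiple of 16 as the assert requires.  */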
23786 /* Adjust for AltiVec case. */
23787 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
23789 else
23790 info->ehrd_offset = info->gp_save_offset - ehrd_size;
23792 info->ehcr_offset = info->ehrd_offset - ehcr_size;
23793 info->cr_save_offset = reg_size; /* first word when 64-bit. */
23794 info->lr_save_offset = 2*reg_size;
23795 break;
23797 case ABI_V4:
23798 info->fp_save_offset = -info->fp_size;
23799 info->gp_save_offset = info->fp_save_offset - info->gp_size;
23800 info->cr_save_offset = info->gp_save_offset - info->cr_size;
23802 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23804 /* Align stack so SPE GPR save area is aligned on a
23805 double-word boundary. */
23806 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
23807 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
23808 else
23809 info->spe_padding_size = 0;
23811 info->spe_gp_save_offset = info->cr_save_offset
23812 - info->spe_padding_size
23813 - info->spe_gp_size;
23815 /* Adjust for SPE case. */
23816 info->ehrd_offset = info->spe_gp_save_offset;
23818 else if (TARGET_ALTIVEC_ABI)
23820 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
23822 /* Align stack so vector save area is on a quadword boundary. */
23823 if (info->altivec_size != 0)
23824 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
23826 info->altivec_save_offset = info->vrsave_save_offset
23827 - info->altivec_padding_size
23828 - info->altivec_size;
23830 /* Adjust for AltiVec case. */
23831 info->ehrd_offset = info->altivec_save_offset;
23833 else
23834 info->ehrd_offset = info->cr_save_offset;
23836 info->ehrd_offset -= ehrd_size;
23837 info->lr_save_offset = reg_size;
23840 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
23841 info->save_size = RS6000_ALIGN (info->fp_size
23842 + info->gp_size
23843 + info->altivec_size
23844 + info->altivec_padding_size
23845 + info->spe_gp_size
23846 + info->spe_padding_size
23847 + ehrd_size
23848 + ehcr_size
23849 + info->cr_size
23850 + info->vrsave_size,
23851 save_align);
23853 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
23855 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
23856 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
23858 /* Determine if we need to save the link register. */
23859 if (info->calls_p
23860 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23861 && crtl->profile
23862 && !TARGET_PROFILE_KERNEL)
23863 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
23864 #ifdef TARGET_RELOCATABLE
23865 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
23866 #endif
23867 || rs6000_ra_ever_killed ())
23868 info->lr_save_p = 1;
23870 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23871 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23872 && call_used_regs[STATIC_CHAIN_REGNUM]);
23873 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
23875 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
23876 || !(info->savres_strategy & SAVE_INLINE_FPRS)
23877 || !(info->savres_strategy & SAVE_INLINE_VRS)
23878 || !(info->savres_strategy & REST_INLINE_GPRS)
23879 || !(info->savres_strategy & REST_INLINE_FPRS)
23880 || !(info->savres_strategy & REST_INLINE_VRS))
23881 info->lr_save_p = 1;
23883 if (info->lr_save_p)
23884 df_set_regs_ever_live (LR_REGNO, true);
23886 /* Determine if we need to allocate any stack frame:
23888 For AIX we need to push the stack if a frame pointer is needed
23889 (because the stack might be dynamically adjusted), if we are
23890 debugging, if we make calls, or if the sum of fp_save, gp_save,
23891 and local variables is more than the space needed to save all
23892 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
23893 + 18*8 = 288 (GPR13 reserved).
23895 For V.4 we don't have the stack cushion that AIX uses, but assume
23896 that the debugger can handle stackless frames. */
23898 if (info->calls_p)
23899 info->push_p = 1;
23901 else if (DEFAULT_ABI == ABI_V4)
23902 info->push_p = non_fixed_size != 0;
23904 else if (frame_pointer_needed)
23905 info->push_p = 1;
23907 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
23908 info->push_p = 1;
23910 else
23911 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
23913 return info;
23916 /* Return true if the current function uses any GPRs in 64-bit SIMD
23917 mode. */
23919 static bool
23920 spe_func_has_64bit_regs_p (void)
23922 rtx_insn *insns, *insn;
23924 /* Functions that save and restore all the call-saved registers will
23925 need to save/restore the registers in 64-bits. */
23926 if (crtl->calls_eh_return
23927 || cfun->calls_setjmp
23928 || crtl->has_nonlocal_goto)
23929 return true;
23931 insns = get_insns ();
23933 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
23935 if (INSN_P (insn))
23937 rtx i;
23939 /* FIXME: This should be implemented with attributes...
23941 (set_attr "spe64" "true")....then,
23942 if (get_spe64(insn)) return true;
23944 It's the only reliable way to do the stuff below. */
23946 i = PATTERN (insn);
23947 if (GET_CODE (i) == SET)
23949 machine_mode mode = GET_MODE (SET_SRC (i));
23951 if (SPE_VECTOR_MODE (mode))
23952 return true;
23953 if (TARGET_E500_DOUBLE
23954 && (mode == DFmode || FLOAT128_2REG_P (mode)))
23955 return true;
23960 return false;
23963 static void
23964 debug_stack_info (rs6000_stack_t *info)
23966 const char *abi_string;
23968 if (! info)
23969 info = rs6000_stack_info ();
23971 fprintf (stderr, "\nStack information for function %s:\n",
23972 ((current_function_decl && DECL_NAME (current_function_decl))
23973 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
23974 : "<unknown>"));
23976 switch (info->abi)
23978 default: abi_string = "Unknown"; break;
23979 case ABI_NONE: abi_string = "NONE"; break;
23980 case ABI_AIX: abi_string = "AIX"; break;
23981 case ABI_ELFv2: abi_string = "ELFv2"; break;
23982 case ABI_DARWIN: abi_string = "Darwin"; break;
23983 case ABI_V4: abi_string = "V.4"; break;
23986 fprintf (stderr, "\tABI = %5s\n", abi_string);
23988 if (TARGET_ALTIVEC_ABI)
23989 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
23991 if (TARGET_SPE_ABI)
23992 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
23994 if (info->first_gp_reg_save != 32)
23995 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
23997 if (info->first_fp_reg_save != 64)
23998 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24000 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24001 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24002 info->first_altivec_reg_save);
24004 if (info->lr_save_p)
24005 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24007 if (info->cr_save_p)
24008 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24010 if (info->vrsave_mask)
24011 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24013 if (info->push_p)
24014 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24016 if (info->calls_p)
24017 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
24019 if (info->gp_size)
24020 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24022 if (info->fp_size)
24023 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24025 if (info->altivec_size)
24026 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24027 info->altivec_save_offset);
24029 if (info->spe_gp_size)
24030 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
24031 info->spe_gp_save_offset);
24033 if (info->vrsave_size)
24034 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24035 info->vrsave_save_offset);
24037 if (info->lr_save_p)
24038 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24040 if (info->cr_save_p)
24041 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24043 if (info->varargs_save_offset)
24044 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24046 if (info->total_size)
24047 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24048 info->total_size);
24050 if (info->vars_size)
24051 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24052 info->vars_size);
24054 if (info->parm_size)
24055 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24057 if (info->fixed_size)
24058 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24060 if (info->gp_size)
24061 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24063 if (info->spe_gp_size)
24064 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
24066 if (info->fp_size)
24067 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24069 if (info->altivec_size)
24070 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24072 if (info->vrsave_size)
24073 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24075 if (info->altivec_padding_size)
24076 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24077 info->altivec_padding_size);
24079 if (info->spe_padding_size)
24080 fprintf (stderr, "\tspe_padding_size = %5d\n",
24081 info->spe_padding_size);
24083 if (info->cr_size)
24084 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24086 if (info->save_size)
24087 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24089 if (info->reg_size != 4)
24090 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24092 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24094 fprintf (stderr, "\n");
24097 rtx
24098 rs6000_return_addr (int count, rtx frame)
24100 /* Currently we don't optimize very well between prologue and body
24101 code, and for PIC code the result can actually be quite bad, so
24102 don't try to be too clever here. */
24103 if (count != 0
24104 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
24106 cfun->machine->ra_needs_full_frame = 1;
24108 return
24109 gen_rtx_MEM
24110 (Pmode,
24111 memory_address
24112 (Pmode,
24113 plus_constant (Pmode,
24114 copy_to_reg
24115 (gen_rtx_MEM (Pmode,
24116 memory_address (Pmode, frame))),
24117 RETURN_ADDRESS_OFFSET)));
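/* The nested MEMs above compute mem[mem[FRAME] + RETURN_ADDRESS_OFFSET]:
   the inner load fetches the back chain word FRAME points at, giving
   the caller's SP, and the outer load reads the LR save slot at
   RETURN_ADDRESS_OFFSET within that frame.  */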
24120 cfun->machine->ra_need_lr = 1;
24121 return get_hard_reg_initial_val (Pmode, LR_REGNO);
24124 /* Say whether a function is a candidate for sibcall handling or not. */
24126 static bool
24127 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24129 tree fntype;
24131 if (decl)
24132 fntype = TREE_TYPE (decl);
24133 else
24134 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24136 /* We can't do it if the called function has more vector parameters
24137 than the current function; there's nowhere to put the VRsave code. */
24138 if (TARGET_ALTIVEC_ABI
24139 && TARGET_ALTIVEC_VRSAVE
24140 && !(decl && decl == current_function_decl))
24142 function_args_iterator args_iter;
24143 tree type;
24144 int nvreg = 0;
24146 /* Functions with vector parameters are required to have a
24147 prototype, so the argument type info must be available
24148 here. */
24149 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
24150 if (TREE_CODE (type) == VECTOR_TYPE
24151 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24152 nvreg++;
24154 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
24155 if (TREE_CODE (type) == VECTOR_TYPE
24156 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24157 nvreg--;
24159 if (nvreg > 0)
24160 return false;
24163 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24164 functions, because the callee may have a different TOC pointer from
24165 the caller and there's no way to ensure we restore the TOC when
24166 we return. With the secure-plt SYSV ABI we can't make non-local
24167 calls when -fpic/PIC because the plt call stubs use r30. */
24168 if (DEFAULT_ABI == ABI_DARWIN
24169 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24170 && decl
24171 && !DECL_EXTERNAL (decl)
24172 && !DECL_WEAK (decl)
24173 && (*targetm.binds_local_p) (decl))
24174 || (DEFAULT_ABI == ABI_V4
24175 && (!TARGET_SECURE_PLT
24176 || !flag_pic
24177 || (decl
24178 && (*targetm.binds_local_p) (decl)))))
24180 tree attr_list = TYPE_ATTRIBUTES (fntype);
24182 if (!lookup_attribute ("longcall", attr_list)
24183 || lookup_attribute ("shortcall", attr_list))
24184 return true;
24187 return false;
24190 static int
24191 rs6000_ra_ever_killed (void)
24193 rtx_insn *top;
24194 rtx reg;
24195 rtx_insn *insn;
24197 if (cfun->is_thunk)
24198 return 0;
24200 if (cfun->machine->lr_save_state)
24201 return cfun->machine->lr_save_state - 1;
24203 /* regs_ever_live has LR marked as used if any sibcalls are present,
24204 but this should not force saving and restoring in the
24205 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24206 clobbers LR, so that is inappropriate. */
24208 /* Also, the prologue can generate a store into LR that
24209 doesn't really count, like this:
24211 move LR->R0
24212 bcl to set PIC register
24213 move LR->R31
24214 move R0->LR
24216 When we're called from the epilogue, we need to avoid counting
24217 this as a store. */
24219 push_topmost_sequence ();
24220 top = get_insns ();
24221 pop_topmost_sequence ();
24222 reg = gen_rtx_REG (Pmode, LR_REGNO);
24224 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
24226 if (INSN_P (insn))
24228 if (CALL_P (insn))
24230 if (!SIBLING_CALL_P (insn))
24231 return 1;
24233 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24234 return 1;
24235 else if (set_of (reg, insn) != NULL_RTX
24236 && !prologue_epilogue_contains (insn))
24237 return 1;
24240 return 0;
24243 /* Emit instructions needed to load the TOC register.
24244 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
24245 and there is a constant pool, or for SVR4 -fpic. */
24247 void
24248 rs6000_emit_load_toc_table (int fromprolog)
24250 rtx dest;
24251 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24253 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24255 char buf[30];
24256 rtx lab, tmp1, tmp2, got;
24258 lab = gen_label_rtx ();
24259 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24260 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24261 if (flag_pic == 2)
24263 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24264 need_toc_init = 1;
24266 else
24267 got = rs6000_got_sym ();
24268 tmp1 = tmp2 = dest;
24269 if (!fromprolog)
24271 tmp1 = gen_reg_rtx (Pmode);
24272 tmp2 = gen_reg_rtx (Pmode);
24274 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24275 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24276 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24277 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
24279 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24281 emit_insn (gen_load_toc_v4_pic_si ());
24282 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24284 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24286 char buf[30];
24287 rtx temp0 = (fromprolog
24288 ? gen_rtx_REG (Pmode, 0)
24289 : gen_reg_rtx (Pmode));
24291 if (fromprolog)
24293 rtx symF, symL;
24295 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24296 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24298 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24299 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24301 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24302 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24303 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24305 else
24307 rtx tocsym, lab;
24309 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24310 need_toc_init = 1;
24311 lab = gen_label_rtx ();
24312 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24313 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24314 if (TARGET_LINK_STACK)
24315 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24316 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24318 emit_insn (gen_addsi3 (dest, temp0, dest));
24320 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24322 /* This is for AIX code running in non-PIC ELF32. */
24323 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24325 need_toc_init = 1;
24326 emit_insn (gen_elf_high (dest, realsym));
24327 emit_insn (gen_elf_low (dest, dest, realsym));
24329 else
24331 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24333 if (TARGET_32BIT)
24334 emit_insn (gen_load_toc_aix_si (dest));
24335 else
24336 emit_insn (gen_load_toc_aix_di (dest));
24340 /* Emit instructions to restore the link register after determining where
24341 its value has been stored. */
24343 void
24344 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24346 rs6000_stack_t *info = rs6000_stack_info ();
24347 rtx operands[2];
24349 operands[0] = source;
24350 operands[1] = scratch;
24352 if (info->lr_save_p)
24354 rtx frame_rtx = stack_pointer_rtx;
24355 HOST_WIDE_INT sp_offset = 0;
24356 rtx tmp;
24358 if (frame_pointer_needed
24359 || cfun->calls_alloca
24360 || info->total_size > 32767)
24362 tmp = gen_frame_mem (Pmode, frame_rtx);
24363 emit_move_insn (operands[1], tmp);
24364 frame_rtx = operands[1];
24366 else if (info->push_p)
24367 sp_offset = info->total_size;
24369 tmp = plus_constant (Pmode, frame_rtx,
24370 info->lr_save_offset + sp_offset);
24371 tmp = gen_frame_mem (Pmode, tmp);
24372 emit_move_insn (tmp, operands[0]);
24374 else
24375 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
24377 /* Freeze lr_save_p. We've just emitted rtl that depends on the
24378 state of lr_save_p so any change from here on would be a bug. In
24379 particular, stop rs6000_ra_ever_killed from considering the SET
24380 of lr we may have added just above. */
24381 cfun->machine->lr_save_state = info->lr_save_p + 1;
24384 static GTY(()) alias_set_type set = -1;
24386 alias_set_type
24387 get_TOC_alias_set (void)
24389 if (set == -1)
24390 set = new_alias_set ();
24391 return set;
24394 /* This returns nonzero if the current function uses the TOC. This is
24395 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
24396 is generated by the ABI_V4 load_toc_* patterns. */
24397 #if TARGET_ELF
24398 static int
24399 uses_TOC (void)
24401 rtx_insn *insn;
24403 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24404 if (INSN_P (insn))
24406 rtx pat = PATTERN (insn);
24407 int i;
24409 if (GET_CODE (pat) == PARALLEL)
24410 for (i = 0; i < XVECLEN (pat, 0); i++)
24412 rtx sub = XVECEXP (pat, 0, i);
24413 if (GET_CODE (sub) == USE)
24415 sub = XEXP (sub, 0);
24416 if (GET_CODE (sub) == UNSPEC
24417 && XINT (sub, 1) == UNSPEC_TOC)
24418 return 1;
24422 return 0;
24424 #endif
24426 rtx
24427 create_TOC_reference (rtx symbol, rtx largetoc_reg)
24429 rtx tocrel, tocreg, hi;
24431 if (TARGET_DEBUG_ADDR)
24433 if (GET_CODE (symbol) == SYMBOL_REF)
24434 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
24435 XSTR (symbol, 0));
24436 else
24438 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
24439 GET_RTX_NAME (GET_CODE (symbol)));
24440 debug_rtx (symbol);
24444 if (!can_create_pseudo_p ())
24445 df_set_regs_ever_live (TOC_REGISTER, true);
24447 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
24448 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
24449 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
24450 return tocrel;
24452 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24453 if (largetoc_reg != NULL)
24455 emit_move_insn (largetoc_reg, hi);
24456 hi = largetoc_reg;
24458 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
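/* A hedged sketch of what the large-model path above can assemble to
   (the exact insns depend on whatever consumes the address): the HIGH
   part materializes as "addis rN,r2,sym@toc@ha" and the LO_SUM folds
   into a dependent insn such as "ld rM,sym@toc@l(rN)".  For
   CMODEL_SMALL the bare UNSPEC_TOCREL is a plain @toc reference
   off r2.  */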
24461 /* Issue assembly directives that create a reference to the given DWARF
24462 FRAME_TABLE_LABEL from the current function section. */
24463 void
24464 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24466 fprintf (asm_out_file, "\t.ref %s\n",
24467 (* targetm.strip_name_encoding) (frame_table_label));
24470 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24471 and the change to the stack pointer. */
24473 static void
24474 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24476 rtvec p;
24477 int i;
24478 rtx regs[3];
24480 i = 0;
24481 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24482 if (hard_frame_needed)
24483 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24484 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24485 || (hard_frame_needed
24486 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24487 regs[i++] = fp;
24489 p = rtvec_alloc (i);
24490 while (--i >= 0)
24492 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24493 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24496 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24499 /* Emit the correct code for allocating stack space, as insns.
24500 If COPY_REG, leave a copy of the old stack pointer, adjusted by COPY_OFF, in it.
24501 The generated code may use hard register 0 as a temporary. */
24503 static rtx_insn *
24504 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
24506 rtx_insn *insn;
24507 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24508 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24509 rtx todec = gen_int_mode (-size, Pmode);
24510 rtx par, set, mem;
24512 if (INTVAL (todec) != -size)
24514 warning (0, "stack frame too large");
24515 emit_insn (gen_trap ());
24516 return 0;
24519 if (crtl->limit_stack)
24521 if (REG_P (stack_limit_rtx)
24522 && REGNO (stack_limit_rtx) > 1
24523 && REGNO (stack_limit_rtx) <= 31)
24525 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
24526 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24527 const0_rtx));
24529 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
24530 && TARGET_32BIT
24531 && DEFAULT_ABI == ABI_V4)
24533 rtx toload = gen_rtx_CONST (VOIDmode,
24534 gen_rtx_PLUS (Pmode,
24535 stack_limit_rtx,
24536 GEN_INT (size)));
24538 emit_insn (gen_elf_high (tmp_reg, toload));
24539 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
24540 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24541 const0_rtx));
24543 else
24544 warning (0, "stack limit expression is not supported");
24547 if (copy_reg)
24549 if (copy_off != 0)
24550 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
24551 else
24552 emit_move_insn (copy_reg, stack_reg);
24555 if (size > 32767)
24557 /* Need a note here so that try_split doesn't get confused. */
24558 if (get_last_insn () == NULL_RTX)
24559 emit_note (NOTE_INSN_DELETED);
24560 insn = emit_move_insn (tmp_reg, todec);
24561 try_split (PATTERN (insn), insn, 0);
24562 todec = tmp_reg;
24565 insn = emit_insn (TARGET_32BIT
24566 ? gen_movsi_update_stack (stack_reg, stack_reg,
24567 todec, stack_reg)
24568 : gen_movdi_di_update_stack (stack_reg, stack_reg,
24569 todec, stack_reg));
24570 /* Since we didn't use gen_frame_mem to generate the MEM, grab
24571 it now and set the alias set/attributes. The above gen_*_update
24572 calls will generate a PARALLEL with the MEM set being the first
24573 operation. */
24574 par = PATTERN (insn);
24575 gcc_assert (GET_CODE (par) == PARALLEL);
24576 set = XVECEXP (par, 0, 0);
24577 gcc_assert (GET_CODE (set) == SET);
24578 mem = SET_DEST (set);
24579 gcc_assert (MEM_P (mem));
24580 MEM_NOTRAP_P (mem) = 1;
24581 set_mem_alias_set (mem, get_frame_alias_set ());
24583 RTX_FRAME_RELATED_P (insn) = 1;
24584 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24585 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
24586 GEN_INT (-size))));
24587 return insn;
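/* A minimal sketch of the result for a small frame, say SIZE == 64 on
   32-bit: the update-stack move becomes a single "stwu r1,-64(r1)",
   decrementing SP and storing the back chain in one insn, while the
   REG_FRAME_RELATED_EXPR note records SP = SP - 64 for the unwinder.
   Larger sizes first load the negated size into r0.  */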
24590 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
24592 #if PROBE_INTERVAL > 32768
24593 #error Cannot use indexed addressing mode for stack probing
24594 #endif
24596 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
24597 inclusive. These are offsets from the current stack pointer. */
24599 static void
24600 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
24602 /* See if we have a constant small number of probes to generate. If so,
24603 that's the easy case. */
24604 if (first + size <= 32768)
24606 HOST_WIDE_INT i;
24608 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
24609 it exceeds SIZE. If only one probe is needed, this will not
24610 generate any code. Then probe at FIRST + SIZE. */
24611 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
24612 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24613 -(first + i)));
24615 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24616 -(first + size)));
24619 /* Otherwise, do the same as above, but in a loop. Note that we must be
24620 extra careful with variables wrapping around because we might be at
24621 the very top (or the very bottom) of the address space and we have
24622 to be able to handle this case properly; in particular, we use an
24623 equality test for the loop condition. */
24624 else
24626 HOST_WIDE_INT rounded_size;
24627 rtx r12 = gen_rtx_REG (Pmode, 12);
24628 rtx r0 = gen_rtx_REG (Pmode, 0);
24630 /* Sanity check for the addressing mode we're going to use. */
24631 gcc_assert (first <= 32768);
24633 /* Step 1: round SIZE to the previous multiple of the interval. */
24635 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
24638 /* Step 2: compute initial and final value of the loop counter. */
24640 /* TEST_ADDR = SP + FIRST. */
24641 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
24642 -first)));
24644 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
24645 if (rounded_size > 32768)
24647 emit_move_insn (r0, GEN_INT (-rounded_size));
24648 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
24650 else
24651 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
24652 -rounded_size)));
24655 /* Step 3: the loop
24657 do
24658 {
24659 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
24660 probe at TEST_ADDR
24661 }
24662 while (TEST_ADDR != LAST_ADDR)
24664 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
24665 until it is equal to ROUNDED_SIZE. */
24667 if (TARGET_64BIT)
24668 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
24669 else
24670 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
24673 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
24674 that SIZE is equal to ROUNDED_SIZE. */
24676 if (size != rounded_size)
24677 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
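/* A worked example with made-up values: PROBE_INTERVAL == 4096,
   FIRST == 16384, SIZE == 10000.  Since 16384 + 10000 <= 32768 the
   unrolled path runs, probing at sp-20480 and sp-24576 in the loop
   above and finishing with a probe at sp-26384, i.e. -(FIRST+SIZE).  */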
24681 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
24682 absolute addresses. */
24684 const char *
24685 output_probe_stack_range (rtx reg1, rtx reg2)
24687 static int labelno = 0;
24688 char loop_lab[32];
24689 rtx xops[2];
24691 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
24693 /* Loop. */
24694 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24696 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
24697 xops[0] = reg1;
24698 xops[1] = GEN_INT (-PROBE_INTERVAL);
24699 output_asm_insn ("addi %0,%0,%1", xops);
24701 /* Probe at TEST_ADDR. */
24702 xops[1] = gen_rtx_REG (Pmode, 0);
24703 output_asm_insn ("stw %1,0(%0)", xops);
24705 /* Test if TEST_ADDR == LAST_ADDR. */
24706 xops[1] = reg2;
24707 if (TARGET_64BIT)
24708 output_asm_insn ("cmpd 0,%0,%1", xops);
24709 else
24710 output_asm_insn ("cmpw 0,%0,%1", xops);
24712 /* Branch. */
24713 fputs ("\tbne 0,", asm_out_file);
24714 assemble_name_raw (asm_out_file, loop_lab);
24715 fputc ('\n', asm_out_file);
24717 return "";
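/* A hedged sketch of the 32-bit loop emitted above, assuming
   PROBE_INTERVAL == 4096 with TEST_ADDR in r12 and LAST_ADDR in r0:

   LPSRL0: addi 12,12,-4096
   stw 0,0(12)
   cmpw 0,12,0
   bne 0,LPSRL0

   The label spelling follows ASM_GENERATE_INTERNAL_LABEL.  */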
24720 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
24721 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
24722 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
24723 deduce these equivalences by itself so it wasn't necessary to hold
24724 its hand so much. Don't be tempted to always supply d2_f_d_e with
24725 the actual cfa register, i.e. r31 when we are using a hard frame
24726 pointer. That fails when saving regs off r1, and sched moves the
24727 r31 setup past the reg saves. */
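/* A hedged example of the substitution performed below: if the
   prologue saves r29 through a frame register r11 previously set to
   r1+128, then for (set (mem (plus (reg 11) 64)) (reg 29)) with
   REG == r11 and VAL == 128, simplify_replace_rtx rewrites the
   attached note to (set (mem (plus (reg 1) 192)) (reg 29)), so the
   unwinder sees the save slot relative to the stack pointer.  */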
24729 static rtx
24730 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
24731 rtx reg2, rtx repl2)
24733 rtx repl;
24735 if (REGNO (reg) == STACK_POINTER_REGNUM)
24737 gcc_checking_assert (val == 0);
24738 repl = NULL_RTX;
24740 else
24741 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
24742 GEN_INT (val));
24744 rtx pat = PATTERN (insn);
24745 if (!repl && !reg2)
24747 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
24748 if (GET_CODE (pat) == PARALLEL)
24749 for (int i = 0; i < XVECLEN (pat, 0); i++)
24750 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
24752 rtx set = XVECEXP (pat, 0, i);
24754 /* If this PARALLEL has been emitted for out-of-line
24755 register save functions, or store multiple, then omit
24756 eh_frame info for any user-defined global regs. If
24757 eh_frame info is supplied, frame unwinding will
24758 restore a user reg. */
24759 if (!REG_P (SET_SRC (set))
24760 || !fixed_reg_p (REGNO (SET_SRC (set))))
24761 RTX_FRAME_RELATED_P (set) = 1;
24763 RTX_FRAME_RELATED_P (insn) = 1;
24764 return insn;
24767 /* We expect that 'pat' is either a SET or a PARALLEL containing
24768 SETs (and possibly other stuff). In a PARALLEL, all the SETs
24769 are important so they all have to be marked RTX_FRAME_RELATED_P.
24770 Call simplify_replace_rtx on the SETs rather than the whole insn
24771 so as to leave the other stuff alone (for example USE of r12). */
24773 if (GET_CODE (pat) == SET)
24775 if (repl)
24776 pat = simplify_replace_rtx (pat, reg, repl);
24777 if (reg2)
24778 pat = simplify_replace_rtx (pat, reg2, repl2);
24780 else if (GET_CODE (pat) == PARALLEL)
24782 pat = shallow_copy_rtx (pat);
24783 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
24785 for (int i = 0; i < XVECLEN (pat, 0); i++)
24786 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
24788 rtx set = XVECEXP (pat, 0, i);
24790 if (repl)
24791 set = simplify_replace_rtx (set, reg, repl);
24792 if (reg2)
24793 set = simplify_replace_rtx (set, reg2, repl2);
24794 XVECEXP (pat, 0, i) = set;
24796 /* Omit eh_frame info for any user-defined global regs. */
24797 if (!REG_P (SET_SRC (set))
24798 || !fixed_reg_p (REGNO (SET_SRC (set))))
24799 RTX_FRAME_RELATED_P (set) = 1;
24802 else
24803 gcc_unreachable ();
24805 RTX_FRAME_RELATED_P (insn) = 1;
24806 if (repl || reg2)
24807 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
24809 return insn;
24812 /* Returns an insn that has a vrsave set operation with the
24813 appropriate CLOBBERs. */
24815 static rtx
24816 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
24818 int nclobs, i;
24819 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
24820 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24822 clobs[0]
24823 = gen_rtx_SET (vrsave,
24824 gen_rtx_UNSPEC_VOLATILE (SImode,
24825 gen_rtvec (2, reg, vrsave),
24826 UNSPECV_SET_VRSAVE));
24828 nclobs = 1;
24830 /* We need to clobber the registers in the mask so the scheduler
24831 does not move sets to VRSAVE before sets of AltiVec registers.
24833 However, if the function receives nonlocal gotos, reload will set
24834 all call saved registers live. We will end up with:
24836 (set (reg 999) (mem))
24837 (parallel [ (set (reg vrsave) (unspec blah))
24838 (clobber (reg 999))])
24840 The clobber will cause the store into reg 999 to be dead, and
24841 flow will attempt to delete an epilogue insn. In this case, we
24842 need an unspec use/set of the register. */
24844 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24845 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24847 if (!epiloguep || call_used_regs [i])
24848 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
24849 gen_rtx_REG (V4SImode, i));
24850 else
24852 rtx reg = gen_rtx_REG (V4SImode, i);
24854 clobs[nclobs++]
24855 = gen_rtx_SET (reg,
24856 gen_rtx_UNSPEC (V4SImode,
24857 gen_rtvec (1, reg), 27));
24861 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
24863 for (i = 0; i < nclobs; ++i)
24864 XVECEXP (insn, 0, i) = clobs[i];
24866 return insn;
24869 static rtx
24870 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
24872 rtx addr, mem;
24874 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
24875 mem = gen_frame_mem (GET_MODE (reg), addr);
24876 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
24879 static rtx
24880 gen_frame_load (rtx reg, rtx frame_reg, int offset)
24882 return gen_frame_set (reg, frame_reg, offset, false);
24885 static rtx
24886 gen_frame_store (rtx reg, rtx frame_reg, int offset)
24888 return gen_frame_set (reg, frame_reg, offset, true);
24891 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
24892 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
24894 static rtx
24895 emit_frame_save (rtx frame_reg, machine_mode mode,
24896 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
24898 rtx reg, insn;
24900 /* Check we have none of the cases that need register indexed addressing. */
24901 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
24902 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24903 || (TARGET_E500_DOUBLE && mode == DFmode)
24904 || (TARGET_SPE_ABI
24905 && SPE_VECTOR_MODE (mode)
24906 && !SPE_CONST_OFFSET_OK (offset))));
24908 reg = gen_rtx_REG (mode, regno);
24909 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
24910 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
24911 NULL_RTX, NULL_RTX);
24914 /* Emit an offset memory reference suitable for a frame store, while
24915 converting to a valid addressing mode. */
24917 static rtx
24918 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
24920 rtx int_rtx, offset_rtx;
24922 int_rtx = GEN_INT (offset);
24924 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
24925 || (TARGET_E500_DOUBLE && mode == DFmode))
24927 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
24928 emit_move_insn (offset_rtx, int_rtx);
24930 else
24931 offset_rtx = int_rtx;
24933 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
24936 #ifndef TARGET_FIX_AND_CONTINUE
24937 #define TARGET_FIX_AND_CONTINUE 0
24938 #endif
24940 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
24941 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
24942 #define LAST_SAVRES_REGISTER 31
24943 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
24945 enum {
24946 SAVRES_LR = 0x1,
24947 SAVRES_SAVE = 0x2,
24948 SAVRES_REG = 0x0c,
24949 SAVRES_GPR = 0,
24950 SAVRES_FPR = 4,
24951 SAVRES_VR = 8
24954 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
24956 /* Temporary holding space for an out-of-line register save/restore
24957 routine name. */
24958 static char savres_routine_name[30];
24960 /* Return the name for an out-of-line register save/restore routine.
24961 SEL selects the register class and whether we save or restore. */
24963 static char *
24964 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
24966 const char *prefix = "";
24967 const char *suffix = "";
24969 /* Different targets are supposed to define
24970 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
24971 routine name could be defined with:
24973 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
24975 This is a nice idea in principle, but in reality, things are
24976 complicated in several ways:
24978 - ELF targets have save/restore routines for GPRs.
24980 - SPE targets use different prefixes for 32/64-bit registers, and
24981 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
24983 - PPC64 ELF targets have routines for save/restore of GPRs that
24984 differ in what they do with the link register, so having a set
24985 prefix doesn't work. (We only use one of the save routines at
24986 the moment, though.)
24988 - PPC32 elf targets have "exit" versions of the restore routines
24989 that restore the link register and can save some extra space.
24990 These require an extra suffix. (There are also "tail" versions
24991 of the restore routines and "GOT" versions of the save routines,
24992 but we don't generate those at present. Same problems apply,
24993 though.)
24995 We deal with all this by synthesizing our own prefix/suffix and
24996 using that for the simple sprintf call shown above. */
24997 if (TARGET_SPE)
24999 /* No floating point saves on the SPE. */
25000 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
25002 if ((sel & SAVRES_SAVE))
25003 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
25004 else
25005 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
25007 if ((sel & SAVRES_LR))
25008 suffix = "_x";
25010 else if (DEFAULT_ABI == ABI_V4)
25012 if (TARGET_64BIT)
25013 goto aix_names;
25015 if ((sel & SAVRES_REG) == SAVRES_GPR)
25016 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25017 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25018 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25019 else if ((sel & SAVRES_REG) == SAVRES_VR)
25020 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25021 else
25022 abort ();
25024 if ((sel & SAVRES_LR))
25025 suffix = "_x";
25027 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25029 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25030 /* No out-of-line save/restore routines for GPRs on AIX. */
25031 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25032 #endif
25034 aix_names:
25035 if ((sel & SAVRES_REG) == SAVRES_GPR)
25036 prefix = ((sel & SAVRES_SAVE)
25037 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25038 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25039 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25041 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25042 if ((sel & SAVRES_LR))
25043 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25044 else
25045 #endif
25047 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25048 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25051 else if ((sel & SAVRES_REG) == SAVRES_VR)
25052 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25053 else
25054 abort ();
25057 if (DEFAULT_ABI == ABI_DARWIN)
25059 /* The Darwin approach is (slightly) different, in order to be
25060 compatible with code generated by the system toolchain. There is a
25061 single symbol for the start of the save sequence, and the code here
25062 embeds an offset into that code on the basis of the first register
25063 to be saved. */
25064 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25065 if ((sel & SAVRES_REG) == SAVRES_GPR)
25066 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25067 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25068 (regno - 13) * 4, prefix, regno);
25069 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25070 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25071 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25072 else if ((sel & SAVRES_REG) == SAVRES_VR)
25073 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25074 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25075 else
25076 abort ();
25078 else
25079 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25081 return savres_routine_name;
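/* Hedged examples of the names synthesized above: a 32-bit SysV GPR
   save from r29 yields "_savegpr_29" and its "exit" restore variant
   "_restgpr_29_x", while a 64-bit ELF GPR save that also stores LR
   yields "_savegpr0_29".  */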
25084 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25085 SEL selects the register class and whether we save or restore. */
25087 static rtx
25088 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25090 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25091 ? info->first_gp_reg_save
25092 : (sel & SAVRES_REG) == SAVRES_FPR
25093 ? info->first_fp_reg_save - 32
25094 : (sel & SAVRES_REG) == SAVRES_VR
25095 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25096 : -1);
25097 rtx sym;
25098 int select = sel;
25100 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
25101 versions of the gpr routines. */
25102 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
25103 && info->spe_64bit_regs_used)
25104 select ^= SAVRES_FPR ^ SAVRES_GPR;
25106 /* Don't generate bogus routine names. */
25107 gcc_assert (FIRST_SAVRES_REGISTER <= regno
25108 && regno <= LAST_SAVRES_REGISTER
25109 && select >= 0 && select < 12);
25111 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
25113 if (sym == NULL)
25115 char *name;
25117 name = rs6000_savres_routine_name (info, regno, sel);
25119 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
25120 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
25121 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
25124 return sym;
25127 /* Emit a sequence of insns, including a stack tie if needed, for
25128 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
25129 reset the stack pointer, but move the base of the frame into
25130 reg UPDT_REGNO for use by out-of-line register restore routines. */
25132 static rtx
25133 rs6000_emit_stack_reset (rs6000_stack_t *info,
25134 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
25135 unsigned updt_regno)
25137 rtx updt_reg_rtx;
25139 /* This blockage is needed so that sched doesn't decide to move
25140 the sp change before the register restores. */
25141 if (DEFAULT_ABI == ABI_V4
25142 || (TARGET_SPE_ABI
25143 && info->spe_64bit_regs_used != 0
25144 && info->first_gp_reg_save != 32))
25145 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
25147 /* If we are restoring registers out-of-line, we will be using the
25148 "exit" variants of the restore routines, which will reset the
25149 stack for us. But we do need to point updt_reg into the
25150 right place for those routines. */
25151 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
25153 if (frame_off != 0)
25154 return emit_insn (gen_add3_insn (updt_reg_rtx,
25155 frame_reg_rtx, GEN_INT (frame_off)));
25156 else if (REGNO (frame_reg_rtx) != updt_regno)
25157 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
25159 return NULL_RTX;
25162 /* Return the register number used as a pointer by out-of-line
25163 save/restore functions. */
25165 static inline unsigned
25166 ptr_regno_for_savres (int sel)
25168 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25169 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
25170 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
25173 /* Construct a parallel rtx describing the effect of a call to an
25174 out-of-line register save/restore routine, and emit the insn
25175 or jump_insn as appropriate. */
25177 static rtx
25178 rs6000_emit_savres_rtx (rs6000_stack_t *info,
25179 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
25180 machine_mode reg_mode, int sel)
25182 int i;
25183 int offset, start_reg, end_reg, n_regs, use_reg;
25184 int reg_size = GET_MODE_SIZE (reg_mode);
25185 rtx sym;
25186 rtvec p;
25187 rtx par, insn;
25189 offset = 0;
25190 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25191 ? info->first_gp_reg_save
25192 : (sel & SAVRES_REG) == SAVRES_FPR
25193 ? info->first_fp_reg_save
25194 : (sel & SAVRES_REG) == SAVRES_VR
25195 ? info->first_altivec_reg_save
25196 : -1);
25197 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25198 ? 32
25199 : (sel & SAVRES_REG) == SAVRES_FPR
25200 ? 64
25201 : (sel & SAVRES_REG) == SAVRES_VR
25202 ? LAST_ALTIVEC_REGNO + 1
25203 : -1);
25204 n_regs = end_reg - start_reg;
25205 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
25206 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
25207 + n_regs);
25209 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25210 RTVEC_ELT (p, offset++) = ret_rtx;
25212 RTVEC_ELT (p, offset++)
25213 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
25215 sym = rs6000_savres_routine_sym (info, sel);
25216 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
25218 use_reg = ptr_regno_for_savres (sel);
25219 if ((sel & SAVRES_REG) == SAVRES_VR)
25221 /* Vector regs are saved/restored using [reg+reg] addressing. */
25222 RTVEC_ELT (p, offset++)
25223 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25224 RTVEC_ELT (p, offset++)
25225 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
25227 else
25228 RTVEC_ELT (p, offset++)
25229 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25231 for (i = 0; i < end_reg - start_reg; i++)
25232 RTVEC_ELT (p, i + offset)
25233 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
25234 frame_reg_rtx, save_area_offset + reg_size * i,
25235 (sel & SAVRES_SAVE) != 0);
25237 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25238 RTVEC_ELT (p, i + offset)
25239 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
25241 par = gen_rtx_PARALLEL (VOIDmode, p);
25243 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25245 insn = emit_jump_insn (par);
25246 JUMP_LABEL (insn) = ret_rtx;
25248 else
25249 insn = emit_insn (par);
25250 return insn;
25253 /* Emit code to store CR fields that need to be saved into REG. */
25255 static void
25256 rs6000_emit_move_from_cr (rtx reg)
25258 /* Only the ELFv2 ABI allows storing only selected fields. */
25259 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
25261 int i, cr_reg[8], count = 0;
25263 /* Collect CR fields that must be saved. */
25264 for (i = 0; i < 8; i++)
25265 if (save_reg_p (CR0_REGNO + i))
25266 cr_reg[count++] = i;
25268 /* If it's just a single one, use mfcrf. */
25269 if (count == 1)
25271 rtvec p = rtvec_alloc (1);
25272 rtvec r = rtvec_alloc (2);
25273 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
25274 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
25275 RTVEC_ELT (p, 0)
25276 = gen_rtx_SET (reg,
25277 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
25279 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25280 return;
25283 /* ??? It might be better to handle count == 2 / 3 cases here
25284 as well, using logical operations to combine the values. */
25287 emit_insn (gen_movesi_from_cr (reg));
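/* An example of the single-field path above: if only CR2 must be
   saved, cr_reg[0] == 2 and the field mask is 1 << (7 - 2) == 0x20,
   so the UNSPEC typically assembles to "mfcrf reg,0x20" instead of
   reading the whole CR with mfcr.  */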
25290 /* Return whether the split-stack arg pointer (r12) is used. */
25292 static bool
25293 split_stack_arg_pointer_used_p (void)
25295 /* If the pseudo holding the arg pointer is no longer a pseudo,
25296 then the arg pointer is used. */
25297 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
25298 && (!REG_P (cfun->machine->split_stack_arg_pointer)
25299 || (REGNO (cfun->machine->split_stack_arg_pointer)
25300 < FIRST_PSEUDO_REGISTER)))
25301 return true;
25303 /* Unfortunately we also need to do some code scanning, since
25304 r12 may have been substituted for the pseudo. */
25305 rtx_insn *insn;
25306 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
25307 FOR_BB_INSNS (bb, insn)
25308 if (NONDEBUG_INSN_P (insn))
25310 /* A call destroys r12. */
25311 if (CALL_P (insn))
25312 return false;
25314 df_ref use;
25315 FOR_EACH_INSN_USE (use, insn)
25317 rtx x = DF_REF_REG (use);
25318 if (REG_P (x) && REGNO (x) == 12)
25319 return true;
25321 df_ref def;
25322 FOR_EACH_INSN_DEF (def, insn)
25324 rtx x = DF_REF_REG (def);
25325 if (REG_P (x) && REGNO (x) == 12)
25326 return false;
25329 return bitmap_bit_p (DF_LR_OUT (bb), 12);
25332 /* Return whether we need to emit an ELFv2 global entry point prologue. */
25334 static bool
25335 rs6000_global_entry_point_needed_p (void)
25337 /* Only needed for the ELFv2 ABI. */
25338 if (DEFAULT_ABI != ABI_ELFv2)
25339 return false;
25341 /* With -msingle-pic-base, we assume the whole program shares the same
25342 TOC, so no global entry point prologues are needed anywhere. */
25343 if (TARGET_SINGLE_PIC_BASE)
25344 return false;
25346 /* Ensure we have a global entry point for thunks. ??? We could
25347 avoid that if the target routine doesn't need a global entry point,
25348 but we do not know whether this is the case at this point. */
25349 if (cfun->is_thunk)
25350 return true;
25352 /* For regular functions, rs6000_emit_prologue sets this flag if the
25353 routine ever uses the TOC pointer. */
25354 return cfun->machine->r2_setup_needed;
25357 /* Emit function prologue as insns. */
25359 void
25360 rs6000_emit_prologue (void)
25362 rs6000_stack_t *info = rs6000_stack_info ();
25363 machine_mode reg_mode = Pmode;
25364 int reg_size = TARGET_32BIT ? 4 : 8;
25365 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25366 rtx frame_reg_rtx = sp_reg_rtx;
25367 unsigned int cr_save_regno;
25368 rtx cr_save_rtx = NULL_RTX;
25369 rtx insn;
25370 int strategy;
25371 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25372 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25373 && call_used_regs[STATIC_CHAIN_REGNUM]);
25374 int using_split_stack = (flag_split_stack
25375 && (lookup_attribute ("no_split_stack",
25376 DECL_ATTRIBUTES (cfun->decl))
25377 == NULL));
25379 /* Offset to top of frame for frame_reg and sp respectively. */
25380 HOST_WIDE_INT frame_off = 0;
25381 HOST_WIDE_INT sp_off = 0;
25382 /* sp_adjust is the stack adjusting instruction, tracked so that the
25383 insn setting up the split-stack arg pointer can be emitted just
25384 prior to it, when r12 is not used here for other purposes. */
25385 rtx_insn *sp_adjust = 0;
25387 #if CHECKING_P
25388 /* Track and check usage of r0, r11, r12. */
25389 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
25390 #define START_USE(R) do \
25392 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
25393 reg_inuse |= 1 << (R); \
25394 } while (0)
25395 #define END_USE(R) do \
25397 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
25398 reg_inuse &= ~(1 << (R)); \
25399 } while (0)
25400 #define NOT_INUSE(R) do \
25402 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
25403 } while (0)
25404 #else
25405 #define START_USE(R) do {} while (0)
25406 #define END_USE(R) do {} while (0)
25407 #define NOT_INUSE(R) do {} while (0)
25408 #endif
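  /* For illustration only, a minimal sketch of how these checking
     macros pair up in the code below (hypothetical fragment):

	 START_USE (0);			claim r0 for scratch use
	 emit_move_insn (reg0, some_rtx);
	 END_USE (0);			release r0 again

     NOT_INUSE merely asserts that a register is free; it does not
     claim it.  */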
25410 if (DEFAULT_ABI == ABI_ELFv2
25411 && !TARGET_SINGLE_PIC_BASE)
25413 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
25415 /* With -mminimal-toc we may generate an extra use of r2 below. */
25416 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
25417 cfun->machine->r2_setup_needed = true;
25421 if (flag_stack_usage_info)
25422 current_function_static_stack_size = info->total_size;
25424 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
25426 HOST_WIDE_INT size = info->total_size;
25428 if (crtl->is_leaf && !cfun->calls_alloca)
25430 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
25431 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
25432 size - STACK_CHECK_PROTECT);
25434 else if (size > 0)
25435 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
25438 if (TARGET_FIX_AND_CONTINUE)
25440 /* gdb on darwin arranges to forward a function from the old
25441 address by modifying the first 5 instructions of the function
25442 to branch to the overriding function. This is necessary to
25443 permit function pointers that point to the old function to
25444 actually forward to the new function. */
25445 emit_insn (gen_nop ());
25446 emit_insn (gen_nop ());
25447 emit_insn (gen_nop ());
25448 emit_insn (gen_nop ());
25449 emit_insn (gen_nop ());
25452 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25454 reg_mode = V2SImode;
25455 reg_size = 8;
25458 /* Handle world saves specially here. */
25459 if (WORLD_SAVE_P (info))
25461 int i, j, sz;
25462 rtx treg;
25463 rtvec p;
25464 rtx reg0;
25466 /* save_world expects lr in r0. */
25467 reg0 = gen_rtx_REG (Pmode, 0);
25468 if (info->lr_save_p)
25470 insn = emit_move_insn (reg0,
25471 gen_rtx_REG (Pmode, LR_REGNO));
25472 RTX_FRAME_RELATED_P (insn) = 1;
25475 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
25476 assumptions about the offsets of various bits of the stack
25477 frame. */
25478 gcc_assert (info->gp_save_offset == -220
25479 && info->fp_save_offset == -144
25480 && info->lr_save_offset == 8
25481 && info->cr_save_offset == 4
25482 && info->push_p
25483 && info->lr_save_p
25484 && (!crtl->calls_eh_return
25485 || info->ehrd_offset == -432)
25486 && info->vrsave_save_offset == -224
25487 && info->altivec_save_offset == -416);
25489 treg = gen_rtx_REG (SImode, 11);
25490 emit_move_insn (treg, GEN_INT (-info->total_size));
25492 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
25493 in R11. It also clobbers R12, so beware! */
25495 /* Preserve CR2 for save_world prologues */
25496 sz = 5;
25497 sz += 32 - info->first_gp_reg_save;
25498 sz += 64 - info->first_fp_reg_save;
25499 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
25500 p = rtvec_alloc (sz);
25501 j = 0;
25502 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
25503 gen_rtx_REG (SImode,
25504 LR_REGNO));
25505 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
25506 gen_rtx_SYMBOL_REF (Pmode,
25507 "*save_world"));
25508 /* We do floats first so that the instruction pattern matches
25509 properly. */
25510 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25511 RTVEC_ELT (p, j++)
25512 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25513 ? DFmode : SFmode,
25514 info->first_fp_reg_save + i),
25515 frame_reg_rtx,
25516 info->fp_save_offset + frame_off + 8 * i);
25517 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
25518 RTVEC_ELT (p, j++)
25519 = gen_frame_store (gen_rtx_REG (V4SImode,
25520 info->first_altivec_reg_save + i),
25521 frame_reg_rtx,
25522 info->altivec_save_offset + frame_off + 16 * i);
25523 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25524 RTVEC_ELT (p, j++)
25525 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25526 frame_reg_rtx,
25527 info->gp_save_offset + frame_off + reg_size * i);
25529 /* CR register traditionally saved as CR2. */
25530 RTVEC_ELT (p, j++)
25531 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
25532 frame_reg_rtx, info->cr_save_offset + frame_off);
 25533 /* Explain the use of R0. */
25534 if (info->lr_save_p)
25535 RTVEC_ELT (p, j++)
25536 = gen_frame_store (reg0,
25537 frame_reg_rtx, info->lr_save_offset + frame_off);
25538 /* Explain what happens to the stack pointer. */
25540 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
25541 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
25544 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25545 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25546 treg, GEN_INT (-info->total_size));
25547 sp_off = frame_off = info->total_size;
25550 strategy = info->savres_strategy;
25552 /* For V.4, update stack before we do any saving and set back pointer. */
25553 if (! WORLD_SAVE_P (info)
25554 && info->push_p
25555 && (DEFAULT_ABI == ABI_V4
25556 || crtl->calls_eh_return))
25558 bool need_r11 = (TARGET_SPE
25559 ? (!(strategy & SAVE_INLINE_GPRS)
25560 && info->spe_64bit_regs_used == 0)
25561 : (!(strategy & SAVE_INLINE_FPRS)
25562 || !(strategy & SAVE_INLINE_GPRS)
25563 || !(strategy & SAVE_INLINE_VRS)));
25564 int ptr_regno = -1;
25565 rtx ptr_reg = NULL_RTX;
25566 int ptr_off = 0;
25568 if (info->total_size < 32767)
25569 frame_off = info->total_size;
25570 else if (need_r11)
25571 ptr_regno = 11;
25572 else if (info->cr_save_p
25573 || info->lr_save_p
25574 || info->first_fp_reg_save < 64
25575 || info->first_gp_reg_save < 32
25576 || info->altivec_size != 0
25577 || info->vrsave_size != 0
25578 || crtl->calls_eh_return)
25579 ptr_regno = 12;
25580 else
25582 /* The prologue won't be saving any regs so there is no need
25583 to set up a frame register to access any frame save area.
25584 We also won't be using frame_off anywhere below, but set
25585 the correct value anyway to protect against future
25586 changes to this function. */
25587 frame_off = info->total_size;
25589 if (ptr_regno != -1)
25591 /* Set up the frame offset to that needed by the first
25592 out-of-line save function. */
25593 START_USE (ptr_regno);
25594 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25595 frame_reg_rtx = ptr_reg;
25596 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
25597 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
25598 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
25599 ptr_off = info->gp_save_offset + info->gp_size;
25600 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
25601 ptr_off = info->altivec_save_offset + info->altivec_size;
25602 frame_off = -ptr_off;
25604 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
25605 ptr_reg, ptr_off);
25606 if (REGNO (frame_reg_rtx) == 12)
25607 sp_adjust = 0;
25608 sp_off = info->total_size;
25609 if (frame_reg_rtx != sp_reg_rtx)
25610 rs6000_emit_stack_tie (frame_reg_rtx, false);
25613 /* If we use the link register, get it into r0. */
25614 if (!WORLD_SAVE_P (info) && info->lr_save_p)
25616 rtx addr, reg, mem;
25618 reg = gen_rtx_REG (Pmode, 0);
25619 START_USE (0);
25620 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
25621 RTX_FRAME_RELATED_P (insn) = 1;
25623 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
25624 | SAVE_NOINLINE_FPRS_SAVES_LR)))
25626 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25627 GEN_INT (info->lr_save_offset + frame_off));
25628 mem = gen_rtx_MEM (Pmode, addr);
 25629 /* This store should not be in rs6000_sr_alias_set, because of
 25630 __builtin_return_address. */
25632 insn = emit_move_insn (mem, reg);
25633 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25634 NULL_RTX, NULL_RTX);
25635 END_USE (0);
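      /* The sequence just emitted amounts to (a sketch, 64-bit
	 mnemonics):
	     mflr 0
	     std  0,lr_save_offset(frame_reg)
	 with the DWARF save record attached to the store by the
	 rs6000_frame_related call above.  */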
25639 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
 25640 r12 will be needed by the out-of-line gpr save. */
25641 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25642 && !(strategy & (SAVE_INLINE_GPRS
25643 | SAVE_NOINLINE_GPRS_SAVES_LR))
25644 ? 11 : 12);
25645 if (!WORLD_SAVE_P (info)
25646 && info->cr_save_p
25647 && REGNO (frame_reg_rtx) != cr_save_regno
25648 && !(using_static_chain_p && cr_save_regno == 11)
25649 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
25651 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
25652 START_USE (cr_save_regno);
25653 rs6000_emit_move_from_cr (cr_save_rtx);
25656 /* Do any required saving of fpr's. If only one or two to save, do
 25657 it ourselves. Otherwise, call an out-of-line save function. */
25658 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
25660 int i;
25661 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25662 if (save_reg_p (info->first_fp_reg_save + i))
25663 emit_frame_save (frame_reg_rtx,
25664 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25665 ? DFmode : SFmode),
25666 info->first_fp_reg_save + i,
25667 info->fp_save_offset + frame_off + 8 * i,
25668 sp_off - frame_off);
25670 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
25672 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25673 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25674 unsigned ptr_regno = ptr_regno_for_savres (sel);
25675 rtx ptr_reg = frame_reg_rtx;
25677 if (REGNO (frame_reg_rtx) == ptr_regno)
25678 gcc_checking_assert (frame_off == 0);
25679 else
25681 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25682 NOT_INUSE (ptr_regno);
25683 emit_insn (gen_add3_insn (ptr_reg,
25684 frame_reg_rtx, GEN_INT (frame_off)));
25686 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25687 info->fp_save_offset,
25688 info->lr_save_offset,
25689 DFmode, sel);
25690 rs6000_frame_related (insn, ptr_reg, sp_off,
25691 NULL_RTX, NULL_RTX);
25692 if (lr)
25693 END_USE (0);
25696 /* Save GPRs. This is done as a PARALLEL if we are using
25697 the store-multiple instructions. */
25698 if (!WORLD_SAVE_P (info)
25699 && TARGET_SPE_ABI
25700 && info->spe_64bit_regs_used != 0
25701 && info->first_gp_reg_save != 32)
25703 int i;
25704 rtx spe_save_area_ptr;
25705 HOST_WIDE_INT save_off;
25706 int ool_adjust = 0;
25708 /* Determine whether we can address all of the registers that need
25709 to be saved with an offset from frame_reg_rtx that fits in
25710 the small const field for SPE memory instructions. */
25711 int spe_regs_addressable
25712 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25713 + reg_size * (32 - info->first_gp_reg_save - 1))
25714 && (strategy & SAVE_INLINE_GPRS));
25716 if (spe_regs_addressable)
25718 spe_save_area_ptr = frame_reg_rtx;
25719 save_off = frame_off;
25721 else
25723 /* Make r11 point to the start of the SPE save area. We need
25724 to be careful here if r11 is holding the static chain. If
25725 it is, then temporarily save it in r0. */
25726 HOST_WIDE_INT offset;
25728 if (!(strategy & SAVE_INLINE_GPRS))
25729 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25730 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
25731 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
25732 save_off = frame_off - offset;
25734 if (using_static_chain_p)
25736 rtx r0 = gen_rtx_REG (Pmode, 0);
25738 START_USE (0);
25739 gcc_assert (info->first_gp_reg_save > 11);
25741 emit_move_insn (r0, spe_save_area_ptr);
25743 else if (REGNO (frame_reg_rtx) != 11)
25744 START_USE (11);
25746 emit_insn (gen_addsi3 (spe_save_area_ptr,
25747 frame_reg_rtx, GEN_INT (offset)));
25748 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
25749 frame_off = -info->spe_gp_save_offset + ool_adjust;
25752 if ((strategy & SAVE_INLINE_GPRS))
25754 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25755 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25756 emit_frame_save (spe_save_area_ptr, reg_mode,
25757 info->first_gp_reg_save + i,
25758 (info->spe_gp_save_offset + save_off
25759 + reg_size * i),
25760 sp_off - save_off);
25762 else
25764 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
25765 info->spe_gp_save_offset + save_off,
25766 0, reg_mode,
25767 SAVRES_SAVE | SAVRES_GPR);
25769 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
25770 NULL_RTX, NULL_RTX);
25773 /* Move the static chain pointer back. */
25774 if (!spe_regs_addressable)
25776 if (using_static_chain_p)
25778 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
25779 END_USE (0);
25781 else if (REGNO (frame_reg_rtx) != 11)
25782 END_USE (11);
25785 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
25787 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
25788 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
25789 unsigned ptr_regno = ptr_regno_for_savres (sel);
25790 rtx ptr_reg = frame_reg_rtx;
25791 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
25792 int end_save = info->gp_save_offset + info->gp_size;
25793 int ptr_off;
25795 if (ptr_regno == 12)
25796 sp_adjust = 0;
25797 if (!ptr_set_up)
25798 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25800 /* Need to adjust r11 (r12) if we saved any FPRs. */
25801 if (end_save + frame_off != 0)
25803 rtx offset = GEN_INT (end_save + frame_off);
25805 if (ptr_set_up)
25806 frame_off = -end_save;
25807 else
25808 NOT_INUSE (ptr_regno);
25809 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25811 else if (!ptr_set_up)
25813 NOT_INUSE (ptr_regno);
25814 emit_move_insn (ptr_reg, frame_reg_rtx);
25816 ptr_off = -end_save;
25817 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25818 info->gp_save_offset + ptr_off,
25819 info->lr_save_offset + ptr_off,
25820 reg_mode, sel);
25821 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
25822 NULL_RTX, NULL_RTX);
25823 if (lr)
25824 END_USE (0);
25826 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
25828 rtvec p;
25829 int i;
25830 p = rtvec_alloc (32 - info->first_gp_reg_save);
25831 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25832 RTVEC_ELT (p, i)
25833 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25834 frame_reg_rtx,
25835 info->gp_save_offset + frame_off + reg_size * i);
25836 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25837 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25838 NULL_RTX, NULL_RTX);
25840 else if (!WORLD_SAVE_P (info))
25842 int i;
25843 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25844 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25845 emit_frame_save (frame_reg_rtx, reg_mode,
25846 info->first_gp_reg_save + i,
25847 info->gp_save_offset + frame_off + reg_size * i,
25848 sp_off - frame_off);
25851 if (crtl->calls_eh_return)
25853 unsigned int i;
25854 rtvec p;
25856 for (i = 0; ; ++i)
25858 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25859 if (regno == INVALID_REGNUM)
25860 break;
25863 p = rtvec_alloc (i);
25865 for (i = 0; ; ++i)
25867 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25868 if (regno == INVALID_REGNUM)
25869 break;
25871 insn
25872 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
25873 sp_reg_rtx,
25874 info->ehrd_offset + sp_off + reg_size * (int) i);
25875 RTVEC_ELT (p, i) = insn;
25876 RTX_FRAME_RELATED_P (insn) = 1;
25879 insn = emit_insn (gen_blockage ());
25880 RTX_FRAME_RELATED_P (insn) = 1;
25881 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
 25884 /* In the AIX ABI we need to make sure r2 is really saved. */
25885 if (TARGET_AIX && crtl->calls_eh_return)
25887 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
25888 rtx save_insn, join_insn, note;
25889 long toc_restore_insn;
25891 tmp_reg = gen_rtx_REG (Pmode, 11);
25892 tmp_reg_si = gen_rtx_REG (SImode, 11);
25893 if (using_static_chain_p)
25895 START_USE (0);
25896 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
25898 else
25899 START_USE (11);
25900 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
 25901 /* Peek at the instruction to which this function returns. If it's
25902 restoring r2, then we know we've already saved r2. We can't
25903 unconditionally save r2 because the value we have will already
25904 be updated if we arrived at this function via a plt call or
25905 toc adjusting stub. */
25906 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
25907 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
25908 + RS6000_TOC_SAVE_SLOT);
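      /* That is, the image of "ld 2,40(1)" on 64-bit or "lwz 2,20(1)"
	 on 32-bit, assuming RS6000_TOC_SAVE_SLOT is 40 respectively 20;
	 the xor/compare below test the loaded word against this
	 value.  */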
25909 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
25910 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
25911 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
25912 validate_condition_mode (EQ, CCUNSmode);
25913 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
25914 emit_insn (gen_rtx_SET (compare_result,
25915 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
25916 toc_save_done = gen_label_rtx ();
25917 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25918 gen_rtx_EQ (VOIDmode, compare_result,
25919 const0_rtx),
25920 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
25921 pc_rtx);
25922 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25923 JUMP_LABEL (jump) = toc_save_done;
25924 LABEL_NUSES (toc_save_done) += 1;
25926 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
25927 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
25928 sp_off - frame_off);
25930 emit_label (toc_save_done);
25932 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
25933 have a CFG that has different saves along different paths.
25934 Move the note to a dummy blockage insn, which describes that
25935 R2 is unconditionally saved after the label. */
25936 /* ??? An alternate representation might be a special insn pattern
 25937 containing both the branch and the store. That might give the
 25938 code that minimizes the number of DW_CFA_advance opcodes greater
 25939 freedom in placing the annotations. */
25940 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
25941 if (note)
25942 remove_note (save_insn, note);
25943 else
25944 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
25945 copy_rtx (PATTERN (save_insn)), NULL_RTX);
25946 RTX_FRAME_RELATED_P (save_insn) = 0;
25948 join_insn = emit_insn (gen_blockage ());
25949 REG_NOTES (join_insn) = note;
25950 RTX_FRAME_RELATED_P (join_insn) = 1;
25952 if (using_static_chain_p)
25954 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
25955 END_USE (0);
25957 else
25958 END_USE (11);
25961 /* Save CR if we use any that must be preserved. */
25962 if (!WORLD_SAVE_P (info) && info->cr_save_p)
25964 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25965 GEN_INT (info->cr_save_offset + frame_off));
25966 rtx mem = gen_frame_mem (SImode, addr);
25968 /* If we didn't copy cr before, do so now using r0. */
25969 if (cr_save_rtx == NULL_RTX)
25971 START_USE (0);
25972 cr_save_rtx = gen_rtx_REG (SImode, 0);
25973 rs6000_emit_move_from_cr (cr_save_rtx);
25976 /* Saving CR requires a two-instruction sequence: one instruction
25977 to move the CR to a general-purpose register, and a second
25978 instruction that stores the GPR to memory.
25980 We do not emit any DWARF CFI records for the first of these,
25981 because we cannot properly represent the fact that CR is saved in
25982 a register. One reason is that we cannot express that multiple
25983 CR fields are saved; another reason is that on 64-bit, the size
25984 of the CR register in DWARF (4 bytes) differs from the size of
25985 a general-purpose register.
25987 This means if any intervening instruction were to clobber one of
25988 the call-saved CR fields, we'd have incorrect CFI. To prevent
25989 this from happening, we mark the store to memory as a use of
25990 those CR fields, which prevents any such instruction from being
25991 scheduled in between the two instructions. */
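      /* Concretely, the pair in question is a sketch of:
	     mfcr rN		(done above, possibly as mfcrf)
	     stw  rN,cr_save_offset(frame_reg)
	 with the USEs of the saved CR fields attached to the store.  */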
25992 rtx crsave_v[9];
25993 int n_crsave = 0;
25994 int i;
25996 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
25997 for (i = 0; i < 8; i++)
25998 if (save_reg_p (CR0_REGNO + i))
25999 crsave_v[n_crsave++]
26000 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26002 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26003 gen_rtvec_v (n_crsave, crsave_v)));
26004 END_USE (REGNO (cr_save_rtx));
26006 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26007 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26008 so we need to construct a frame expression manually. */
26009 RTX_FRAME_RELATED_P (insn) = 1;
26011 /* Update address to be stack-pointer relative, like
26012 rs6000_frame_related would do. */
26013 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26014 GEN_INT (info->cr_save_offset + sp_off));
26015 mem = gen_frame_mem (SImode, addr);
26017 if (DEFAULT_ABI == ABI_ELFv2)
26019 /* In the ELFv2 ABI we generate separate CFI records for each
26020 CR field that was actually saved. They all point to the
26021 same 32-bit stack slot. */
26022 rtx crframe[8];
26023 int n_crframe = 0;
26025 for (i = 0; i < 8; i++)
26026 if (save_reg_p (CR0_REGNO + i))
26028 crframe[n_crframe]
26029 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26031 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26032 n_crframe++;
26035 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26036 gen_rtx_PARALLEL (VOIDmode,
26037 gen_rtvec_v (n_crframe, crframe)));
26039 else
26041 /* In other ABIs, by convention, we use a single CR regnum to
26042 represent the fact that all call-saved CR fields are saved.
26043 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26044 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26045 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26049 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26050 *separate* slots if the routine calls __builtin_eh_return, so
26051 that they can be independently restored by the unwinder. */
26052 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26054 int i, cr_off = info->ehcr_offset;
26055 rtx crsave;
26057 /* ??? We might get better performance by using multiple mfocrf
26058 instructions. */
26059 crsave = gen_rtx_REG (SImode, 0);
26060 emit_insn (gen_movesi_from_cr (crsave));
26062 for (i = 0; i < 8; i++)
26063 if (!call_used_regs[CR0_REGNO + i])
26065 rtvec p = rtvec_alloc (2);
26066 RTVEC_ELT (p, 0)
26067 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26068 RTVEC_ELT (p, 1)
26069 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26071 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26073 RTX_FRAME_RELATED_P (insn) = 1;
26074 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26075 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26076 sp_reg_rtx, cr_off + sp_off));
26078 cr_off += reg_size;
26082 /* Update stack and set back pointer unless this is V.4,
26083 for which it was done previously. */
26084 if (!WORLD_SAVE_P (info) && info->push_p
26085 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26087 rtx ptr_reg = NULL;
26088 int ptr_off = 0;
26090 /* If saving altivec regs we need to be able to address all save
26091 locations using a 16-bit offset. */
26092 if ((strategy & SAVE_INLINE_VRS) == 0
26093 || (info->altivec_size != 0
26094 && (info->altivec_save_offset + info->altivec_size - 16
26095 + info->total_size - frame_off) > 32767)
26096 || (info->vrsave_size != 0
26097 && (info->vrsave_save_offset
26098 + info->total_size - frame_off) > 32767))
26100 int sel = SAVRES_SAVE | SAVRES_VR;
26101 unsigned ptr_regno = ptr_regno_for_savres (sel);
26103 if (using_static_chain_p
26104 && ptr_regno == STATIC_CHAIN_REGNUM)
26105 ptr_regno = 12;
26106 if (REGNO (frame_reg_rtx) != ptr_regno)
26107 START_USE (ptr_regno);
26108 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26109 frame_reg_rtx = ptr_reg;
26110 ptr_off = info->altivec_save_offset + info->altivec_size;
26111 frame_off = -ptr_off;
26113 else if (REGNO (frame_reg_rtx) == 1)
26114 frame_off = info->total_size;
26115 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26116 ptr_reg, ptr_off);
26117 if (REGNO (frame_reg_rtx) == 12)
26118 sp_adjust = 0;
26119 sp_off = info->total_size;
26120 if (frame_reg_rtx != sp_reg_rtx)
26121 rs6000_emit_stack_tie (frame_reg_rtx, false);
26124 /* Set frame pointer, if needed. */
26125 if (frame_pointer_needed)
26127 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
26128 sp_reg_rtx);
26129 RTX_FRAME_RELATED_P (insn) = 1;
26132 /* Save AltiVec registers if needed. Save here because the red zone does
26133 not always include AltiVec registers. */
26134 if (!WORLD_SAVE_P (info)
26135 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
26137 int end_save = info->altivec_save_offset + info->altivec_size;
26138 int ptr_off;
26139 /* Oddly, the vector save/restore functions point r0 at the end
26140 of the save area, then use r11 or r12 to load offsets for
26141 [reg+reg] addressing. */
26142 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26143 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
26144 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26146 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26147 NOT_INUSE (0);
26148 if (scratch_regno == 12)
26149 sp_adjust = 0;
26150 if (end_save + frame_off != 0)
26152 rtx offset = GEN_INT (end_save + frame_off);
26154 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26156 else
26157 emit_move_insn (ptr_reg, frame_reg_rtx);
26159 ptr_off = -end_save;
26160 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26161 info->altivec_save_offset + ptr_off,
26162 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
26163 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
26164 NULL_RTX, NULL_RTX);
26165 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26167 /* The oddity mentioned above clobbered our frame reg. */
26168 emit_move_insn (frame_reg_rtx, ptr_reg);
26169 frame_off = ptr_off;
26172 else if (!WORLD_SAVE_P (info)
26173 && info->altivec_size != 0)
26175 int i;
26177 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26178 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26180 rtx areg, savereg, mem;
26181 int offset;
26183 offset = (info->altivec_save_offset + frame_off
26184 + 16 * (i - info->first_altivec_reg_save));
26186 savereg = gen_rtx_REG (V4SImode, i);
26188 NOT_INUSE (0);
26189 areg = gen_rtx_REG (Pmode, 0);
26190 emit_move_insn (areg, GEN_INT (offset));
26192 /* AltiVec addressing mode is [reg+reg]. */
26193 mem = gen_frame_mem (V4SImode,
26194 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
26196 /* Rather than emitting a generic move, force use of the stvx
26197 instruction, which we always want. In particular we don't
26198 want xxpermdi/stxvd2x for little endian. */
26199 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
26201 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26202 areg, GEN_INT (offset));
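	  /* Each iteration thus emits, roughly:
		 li   0,offset		displacement into r0 (areg)
		 stvx vS,frame_reg,0	store via [reg+reg] addressing
	     which is why areg accompanies the offset in the
	     rs6000_frame_related call above.  */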
26206 /* VRSAVE is a bit vector representing which AltiVec registers
26207 are used. The OS uses this to determine which vector
26208 registers to save on a context switch. We need to save
26209 VRSAVE on the stack frame, add whatever AltiVec registers we
26210 used in this function, and do the corresponding magic in the
26211 epilogue. */
26213 if (!WORLD_SAVE_P (info)
26214 && info->vrsave_size != 0)
26216 rtx reg, vrsave;
26217 int offset;
26218 int save_regno;
26220 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
26221 be using r12 as frame_reg_rtx and r11 as the static chain
26222 pointer for nested functions. */
26223 save_regno = 12;
26224 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26225 && !using_static_chain_p)
26226 save_regno = 11;
26227 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
26229 save_regno = 11;
26230 if (using_static_chain_p)
26231 save_regno = 0;
26234 NOT_INUSE (save_regno);
26235 reg = gen_rtx_REG (SImode, save_regno);
26236 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26237 if (TARGET_MACHO)
26238 emit_insn (gen_get_vrsave_internal (reg));
26239 else
26240 emit_insn (gen_rtx_SET (reg, vrsave));
26242 /* Save VRSAVE. */
26243 offset = info->vrsave_save_offset + frame_off;
26244 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26246 /* Include the registers in the mask. */
26247 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
26249 insn = emit_insn (generate_set_vrsave (reg, info, 0));
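      /* In sum, a sketch of the VRSAVE bookkeeping just emitted
	 (mnemonics approximate):
	     mfvrsave rN			copy the old mask to a GPR
	     stw      rN,off(frame_reg)	save it for the epilogue
	     ori      rN,rN,mask		add this function's vector regs
	     mtvrsave rN			install the combined mask  */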
26252 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
26253 if (!TARGET_SINGLE_PIC_BASE
26254 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
26255 || (DEFAULT_ABI == ABI_V4
26256 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
26257 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
26259 /* If emit_load_toc_table will use the link register, we need to save
26260 it. We use R12 for this purpose because emit_load_toc_table
26261 can use register 0. This allows us to use a plain 'blr' to return
26262 from the procedure more often. */
26263 int save_LR_around_toc_setup = (TARGET_ELF
26264 && DEFAULT_ABI == ABI_V4
26265 && flag_pic
26266 && ! info->lr_save_p
26267 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
26268 if (save_LR_around_toc_setup)
26270 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26271 rtx tmp = gen_rtx_REG (Pmode, 12);
26273 sp_adjust = 0;
26274 insn = emit_move_insn (tmp, lr);
26275 RTX_FRAME_RELATED_P (insn) = 1;
26277 rs6000_emit_load_toc_table (TRUE);
26279 insn = emit_move_insn (lr, tmp);
26280 add_reg_note (insn, REG_CFA_RESTORE, lr);
26281 RTX_FRAME_RELATED_P (insn) = 1;
26283 else
26284 rs6000_emit_load_toc_table (TRUE);
26287 #if TARGET_MACHO
26288 if (!TARGET_SINGLE_PIC_BASE
26289 && DEFAULT_ABI == ABI_DARWIN
26290 && flag_pic && crtl->uses_pic_offset_table)
26292 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26293 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
26295 /* Save and restore LR locally around this call (in R0). */
26296 if (!info->lr_save_p)
26297 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
26299 emit_insn (gen_load_macho_picbase (src));
26301 emit_move_insn (gen_rtx_REG (Pmode,
26302 RS6000_PIC_OFFSET_TABLE_REGNUM),
26303 lr);
26305 if (!info->lr_save_p)
26306 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
26308 #endif
26310 /* If we need to, save the TOC register after doing the stack setup.
26311 Do not emit eh frame info for this save. The unwinder wants info,
26312 conceptually attached to instructions in this function, about
26313 register values in the caller of this function. This R2 may have
26314 already been changed from the value in the caller.
26315 We don't attempt to write accurate DWARF EH frame info for R2
26316 because code emitted by gcc for a (non-pointer) function call
26317 doesn't save and restore R2. Instead, R2 is managed out-of-line
26318 by a linker generated plt call stub when the function resides in
26319 a shared library. This behavior is costly to describe in DWARF,
26320 both in terms of the size of DWARF info and the time taken in the
26321 unwinder to interpret it. R2 changes, apart from the
26322 calls_eh_return case earlier in this function, are handled by
26323 linux-unwind.h frob_update_context. */
26324 if (rs6000_save_toc_in_prologue_p ())
26326 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26327 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
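      /* This is the bare "std 2,40(1)" (64-bit; "stw 2,20(1)" on
	 32-bit, assuming the usual RS6000_TOC_SAVE_SLOT values),
	 deliberately carrying no DWARF CFI, per the comment above.  */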
26330 if (using_split_stack && split_stack_arg_pointer_used_p ())
26332 /* Set up the arg pointer (r12) for -fsplit-stack code. If
26333 __morestack was called, it left the arg pointer to the old
26334 stack in r29. Otherwise, the arg pointer is the top of the
26335 current frame. */
26336 cfun->machine->split_stack_argp_used = true;
26337 if (sp_adjust)
26339 rtx r12 = gen_rtx_REG (Pmode, 12);
26340 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26341 emit_insn_before (set_r12, sp_adjust);
26343 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26345 rtx r12 = gen_rtx_REG (Pmode, 12);
26346 if (frame_off == 0)
26347 emit_move_insn (r12, frame_reg_rtx);
26348 else
26349 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26351 if (info->push_p)
26353 rtx r12 = gen_rtx_REG (Pmode, 12);
26354 rtx r29 = gen_rtx_REG (Pmode, 29);
26355 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26356 rtx not_more = gen_label_rtx ();
26357 rtx jump;
26359 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26360 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26361 gen_rtx_LABEL_REF (VOIDmode, not_more),
26362 pc_rtx);
26363 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26364 JUMP_LABEL (jump) = not_more;
26365 LABEL_NUSES (not_more) += 1;
26366 emit_move_insn (r12, r29);
26367 emit_label (not_more);
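	  /* A sketch of the code just emitted for the pushed-frame case:
		 bge 7,.Lnot_more	cr7 was set by __morestack
		 mr  12,29		old-stack arg pointer left in r29
	     .Lnot_more:
	     so r12 ends up as the arg pointer on either path.  */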
26372 /* Output .extern statements for the save/restore routines we use. */
26374 static void
26375 rs6000_output_savres_externs (FILE *file)
26377 rs6000_stack_t *info = rs6000_stack_info ();
26379 if (TARGET_DEBUG_STACK)
26380 debug_stack_info (info);
26382 /* Write .extern for any function we will call to save and restore
26383 fp values. */
26384 if (info->first_fp_reg_save < 64
26385 && !TARGET_MACHO
26386 && !TARGET_ELF)
26388 char *name;
26389 int regno = info->first_fp_reg_save - 32;
26391 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
26393 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26394 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26395 name = rs6000_savres_routine_name (info, regno, sel);
26396 fprintf (file, "\t.extern %s\n", name);
26398 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
26400 bool lr = (info->savres_strategy
26401 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26402 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26403 name = rs6000_savres_routine_name (info, regno, sel);
26404 fprintf (file, "\t.extern %s\n", name);
26409 /* Write function prologue. */
26411 static void
26412 rs6000_output_function_prologue (FILE *file,
26413 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
26415 if (!cfun->is_thunk)
26416 rs6000_output_savres_externs (file);
26418 /* ELFv2 ABI r2 setup code and local entry point. This must follow
26419 immediately after the global entry point label. */
26420 if (rs6000_global_entry_point_needed_p ())
26422 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26424 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
26426 if (TARGET_CMODEL != CMODEL_LARGE)
 26428 /* In the small and medium code models, we assume the TOC is less than
 26429 2 GB away from the text section, so it can be computed via the
26430 following two-instruction sequence. */
26431 char buf[256];
26433 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26434 fprintf (file, "0:\taddis 2,12,.TOC.-");
26435 assemble_name (file, buf);
26436 fprintf (file, "@ha\n");
26437 fprintf (file, "\taddi 2,2,.TOC.-");
26438 assemble_name (file, buf);
26439 fprintf (file, "@l\n");
26441 else
26443 /* In the large code model, we allow arbitrary offsets between the
26444 TOC and the text section, so we have to load the offset from
26445 memory. The data field is emitted directly before the global
26446 entry point in rs6000_elf_declare_function_name. */
26447 char buf[256];
26449 #ifdef HAVE_AS_ENTRY_MARKERS
26450 /* If supported by the linker, emit a marker relocation. If the
26451 total code size of the final executable or shared library
26452 happens to fit into 2 GB after all, the linker will replace
26453 this code sequence with the sequence for the small or medium
26454 code model. */
26455 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
26456 #endif
26457 fprintf (file, "\tld 2,");
26458 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26459 assemble_name (file, buf);
26460 fprintf (file, "-");
26461 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26462 assemble_name (file, buf);
26463 fprintf (file, "(12)\n");
26464 fprintf (file, "\tadd 2,2,12\n");
26467 fputs ("\t.localentry\t", file);
26468 assemble_name (file, name);
26469 fputs (",.-", file);
26470 assemble_name (file, name);
26471 fputs ("\n", file);
26474 /* Output -mprofile-kernel code. This needs to be done here instead of
26475 in output_function_profile since it must go after the ELFv2 ABI
26476 local entry point. */
26477 if (TARGET_PROFILE_KERNEL && crtl->profile)
26479 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26480 gcc_assert (!TARGET_32BIT);
26482 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
26484 /* In the ELFv2 ABI we have no compiler stack word. It must be
 26485 the responsibility of _mcount to preserve the static chain
26486 register if required. */
26487 if (DEFAULT_ABI != ABI_ELFv2
26488 && cfun->static_chain_decl != NULL)
26490 asm_fprintf (file, "\tstd %s,24(%s)\n",
26491 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26492 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26493 asm_fprintf (file, "\tld %s,24(%s)\n",
26494 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26496 else
26497 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26500 rs6000_pic_labelno++;
 26503 /* -mprofile-kernel code calls mcount before the function prologue,
26504 so a profiled leaf function should stay a leaf function. */
26505 static bool
26506 rs6000_keep_leaf_when_profiled ()
26508 return TARGET_PROFILE_KERNEL;
26511 /* Non-zero if vmx regs are restored before the frame pop, zero if
26512 we restore after the pop when possible. */
26513 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
26515 /* Restoring cr is a two step process: loading a reg from the frame
26516 save, then moving the reg to cr. For ABI_V4 we must let the
26517 unwinder know that the stack location is no longer valid at or
26518 before the stack deallocation, but we can't emit a cfa_restore for
26519 cr at the stack deallocation like we do for other registers.
26520 The trouble is that it is possible for the move to cr to be
26521 scheduled after the stack deallocation. So say exactly where cr
26522 is located on each of the two insns. */
26524 static rtx
26525 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
26527 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
26528 rtx reg = gen_rtx_REG (SImode, regno);
26529 rtx_insn *insn = emit_move_insn (reg, mem);
26531 if (!exit_func && DEFAULT_ABI == ABI_V4)
26533 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26534 rtx set = gen_rtx_SET (reg, cr);
26536 add_reg_note (insn, REG_CFA_REGISTER, set);
26537 RTX_FRAME_RELATED_P (insn) = 1;
26539 return reg;
26542 /* Reload CR from REG. */
26544 static void
26545 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
26547 int count = 0;
26548 int i;
26550 if (using_mfcr_multiple)
26552 for (i = 0; i < 8; i++)
26553 if (save_reg_p (CR0_REGNO + i))
26554 count++;
26555 gcc_assert (count);
26558 if (using_mfcr_multiple && count > 1)
26560 rtx_insn *insn;
26561 rtvec p;
26562 int ndx;
26564 p = rtvec_alloc (count);
26566 ndx = 0;
26567 for (i = 0; i < 8; i++)
26568 if (save_reg_p (CR0_REGNO + i))
26570 rtvec r = rtvec_alloc (2);
26571 RTVEC_ELT (r, 0) = reg;
26572 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
26573 RTVEC_ELT (p, ndx) =
26574 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
26575 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
26576 ndx++;
26578 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26579 gcc_assert (ndx == count);
26581 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26582 CR field separately. */
26583 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26585 for (i = 0; i < 8; i++)
26586 if (save_reg_p (CR0_REGNO + i))
26587 add_reg_note (insn, REG_CFA_RESTORE,
26588 gen_rtx_REG (SImode, CR0_REGNO + i));
26590 RTX_FRAME_RELATED_P (insn) = 1;
26593 else
26594 for (i = 0; i < 8; i++)
26595 if (save_reg_p (CR0_REGNO + i))
26597 rtx insn = emit_insn (gen_movsi_to_cr_one
26598 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
26600 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26601 CR field separately, attached to the insn that in fact
26602 restores this particular CR field. */
26603 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26605 add_reg_note (insn, REG_CFA_RESTORE,
26606 gen_rtx_REG (SImode, CR0_REGNO + i));
26608 RTX_FRAME_RELATED_P (insn) = 1;
26612 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
26613 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
26614 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
26616 rtx_insn *insn = get_last_insn ();
26617 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26619 add_reg_note (insn, REG_CFA_RESTORE, cr);
26620 RTX_FRAME_RELATED_P (insn) = 1;
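  /* The multiple-field branch above corresponds to a single mtcrf
     with a combined FXM mask, e.g. "mtcrf 0x30,rN" to restore cr2 and
     cr3 together (a sketch; the actual mask is accumulated from the
     1 << (7 - i) terms).  */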
26624 /* Like cr, the move to lr instruction can be scheduled after the
26625 stack deallocation, but unlike cr, its stack frame save is still
26626 valid. So we only need to emit the cfa_restore on the correct
26627 instruction. */
26629 static void
26630 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
26632 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
26633 rtx reg = gen_rtx_REG (Pmode, regno);
26635 emit_move_insn (reg, mem);
26638 static void
26639 restore_saved_lr (int regno, bool exit_func)
26641 rtx reg = gen_rtx_REG (Pmode, regno);
26642 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26643 rtx_insn *insn = emit_move_insn (lr, reg);
26645 if (!exit_func && flag_shrink_wrap)
26647 add_reg_note (insn, REG_CFA_RESTORE, lr);
26648 RTX_FRAME_RELATED_P (insn) = 1;
26652 static rtx
26653 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
26655 if (DEFAULT_ABI == ABI_ELFv2)
26657 int i;
26658 for (i = 0; i < 8; i++)
26659 if (save_reg_p (CR0_REGNO + i))
26661 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
26662 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
26663 cfa_restores);
26666 else if (info->cr_save_p)
26667 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26668 gen_rtx_REG (SImode, CR2_REGNO),
26669 cfa_restores);
26671 if (info->lr_save_p)
26672 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26673 gen_rtx_REG (Pmode, LR_REGNO),
26674 cfa_restores);
26675 return cfa_restores;
26678 /* Return true if OFFSET from stack pointer can be clobbered by signals.
 26679 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
 26680 below the stack pointer that are not clobbered by signals. */
26682 static inline bool
26683 offset_below_red_zone_p (HOST_WIDE_INT offset)
 26685 return offset < (DEFAULT_ABI == ABI_V4
 26686 ? 0
 26687 : TARGET_32BIT ? -220 : -288);
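  /* For example, with the 288-byte 64-bit red zone, an offset of -280
     is safe (returns false) while -296 can be clobbered by a signal
     handler (returns true); under V.4 any negative offset is unsafe.  */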
26690 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
26692 static void
26693 emit_cfa_restores (rtx cfa_restores)
26695 rtx_insn *insn = get_last_insn ();
26696 rtx *loc = &REG_NOTES (insn);
26698 while (*loc)
26699 loc = &XEXP (*loc, 1);
26700 *loc = cfa_restores;
26701 RTX_FRAME_RELATED_P (insn) = 1;
26704 /* Emit function epilogue as insns. */
26706 void
26707 rs6000_emit_epilogue (int sibcall)
26709 rs6000_stack_t *info;
26710 int restoring_GPRs_inline;
26711 int restoring_FPRs_inline;
26712 int using_load_multiple;
26713 int using_mtcr_multiple;
26714 int use_backchain_to_restore_sp;
26715 int restore_lr;
26716 int strategy;
26717 HOST_WIDE_INT frame_off = 0;
26718 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
26719 rtx frame_reg_rtx = sp_reg_rtx;
26720 rtx cfa_restores = NULL_RTX;
26721 rtx insn;
26722 rtx cr_save_reg = NULL_RTX;
26723 machine_mode reg_mode = Pmode;
26724 int reg_size = TARGET_32BIT ? 4 : 8;
26725 int i;
26726 bool exit_func;
26727 unsigned ptr_regno;
26729 info = rs6000_stack_info ();
26731 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26733 reg_mode = V2SImode;
26734 reg_size = 8;
26737 strategy = info->savres_strategy;
26738 using_load_multiple = strategy & REST_MULTIPLE;
26739 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
26740 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
26741 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
26742 || rs6000_cpu == PROCESSOR_PPC603
26743 || rs6000_cpu == PROCESSOR_PPC750
26744 || optimize_size);
26745 /* Restore via the backchain when we have a large frame, since this
26746 is more efficient than an addis, addi pair. The second condition
 26747 here will not trigger at the moment; we don't actually need a
26748 frame pointer for alloca, but the generic parts of the compiler
26749 give us one anyway. */
26750 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
26751 ? info->lr_save_offset
26752 : 0) > 32767
26753 || (cfun->calls_alloca
26754 && !frame_pointer_needed));
26755 restore_lr = (info->lr_save_p
26756 && (restoring_FPRs_inline
26757 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
26758 && (restoring_GPRs_inline
26759 || info->first_fp_reg_save < 64));
26761 if (WORLD_SAVE_P (info))
26763 int i, j;
26764 char rname[30];
26765 const char *alloc_rname;
26766 rtvec p;
26768 /* eh_rest_world_r10 will return to the location saved in the LR
 26769 stack slot (which is not likely to be our caller).
26770 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
26771 rest_world is similar, except any R10 parameter is ignored.
26772 The exception-handling stuff that was here in 2.95 is no
26773 longer necessary. */
26775 p = rtvec_alloc (9
26777 + 32 - info->first_gp_reg_save
26778 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
26779 + 63 + 1 - info->first_fp_reg_save);
26781 strcpy (rname, ((crtl->calls_eh_return) ?
26782 "*eh_rest_world_r10" : "*rest_world"));
26783 alloc_rname = ggc_strdup (rname);
26785 j = 0;
26786 RTVEC_ELT (p, j++) = ret_rtx;
26787 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26788 gen_rtx_REG (Pmode,
26789 LR_REGNO));
26790 RTVEC_ELT (p, j++)
26791 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
26792 /* The instruction pattern requires a clobber here;
26793 it is shared with the restVEC helper. */
26794 RTVEC_ELT (p, j++)
26795 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
26798 /* CR register traditionally saved as CR2. */
26799 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
26800 RTVEC_ELT (p, j++)
26801 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
26802 if (flag_shrink_wrap)
26804 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26805 gen_rtx_REG (Pmode, LR_REGNO),
26806 cfa_restores);
26807 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26811 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26813 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26814 RTVEC_ELT (p, j++)
26815 = gen_frame_load (reg,
26816 frame_reg_rtx, info->gp_save_offset + reg_size * i);
26817 if (flag_shrink_wrap)
26818 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26820 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26822 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
26823 RTVEC_ELT (p, j++)
26824 = gen_frame_load (reg,
26825 frame_reg_rtx, info->altivec_save_offset + 16 * i);
26826 if (flag_shrink_wrap)
26827 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26829 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
26831 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26832 ? DFmode : SFmode),
26833 info->first_fp_reg_save + i);
26834 RTVEC_ELT (p, j++)
26835 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
26836 if (flag_shrink_wrap)
26837 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26839 RTVEC_ELT (p, j++)
26840 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
26841 RTVEC_ELT (p, j++)
26842 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
26843 RTVEC_ELT (p, j++)
26844 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
26845 RTVEC_ELT (p, j++)
26846 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
26847 RTVEC_ELT (p, j++)
26848 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
26849 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26851 if (flag_shrink_wrap)
26853 REG_NOTES (insn) = cfa_restores;
26854 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26855 RTX_FRAME_RELATED_P (insn) = 1;
26857 return;
26860 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
26861 if (info->push_p)
26862 frame_off = info->total_size;
26864 /* Restore AltiVec registers if we must do so before adjusting the
26865 stack. */
26866 if (info->altivec_size != 0
26867 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26868 || (DEFAULT_ABI != ABI_V4
26869 && offset_below_red_zone_p (info->altivec_save_offset))))
26871 int i;
26872 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26874 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26875 if (use_backchain_to_restore_sp)
26877 int frame_regno = 11;
26879 if ((strategy & REST_INLINE_VRS) == 0)
26881 /* Of r11 and r12, select the one not clobbered by an
26882 out-of-line restore function for the frame register. */
26883 frame_regno = 11 + 12 - scratch_regno;
26885 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
26886 emit_move_insn (frame_reg_rtx,
26887 gen_rtx_MEM (Pmode, sp_reg_rtx));
26888 frame_off = 0;
26890 else if (frame_pointer_needed)
26891 frame_reg_rtx = hard_frame_pointer_rtx;
26893 if ((strategy & REST_INLINE_VRS) == 0)
26895 int end_save = info->altivec_save_offset + info->altivec_size;
26896 int ptr_off;
26897 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26898 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26900 if (end_save + frame_off != 0)
26902 rtx offset = GEN_INT (end_save + frame_off);
26904 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26906 else
26907 emit_move_insn (ptr_reg, frame_reg_rtx);
26909 ptr_off = -end_save;
26910 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26911 info->altivec_save_offset + ptr_off,
26912 0, V4SImode, SAVRES_VR);
26914 else
26916 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26917 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26919 rtx addr, areg, mem, reg;
26921 areg = gen_rtx_REG (Pmode, 0);
26922 emit_move_insn
26923 (areg, GEN_INT (info->altivec_save_offset
26924 + frame_off
26925 + 16 * (i - info->first_altivec_reg_save)));
26927 /* AltiVec addressing mode is [reg+reg]. */
26928 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26929 mem = gen_frame_mem (V4SImode, addr);
26931 reg = gen_rtx_REG (V4SImode, i);
26932 /* Rather than emitting a generic move, force use of the
26933 lvx instruction, which we always want. In particular
26934 we don't want lxvd2x/xxpermdi for little endian. */
26935 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26939 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26940 if (((strategy & REST_INLINE_VRS) == 0
26941 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26942 && (flag_shrink_wrap
26943 || (offset_below_red_zone_p
26944 (info->altivec_save_offset
26945 + 16 * (i - info->first_altivec_reg_save)))))
26947 rtx reg = gen_rtx_REG (V4SImode, i);
26948 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26952 /* Restore VRSAVE if we must do so before adjusting the stack. */
26953 if (info->vrsave_size != 0
26954 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26955 || (DEFAULT_ABI != ABI_V4
26956 && offset_below_red_zone_p (info->vrsave_save_offset))))
26958 rtx reg;
26960 if (frame_reg_rtx == sp_reg_rtx)
26962 if (use_backchain_to_restore_sp)
26964 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26965 emit_move_insn (frame_reg_rtx,
26966 gen_rtx_MEM (Pmode, sp_reg_rtx));
26967 frame_off = 0;
26969 else if (frame_pointer_needed)
26970 frame_reg_rtx = hard_frame_pointer_rtx;
26973 reg = gen_rtx_REG (SImode, 12);
26974 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26975 info->vrsave_save_offset + frame_off));
26977 emit_insn (generate_set_vrsave (reg, info, 1));
26980 insn = NULL_RTX;
26981 /* If we have a large stack frame, restore the old stack pointer
26982 using the backchain. */
26983 if (use_backchain_to_restore_sp)
26985 if (frame_reg_rtx == sp_reg_rtx)
26987 /* Under V.4, don't reset the stack pointer until after we're done
26988 loading the saved registers. */
26989 if (DEFAULT_ABI == ABI_V4)
26990 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26992 insn = emit_move_insn (frame_reg_rtx,
26993 gen_rtx_MEM (Pmode, sp_reg_rtx));
26994 frame_off = 0;
26996 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26997 && DEFAULT_ABI == ABI_V4)
26998 /* frame_reg_rtx has been set up by the altivec restore. */
27000 else
27002 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27003 frame_reg_rtx = sp_reg_rtx;
27006 /* If we have a frame pointer, we can restore the old stack pointer
27007 from it. */
27008 else if (frame_pointer_needed)
27010 frame_reg_rtx = sp_reg_rtx;
27011 if (DEFAULT_ABI == ABI_V4)
27012 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27013 /* Prevent reordering memory accesses against stack pointer restore. */
27014 else if (cfun->calls_alloca
27015 || offset_below_red_zone_p (-info->total_size))
27016 rs6000_emit_stack_tie (frame_reg_rtx, true);
27018 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27019 GEN_INT (info->total_size)));
27020 frame_off = 0;
27022 else if (info->push_p
27023 && DEFAULT_ABI != ABI_V4
27024 && !crtl->calls_eh_return)
27026 /* Prevent reordering memory accesses against stack pointer restore. */
27027 if (cfun->calls_alloca
27028 || offset_below_red_zone_p (-info->total_size))
27029 rs6000_emit_stack_tie (frame_reg_rtx, false);
27030 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27031 GEN_INT (info->total_size)));
27032 frame_off = 0;
27034 if (insn && frame_reg_rtx == sp_reg_rtx)
27036 if (cfa_restores)
27038 REG_NOTES (insn) = cfa_restores;
27039 cfa_restores = NULL_RTX;
27041 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27042 RTX_FRAME_RELATED_P (insn) = 1;
27045 /* Restore AltiVec registers if we have not done so already. */
27046 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27047 && info->altivec_size != 0
27048 && (DEFAULT_ABI == ABI_V4
27049 || !offset_below_red_zone_p (info->altivec_save_offset)))
27051 int i;
27053 if ((strategy & REST_INLINE_VRS) == 0)
27055 int end_save = info->altivec_save_offset + info->altivec_size;
27056 int ptr_off;
27057 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27058 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27059 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27061 if (end_save + frame_off != 0)
27063 rtx offset = GEN_INT (end_save + frame_off);
27065 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27067 else
27068 emit_move_insn (ptr_reg, frame_reg_rtx);
27070 ptr_off = -end_save;
27071 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27072 info->altivec_save_offset + ptr_off,
27073 0, V4SImode, SAVRES_VR);
27074 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
 27076 /* Frame reg was clobbered by the out-of-line restore. Recover it
 27077 from ptr_reg, and if we are calling out-of-line gpr or
 27078 fpr restore, set up the correct pointer and offset. */
27079 unsigned newptr_regno = 1;
27080 if (!restoring_GPRs_inline)
27082 bool lr = info->gp_save_offset + info->gp_size == 0;
27083 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27084 newptr_regno = ptr_regno_for_savres (sel);
27085 end_save = info->gp_save_offset + info->gp_size;
27087 else if (!restoring_FPRs_inline)
27089 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
27090 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27091 newptr_regno = ptr_regno_for_savres (sel);
27092 end_save = info->fp_save_offset + info->fp_size;
27095 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
27096 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
27098 if (end_save + ptr_off != 0)
27100 rtx offset = GEN_INT (end_save + ptr_off);
27102 frame_off = -end_save;
27103 if (TARGET_32BIT)
27104 emit_insn (gen_addsi3_carry (frame_reg_rtx,
27105 ptr_reg, offset));
27106 else
27107 emit_insn (gen_adddi3_carry (frame_reg_rtx,
27108 ptr_reg, offset));
27110 else
27112 frame_off = ptr_off;
27113 emit_move_insn (frame_reg_rtx, ptr_reg);
27117 else
27119 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27120 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27122 rtx addr, areg, mem, reg;
27124 areg = gen_rtx_REG (Pmode, 0);
27125 emit_move_insn
27126 (areg, GEN_INT (info->altivec_save_offset
27127 + frame_off
27128 + 16 * (i - info->first_altivec_reg_save)));
27130 /* AltiVec addressing mode is [reg+reg]. */
27131 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27132 mem = gen_frame_mem (V4SImode, addr);
27134 reg = gen_rtx_REG (V4SImode, i);
27135 /* Rather than emitting a generic move, force use of the
27136 lvx instruction, which we always want. In particular
27137 we don't want lxvd2x/xxpermdi for little endian. */
27138 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
27142 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27143 if (((strategy & REST_INLINE_VRS) == 0
27144 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27145 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27147 rtx reg = gen_rtx_REG (V4SImode, i);
27148 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27152 /* Restore VRSAVE if we have not done so already. */
27153 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27154 && info->vrsave_size != 0
27155 && (DEFAULT_ABI == ABI_V4
27156 || !offset_below_red_zone_p (info->vrsave_save_offset)))
27158 rtx reg;
27160 reg = gen_rtx_REG (SImode, 12);
27161 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27162 info->vrsave_save_offset + frame_off));
27164 emit_insn (generate_set_vrsave (reg, info, 1));
27167 /* If we exit by an out-of-line restore function on ABI_V4 then that
27168 function will deallocate the stack, so we don't need to worry
27169 about the unwinder restoring cr from an invalid stack frame
27170 location. */
27171 exit_func = (!restoring_FPRs_inline
27172 || (!restoring_GPRs_inline
27173 && info->first_fp_reg_save == 64));
27175 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
27176 *separate* slots if the routine calls __builtin_eh_return, so
27177 that they can be independently restored by the unwinder. */
27178 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27180 int i, cr_off = info->ehcr_offset;
27182 for (i = 0; i < 8; i++)
27183 if (!call_used_regs[CR0_REGNO + i])
27185 rtx reg = gen_rtx_REG (SImode, 0);
27186 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27187 cr_off + frame_off));
27189 insn = emit_insn (gen_movsi_to_cr_one
27190 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27192 if (!exit_func && flag_shrink_wrap)
27194 add_reg_note (insn, REG_CFA_RESTORE,
27195 gen_rtx_REG (SImode, CR0_REGNO + i));
27197 RTX_FRAME_RELATED_P (insn) = 1;
27200 cr_off += reg_size;
27204 /* Get the old lr if we saved it. If we are restoring registers
27205 out-of-line, then the out-of-line routines can do this for us. */
27206 if (restore_lr && restoring_GPRs_inline)
27207 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27209 /* Get the old cr if we saved it. */
27210 if (info->cr_save_p)
27212 unsigned cr_save_regno = 12;
27214 if (!restoring_GPRs_inline)
27216 /* Ensure we don't use the register used by the out-of-line
27217 gpr register restore below. */
27218 bool lr = info->gp_save_offset + info->gp_size == 0;
27219 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27220 int gpr_ptr_regno = ptr_regno_for_savres (sel);
27222 if (gpr_ptr_regno == 12)
27223 cr_save_regno = 11;
27224 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
27226 else if (REGNO (frame_reg_rtx) == 12)
27227 cr_save_regno = 11;
27229 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
27230 info->cr_save_offset + frame_off,
27231 exit_func);
27234 /* Set LR here to try to overlap restores below. */
27235 if (restore_lr && restoring_GPRs_inline)
27236 restore_saved_lr (0, exit_func);
27238 /* Load exception handler data registers, if needed. */
27239 if (crtl->calls_eh_return)
27241 unsigned int i, regno;
27243 if (TARGET_AIX)
27245 rtx reg = gen_rtx_REG (reg_mode, 2);
27246 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27247 frame_off + RS6000_TOC_SAVE_SLOT));
27250 for (i = 0; ; ++i)
27252 rtx mem;
27254 regno = EH_RETURN_DATA_REGNO (i);
27255 if (regno == INVALID_REGNUM)
27256 break;
27258 /* Note: possible use of r0 here to address SPE regs. */
27259 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
27260 info->ehrd_offset + frame_off
27261 + reg_size * (int) i);
27263 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
27267 /* Restore GPRs. This is done as a PARALLEL if we are using
27268 the load-multiple instructions. */
27269 if (TARGET_SPE_ABI
27270 && info->spe_64bit_regs_used
27271 && info->first_gp_reg_save != 32)
27273 /* Determine whether we can address all of the registers that need
27274 to be saved with an offset from frame_reg_rtx that fits in
27275 the small const field for SPE memory instructions. */
27276 int spe_regs_addressable
27277 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
27278 + reg_size * (32 - info->first_gp_reg_save - 1))
27279 && restoring_GPRs_inline);
27281 if (!spe_regs_addressable)
27283 int ool_adjust = 0;
27284 rtx old_frame_reg_rtx = frame_reg_rtx;
27285 /* Make r11 point to the start of the SPE save area. We worried about
27286 not clobbering it when we were saving registers in the prologue.
27287 There's no need to worry here because the static chain is passed
27288 anew to every function. */
27290 if (!restoring_GPRs_inline)
27291 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
27292 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27293 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
27294 GEN_INT (info->spe_gp_save_offset
27295 + frame_off
27296 - ool_adjust)));
27297 /* Keep the invariant that frame_reg_rtx + frame_off points
27298 at the top of the stack frame. */
27299 frame_off = -info->spe_gp_save_offset + ool_adjust;
27302 if (restoring_GPRs_inline)
27304 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
27306 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27307 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27309 rtx offset, addr, mem, reg;
27311 /* We're doing all this to ensure that the immediate offset
27312 fits into the immediate field of 'evldd'. */
27313 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
27315 offset = GEN_INT (spe_offset + reg_size * i);
27316 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
27317 mem = gen_rtx_MEM (V2SImode, addr);
27318 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27320 emit_move_insn (reg, mem);
27323 else
27324 rs6000_emit_savres_rtx (info, frame_reg_rtx,
27325 info->spe_gp_save_offset + frame_off,
27326 info->lr_save_offset + frame_off,
27327 reg_mode,
27328 SAVRES_GPR | SAVRES_LR);
27330 else if (!restoring_GPRs_inline)
27332 /* We are jumping to an out-of-line function. */
27333 rtx ptr_reg;
27334 int end_save = info->gp_save_offset + info->gp_size;
27335 bool can_use_exit = end_save == 0;
27336 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
27337 int ptr_off;
27339 /* Emit stack reset code if we need it. */
27340 ptr_regno = ptr_regno_for_savres (sel);
27341 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27342 if (can_use_exit)
27343 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
27344 else if (end_save + frame_off != 0)
27345 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
27346 GEN_INT (end_save + frame_off)));
27347 else if (REGNO (frame_reg_rtx) != ptr_regno)
27348 emit_move_insn (ptr_reg, frame_reg_rtx);
27349 if (REGNO (frame_reg_rtx) == ptr_regno)
27350 frame_off = -end_save;
27352 if (can_use_exit && info->cr_save_p)
27353 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
27355 ptr_off = -end_save;
27356 rs6000_emit_savres_rtx (info, ptr_reg,
27357 info->gp_save_offset + ptr_off,
27358 info->lr_save_offset + ptr_off,
27359 reg_mode, sel);
27361 else if (using_load_multiple)
27363 rtvec p;
27364 p = rtvec_alloc (32 - info->first_gp_reg_save);
27365 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27366 RTVEC_ELT (p, i)
27367 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27368 frame_reg_rtx,
27369 info->gp_save_offset + frame_off + reg_size * i);
27370 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27372 else
27374 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27375 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27376 emit_insn (gen_frame_load
27377 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27378 frame_reg_rtx,
27379 info->gp_save_offset + frame_off + reg_size * i));
27382 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27384 /* If the frame pointer was used then we can't delay emitting
27385 a REG_CFA_DEF_CFA note. This must happen on the insn that
27386 restores the frame pointer, r31. We may have already emitted
27387 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
27388 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
27389 be harmless if emitted. */
27390 if (frame_pointer_needed)
27392 insn = get_last_insn ();
27393 add_reg_note (insn, REG_CFA_DEF_CFA,
27394 plus_constant (Pmode, frame_reg_rtx, frame_off));
27395 RTX_FRAME_RELATED_P (insn) = 1;
27398 /* Set up cfa_restores. We always need these when
27399 shrink-wrapping. If not shrink-wrapping then we only need
27400 the cfa_restore when the stack location is no longer valid.
27401 The cfa_restores must be emitted on or before the insn that
27402 invalidates the stack, and of course must not be emitted
27403 before the insn that actually does the restore. The latter
27404 is why it is a bad idea to emit the cfa_restores as a group
27405 on the last instruction here that actually does a restore:
27406 That insn may be reordered with respect to others doing
27407 restores. */
27408 if (flag_shrink_wrap
27409 && !restoring_GPRs_inline
27410 && info->first_fp_reg_save == 64)
27411 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27413 for (i = info->first_gp_reg_save; i < 32; i++)
27414 if (!restoring_GPRs_inline
27415 || using_load_multiple
27416 || rs6000_reg_live_or_pic_offset_p (i))
27418 rtx reg = gen_rtx_REG (reg_mode, i);
27420 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27424 if (!restoring_GPRs_inline
27425 && info->first_fp_reg_save == 64)
27427 /* We are jumping to an out-of-line function. */
27428 if (cfa_restores)
27429 emit_cfa_restores (cfa_restores);
27430 return;
27433 if (restore_lr && !restoring_GPRs_inline)
27435 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
27436 restore_saved_lr (0, exit_func);
27439 /* Restore FPRs if we need to do it without calling a function. */
27440 if (restoring_FPRs_inline)
27441 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27442 if (save_reg_p (info->first_fp_reg_save + i))
27444 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27445 ? DFmode : SFmode),
27446 info->first_fp_reg_save + i);
27447 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27448 info->fp_save_offset + frame_off + 8 * i));
27449 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27450 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27453 /* If we saved cr, restore it here. Just those that were used. */
27454 if (info->cr_save_p)
27455 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
27457 /* If this is V.4, unwind the stack pointer after all of the loads
27458 have been done, or set up r11 if we are restoring fp out of line. */
27459 ptr_regno = 1;
27460 if (!restoring_FPRs_inline)
27462 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27463 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27464 ptr_regno = ptr_regno_for_savres (sel);
27467 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
27468 if (REGNO (frame_reg_rtx) == ptr_regno)
27469 frame_off = 0;
27471 if (insn && restoring_FPRs_inline)
27473 if (cfa_restores)
27475 REG_NOTES (insn) = cfa_restores;
27476 cfa_restores = NULL_RTX;
27478 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27479 RTX_FRAME_RELATED_P (insn) = 1;
27482 if (crtl->calls_eh_return)
27484 rtx sa = EH_RETURN_STACKADJ_RTX;
27485 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
27488 if (!sibcall)
27490 rtvec p;
27491 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27492 if (! restoring_FPRs_inline)
27494 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
27495 RTVEC_ELT (p, 0) = ret_rtx;
27497 else
27499 if (cfa_restores)
27501 /* We can't hang the cfa_restores off a simple return,
27502 since the shrink-wrap code sometimes uses an existing
27503 return. This means there might be a path from
27504 pre-prologue code to this return, and dwarf2cfi code
27505 wants the eh_frame unwinder state to be the same on
27506 all paths to any point. So we need to emit the
27507 cfa_restores before the return. For -m64 we really
27508 don't need epilogue cfa_restores at all, except for
27509 this irritating dwarf2cfi requirement when shrink-wrapping;
27510 the stack red zone means eh_frame info
27511 from the prologue telling the unwinder to restore
27512 from the stack is perfectly good right to the end of
27513 the function. */
27514 emit_insn (gen_blockage ());
27515 emit_cfa_restores (cfa_restores);
27516 cfa_restores = NULL_RTX;
27518 p = rtvec_alloc (2);
27519 RTVEC_ELT (p, 0) = simple_return_rtx;
27522 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
27523 ? gen_rtx_USE (VOIDmode,
27524 gen_rtx_REG (Pmode, LR_REGNO))
27525 : gen_rtx_CLOBBER (VOIDmode,
27526 gen_rtx_REG (Pmode, LR_REGNO)));
27528 /* If we have to restore more than two FP registers, branch to the
27529 restore function. It will return to our caller. */
27530 if (! restoring_FPRs_inline)
27532 int i;
27533 int reg;
27534 rtx sym;
27536 if (flag_shrink_wrap)
27537 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27539 sym = rs6000_savres_routine_sym (info,
27540 SAVRES_FPR | (lr ? SAVRES_LR : 0));
27541 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
27542 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
27543 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
27545 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27547 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
27549 RTVEC_ELT (p, i + 4)
27550 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
27551 if (flag_shrink_wrap)
27552 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
27553 cfa_restores);
27557 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27560 if (cfa_restores)
27562 if (sibcall)
27563 /* Ensure the cfa_restores are hung off an insn that won't
27564 be reordered above other restores. */
27565 emit_insn (gen_blockage ());
27567 emit_cfa_restores (cfa_restores);
27571 /* Write function epilogue. */
27573 static void
27574 rs6000_output_function_epilogue (FILE *file,
27575 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27577 #if TARGET_MACHO
27578 macho_branch_islands ();
27579 /* Mach-O doesn't support labels at the end of objects, so if
27580 it looks like we might want one, insert a NOP. */
27582 rtx_insn *insn = get_last_insn ();
27583 rtx_insn *deleted_debug_label = NULL;
27584 while (insn
27585 && NOTE_P (insn)
27586 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
27588 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
27589 instead set their CODE_LABEL_NUMBER to -1, otherwise there
27590 would be code generation differences between -g and -g0. */
27592 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27593 deleted_debug_label = insn;
27594 insn = PREV_INSN (insn);
27596 if (insn
27597 && (LABEL_P (insn)
27598 || (NOTE_P (insn)
27599 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
27600 fputs ("\tnop\n", file);
27601 else if (deleted_debug_label)
27602 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
27603 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27604 CODE_LABEL_NUMBER (insn) = -1;
27606 #endif
27608 /* Output a traceback table here. See /usr/include/sys/debug.h for info
27609 on its format.
27611 We don't output a traceback table if -finhibit-size-directive was
27612 used. The documentation for -finhibit-size-directive reads
27613 ``don't output a @code{.size} assembler directive, or anything
27614 else that would cause trouble if the function is split in the
27615 middle, and the two halves are placed at locations far apart in
27616 memory.'' The traceback table has this property, since it
27617 includes the offset from the start of the function to the
27618 traceback table itself.
27620 System V.4 PowerPC targets (and the embedded ABI derived from
27621 them) use a different traceback table. */
27622 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27623 && ! flag_inhibit_size_directive
27624 && rs6000_traceback != traceback_none && !cfun->is_thunk)
27626 const char *fname = NULL;
27627 const char *language_string = lang_hooks.name;
27628 int fixed_parms = 0, float_parms = 0, parm_info = 0;
27629 int i;
27630 int optional_tbtab;
27631 rs6000_stack_t *info = rs6000_stack_info ();
27633 if (rs6000_traceback == traceback_full)
27634 optional_tbtab = 1;
27635 else if (rs6000_traceback == traceback_part)
27636 optional_tbtab = 0;
27637 else
27638 optional_tbtab = !optimize_size && !TARGET_ELF;
27640 if (optional_tbtab)
27642 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27643 while (*fname == '.') /* V.4 encodes . in the name */
27644 fname++;
27646 /* Need label immediately before tbtab, so we can compute
27647 its offset from the function start. */
27648 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27649 ASM_OUTPUT_LABEL (file, fname);
27652 /* The .tbtab pseudo-op can only be used for the first eight
27653 expressions, since it can't handle the possibly variable
27654 length fields that follow. However, if you omit the optional
27655 fields, the assembler outputs zeros for all optional fields
27656 anyway, giving each variable-length field its minimum length
27657 (as defined in sys/debug.h). Thus we cannot use the .tbtab
27658 pseudo-op at all. */
27660 /* An all-zero word flags the start of the tbtab, for debuggers
27661 that have to find it by searching forward from the entry
27662 point or from the current pc. */
27663 fputs ("\t.long 0\n", file);
27665 /* Tbtab format type. Use format type 0. */
27666 fputs ("\t.byte 0,", file);
27668 /* Language type. Unfortunately, there does not seem to be any
27669 official way to discover the language being compiled, so we
27670 use language_string.
27671 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
27672 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
27673 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
27674 either, so for now use 0. */
27675 if (lang_GNU_C ()
27676 || ! strcmp (language_string, "GNU GIMPLE")
27677 || ! strcmp (language_string, "GNU Go")
27678 || ! strcmp (language_string, "libgccjit"))
27679 i = 0;
27680 else if (! strcmp (language_string, "GNU F77")
27681 || lang_GNU_Fortran ())
27682 i = 1;
27683 else if (! strcmp (language_string, "GNU Pascal"))
27684 i = 2;
27685 else if (! strcmp (language_string, "GNU Ada"))
27686 i = 3;
27687 else if (lang_GNU_CXX ()
27688 || ! strcmp (language_string, "GNU Objective-C++"))
27689 i = 9;
27690 else if (! strcmp (language_string, "GNU Java"))
27691 i = 13;
27692 else if (! strcmp (language_string, "GNU Objective-C"))
27693 i = 14;
27694 else
27695 gcc_unreachable ();
27696 fprintf (file, "%d,", i);
27698 /* 8 single bit fields: global linkage (not set for C extern linkage,
27699 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
27700 from start of procedure stored in tbtab, internal function, function
27701 has controlled storage, function has no toc, function uses fp,
27702 function logs/aborts fp operations. */
27703 /* Assume that fp operations are used if any fp reg must be saved. */
27704 fprintf (file, "%d,",
27705 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
27707 /* 6 bitfields: function is interrupt handler, name present in
27708 proc table, function calls alloca, on condition directives
27709 (controls stack walks, 3 bits), saves condition reg, saves
27710 link reg. */
27711 /* The `function calls alloca' bit seems to be set whenever reg 31 is
27712 set up as a frame pointer, even when there is no alloca call. */
27713 fprintf (file, "%d,",
27714 ((optional_tbtab << 6)
27715 | ((optional_tbtab & frame_pointer_needed) << 5)
27716 | (info->cr_save_p << 1)
27717 | (info->lr_save_p)));
27719 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
27720 (6 bits). */
27721 fprintf (file, "%d,",
27722 (info->push_p << 7) | (64 - info->first_fp_reg_save));
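/* Illustrative worked example (not in the original source): for a
   function that allocates stack space (push_p = 1) and saves
   f14..f31 (first_fp_reg_save = 46), this byte is
   (1 << 7) | (64 - 46) = 0x80 | 18 = 146.  */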
27724 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
27725 fprintf (file, "%d,", (32 - first_reg_to_save ()));
27727 if (optional_tbtab)
27729 /* Compute the parameter info from the function decl argument
27730 list. */
27731 tree decl;
27732 int next_parm_info_bit = 31;
27734 for (decl = DECL_ARGUMENTS (current_function_decl);
27735 decl; decl = DECL_CHAIN (decl))
27737 rtx parameter = DECL_INCOMING_RTL (decl);
27738 machine_mode mode = GET_MODE (parameter);
27740 if (GET_CODE (parameter) == REG)
27742 if (SCALAR_FLOAT_MODE_P (mode))
27744 int bits;
27746 float_parms++;
27748 switch (mode)
27750 case SFmode:
27751 case SDmode:
27752 bits = 0x2;
27753 break;
27755 case DFmode:
27756 case DDmode:
27757 case TFmode:
27758 case TDmode:
27759 case IFmode:
27760 case KFmode:
27761 bits = 0x3;
27762 break;
27764 default:
27765 gcc_unreachable ();
27768 /* If only one bit will fit, don't OR in this entry. */
27769 if (next_parm_info_bit > 0)
27770 parm_info |= (bits << (next_parm_info_bit - 1));
27771 next_parm_info_bit -= 2;
27773 else
27775 fixed_parms += ((GET_MODE_SIZE (mode)
27776 + (UNITS_PER_WORD - 1))
27777 / UNITS_PER_WORD);
27778 next_parm_info_bit -= 1;
27784 /* Number of fixed point parameters. */
27785 /* This is actually the number of words of fixed point parameters; thus
27786 an 8 byte struct counts as 2; and thus the maximum value is 8. */
27787 fprintf (file, "%d,", fixed_parms);
27789 /* 2 bitfields: number of floating point parameters (7 bits), parameters
27790 all on stack. */
27791 /* This is actually the number of fp registers that hold parameters;
27792 and thus the maximum value is 13. */
27793 /* Set parameters on stack bit if parameters are not in their original
27794 registers, regardless of whether they are on the stack? Xlc
27795 seems to set the bit when not optimizing. */
27796 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
27798 if (! optional_tbtab)
27799 return;
27801 /* Optional fields follow. Some are variable length. */
27803 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
27804 11 double float. */
27805 /* There is an entry for each parameter in a register, in the order that
27806 they occur in the parameter list. Any intervening arguments on the
27807 stack are ignored. If the list overflows a long (max possible length
27808 34 bits) then completely leave off all elements that don't fit. */
27809 /* Only emit this long if there was at least one parameter. */
27810 if (fixed_parms || float_parms)
27811 fprintf (file, "\t.long %d\n", parm_info);
27813 /* Offset from start of code to tb table. */
27814 fputs ("\t.long ", file);
27815 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27816 RS6000_OUTPUT_BASENAME (file, fname);
27817 putc ('-', file);
27818 rs6000_output_function_entry (file, fname);
27819 putc ('\n', file);
27821 /* Interrupt handler mask. */
27822 /* Omit this long, since we never set the interrupt handler bit
27823 above. */
27825 /* Number of CTL (controlled storage) anchors. */
27826 /* Omit this long, since the has_ctl bit is never set above. */
27828 /* Displacement into stack of each CTL anchor. */
27829 /* Omit this list of longs, because there are no CTL anchors. */
27831 /* Length of function name. */
27832 if (*fname == '*')
27833 ++fname;
27834 fprintf (file, "\t.short %d\n", (int) strlen (fname));
27836 /* Function name. */
27837 assemble_string (fname, strlen (fname));
27839 /* Register for alloca automatic storage; this is always reg 31.
27840 Only emit this if the alloca bit was set above. */
27841 if (frame_pointer_needed)
27842 fputs ("\t.byte 31\n", file);
27844 fputs ("\t.align 2\n", file);
27847 /* Arrange to define .LCTOC1 label, if not already done. */
27848 if (need_toc_init)
27850 need_toc_init = 0;
27851 if (!toc_initialized)
27853 switch_to_section (toc_section);
27854 switch_to_section (current_function_section ());
27859 /* -fsplit-stack support. */
27861 /* A SYMBOL_REF for __morestack. */
27862 static GTY(()) rtx morestack_ref;
27864 static rtx
27865 gen_add3_const (rtx rt, rtx ra, long c)
27867 if (TARGET_64BIT)
27868 return gen_adddi3 (rt, ra, GEN_INT (c));
27869 else
27870 return gen_addsi3 (rt, ra, GEN_INT (c));
27873 /* Emit -fsplit-stack prologue, which goes before the regular function
27874 prologue (at local entry point in the case of ELFv2). */
27876 void
27877 rs6000_expand_split_stack_prologue (void)
27879 rs6000_stack_t *info = rs6000_stack_info ();
27880 unsigned HOST_WIDE_INT allocate;
27881 long alloc_hi, alloc_lo;
27882 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
27883 rtx_insn *insn;
27885 gcc_assert (flag_split_stack && reload_completed);
27887 if (!info->push_p)
27888 return;
27890 if (global_regs[29])
27892 error ("-fsplit-stack uses register r29");
27893 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
27894 "conflicts with %qD", global_regs_decl[29]);
27897 allocate = info->total_size;
27898 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
27900 sorry ("stack frame larger than 2G is not supported for -fsplit-stack");
27901 return;
27903 if (morestack_ref == NULL_RTX)
27905 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
27906 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
27907 | SYMBOL_FLAG_FUNCTION);
27910 r0 = gen_rtx_REG (Pmode, 0);
27911 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27912 r12 = gen_rtx_REG (Pmode, 12);
27913 emit_insn (gen_load_split_stack_limit (r0));
27914 /* Always emit two insns here to calculate the requested stack,
27915 so that the linker can edit them when adjusting size for calling
27916 non-split-stack code. */
27917 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
27918 alloc_lo = -allocate - alloc_hi;
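/* Worked example (illustrative only): for allocate = 0x12345,
   -allocate = -74565; alloc_hi = (-74565 + 0x8000) & ~0xffff = -65536
   and alloc_lo = -74565 - (-65536) = -9029, which fits in the signed
   16-bit addi field, so addis/addi reconstruct r1 - 74565 exactly.  */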
27919 if (alloc_hi != 0)
27921 emit_insn (gen_add3_const (r12, r1, alloc_hi));
27922 if (alloc_lo != 0)
27923 emit_insn (gen_add3_const (r12, r12, alloc_lo));
27924 else
27925 emit_insn (gen_nop ());
27927 else
27929 emit_insn (gen_add3_const (r12, r1, alloc_lo));
27930 emit_insn (gen_nop ());
27933 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27934 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
27935 ok_label = gen_label_rtx ();
27936 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27937 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
27938 gen_rtx_LABEL_REF (VOIDmode, ok_label),
27939 pc_rtx);
27940 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27941 JUMP_LABEL (jump) = ok_label;
27942 /* Mark the jump as very likely to be taken. */
27943 add_int_reg_note (jump, REG_BR_PROB,
27944 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
27946 lr = gen_rtx_REG (Pmode, LR_REGNO);
27947 insn = emit_move_insn (r0, lr);
27948 RTX_FRAME_RELATED_P (insn) = 1;
27949 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
27950 RTX_FRAME_RELATED_P (insn) = 1;
27952 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
27953 const0_rtx, const0_rtx));
27954 call_fusage = NULL_RTX;
27955 use_reg (&call_fusage, r12);
27956 add_function_usage_to (insn, call_fusage);
27957 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
27958 insn = emit_move_insn (lr, r0);
27959 add_reg_note (insn, REG_CFA_RESTORE, lr);
27960 RTX_FRAME_RELATED_P (insn) = 1;
27961 emit_insn (gen_split_stack_return ());
27963 emit_label (ok_label);
27964 LABEL_NUSES (ok_label) = 1;
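/* Sketch of the sequence emitted above (illustrative, not literal
   assembler output; the compare and loads are doubleword or word
   according to -m64/-m32):

       addis 12,1,hi(-frame)    # or a single addi plus a nop
       addi  12,12,lo(-frame)
       cmpld 7,12,0             # r0 holds the split-stack limit
       bge   7,ok               # enough stack: skip the call
       mflr  0
       std   0,lr_off(1)        # save LR around the call
       bl    __morestack        # r12 (requested sp) is in call fusage
       ld    0,lr_off(1)
       mtlr  0
       blr                      # split-stack return
   ok:                                                               */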
27967 /* Return the internal arg pointer used for function incoming
27968 arguments. When -fsplit-stack, the arg pointer is r12 so we need
27969 to copy it to a pseudo in order for it to be preserved over calls
27970 and suchlike. We'd really like to use a pseudo here for the
27971 internal arg pointer but data-flow analysis is not prepared to
27972 accept pseudos as live at the beginning of a function. */
27974 static rtx
27975 rs6000_internal_arg_pointer (void)
27977 if (flag_split_stack
27978 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
27979 == NULL))
27982 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
27984 rtx pat;
27986 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
27987 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
27989 /* Put the pseudo initialization right after the note at the
27990 beginning of the function. */
27991 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
27992 gen_rtx_REG (Pmode, 12));
27993 push_topmost_sequence ();
27994 emit_insn_after (pat, get_insns ());
27995 pop_topmost_sequence ();
27997 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
27998 FIRST_PARM_OFFSET (current_function_decl));
28000 return virtual_incoming_args_rtx;
28003 /* We may have to tell the dataflow pass that the split stack prologue
28004 is initializing a register. */
28006 static void
28007 rs6000_live_on_entry (bitmap regs)
28009 if (flag_split_stack)
28010 bitmap_set_bit (regs, 12);
28013 /* Emit -fsplit-stack dynamic stack allocation space check. */
28015 void
28016 rs6000_split_stack_space_check (rtx size, rtx label)
28018 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28019 rtx limit = gen_reg_rtx (Pmode);
28020 rtx requested = gen_reg_rtx (Pmode);
28021 rtx cmp = gen_reg_rtx (CCUNSmode);
28022 rtx jump;
28024 emit_insn (gen_load_split_stack_limit (limit));
28025 if (CONST_INT_P (size))
28026 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28027 else
28029 size = force_reg (Pmode, size);
28030 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
28032 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28033 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28034 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28035 gen_rtx_LABEL_REF (VOIDmode, label),
28036 pc_rtx);
28037 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28038 JUMP_LABEL (jump) = label;
28041 /* A C compound statement that outputs the assembler code for a thunk
28042 function, used to implement C++ virtual function calls with
28043 multiple inheritance. The thunk acts as a wrapper around a virtual
28044 function, adjusting the implicit object parameter before handing
28045 control off to the real function.
28047 First, emit code to add the integer DELTA to the location that
28048 contains the incoming first argument. Assume that this argument
28049 contains a pointer, and is the one used to pass the `this' pointer
28050 in C++. This is the incoming argument *before* the function
28051 prologue, e.g. `%o0' on a sparc. The addition must preserve the
28052 values of all other incoming arguments.
28054 After the addition, emit code to jump to FUNCTION, which is a
28055 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
28056 not touch the return address. Hence returning from FUNCTION will
28057 return to whoever called the current `thunk'.
28059 The effect must be as if FUNCTION had been called directly with the
28060 adjusted first argument. This macro is responsible for emitting
28061 all of the code for a thunk function; output_function_prologue()
28062 and output_function_epilogue() are not invoked.
28064 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
28065 been extracted from it.) It might possibly be useful on some
28066 targets, but probably not.
28068 If you do not define this macro, the target-independent code in the
28069 C++ frontend will generate a less efficient heavyweight thunk that
28070 calls FUNCTION instead of jumping to it. The generic approach does
28071 not support varargs. */
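/* Illustrative example (an assumption, not from the original text):
   given

     struct A { virtual void f (); int a; };
     struct B { virtual void g (); int b; };
     struct C : A, B { void g (); };

   a call through a B* to g on a C object enters a thunk whose DELTA
   adjusts `this' from the B subobject back to the enclosing C before
   jumping to C::g; VCALL_OFFSET is used instead when the adjustment
   must be loaded from the vtable (e.g. with virtual bases).  */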
28073 static void
28074 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
28075 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
28076 tree function)
28078 rtx this_rtx, funexp;
28079 rtx_insn *insn;
28081 reload_completed = 1;
28082 epilogue_completed = 1;
28084 /* Mark the end of the (empty) prologue. */
28085 emit_note (NOTE_INSN_PROLOGUE_END);
28087 /* Find the "this" pointer. If the function returns a structure,
28088 the structure return pointer is in r3. */
28089 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
28090 this_rtx = gen_rtx_REG (Pmode, 4);
28091 else
28092 this_rtx = gen_rtx_REG (Pmode, 3);
28094 /* Apply the constant offset, if required. */
28095 if (delta)
28096 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
28098 /* Apply the offset from the vtable, if required. */
28099 if (vcall_offset)
28101 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
28102 rtx tmp = gen_rtx_REG (Pmode, 12);
28104 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
28105 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
28107 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
28108 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
28110 else
28112 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
28114 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
28116 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
28119 /* Generate a tail call to the target function. */
28120 if (!TREE_USED (function))
28122 assemble_external (function);
28123 TREE_USED (function) = 1;
28125 funexp = XEXP (DECL_RTL (function), 0);
28126 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28128 #if TARGET_MACHO
28129 if (MACHOPIC_INDIRECT)
28130 funexp = machopic_indirect_call_target (funexp);
28131 #endif
28133 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
28134 generate sibcall RTL explicitly. */
28135 insn = emit_call_insn (
28136 gen_rtx_PARALLEL (VOIDmode,
28137 gen_rtvec (4,
28138 gen_rtx_CALL (VOIDmode,
28139 funexp, const0_rtx),
28140 gen_rtx_USE (VOIDmode, const0_rtx),
28141 gen_rtx_USE (VOIDmode,
28142 gen_rtx_REG (SImode,
28143 LR_REGNO)),
28144 simple_return_rtx)));
28145 SIBLING_CALL_P (insn) = 1;
28146 emit_barrier ();
28148 /* Run just enough of rest_of_compilation to get the insns emitted.
28149 There's not really enough bulk here to make other passes such as
28150 instruction scheduling worth while. Note that use_thunk calls
28151 assemble_start_function and assemble_end_function. */
28152 insn = get_insns ();
28153 shorten_branches (insn);
28154 final_start_function (insn, file, 1);
28155 final (insn, file, 1);
28156 final_end_function ();
28158 reload_completed = 0;
28159 epilogue_completed = 0;
28162 /* A quick summary of the various types of 'constant-pool tables'
28163 under PowerPC:
28165 Target       Flags           Name             One table per
28166 AIX          (none)          AIX TOC          object file
28167 AIX          -mfull-toc      AIX TOC          object file
28168 AIX          -mminimal-toc   AIX minimal TOC  translation unit
28169 SVR4/EABI    (none)          SVR4 SDATA       object file
28170 SVR4/EABI    -fpic           SVR4 pic         object file
28171 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
28172 SVR4/EABI    -mrelocatable   EABI TOC         function
28173 SVR4/EABI    -maix           AIX TOC          object file
28174 SVR4/EABI    -maix -mminimal-toc
28175                              AIX minimal TOC  translation unit
28177 Name             Reg.  Reg. set by  Entries made by  addrs?  fp?      sum?
28180 AIX TOC          2     crt0         as               Y       option   option
28181 AIX minimal TOC  30    prolog       gcc              Y       Y        option
28182 SVR4 SDATA       13    crt0         gcc              N       Y        N
28183 SVR4 pic         30    prolog       ld               Y       not yet  N
28184 SVR4 PIC         30    prolog       gcc              Y       option   option
28185 EABI TOC         30    prolog       gcc              Y       option   option
28189 /* Hash functions for the hash table. */
28191 static unsigned
28192 rs6000_hash_constant (rtx k)
28194 enum rtx_code code = GET_CODE (k);
28195 machine_mode mode = GET_MODE (k);
28196 unsigned result = (code << 3) ^ mode;
28197 const char *format;
28198 int flen, fidx;
28200 format = GET_RTX_FORMAT (code);
28201 flen = strlen (format);
28202 fidx = 0;
28204 switch (code)
28206 case LABEL_REF:
28207 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
28209 case CONST_WIDE_INT:
28211 int i;
28212 flen = CONST_WIDE_INT_NUNITS (k);
28213 for (i = 0; i < flen; i++)
28214 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
28215 return result;
28218 case CONST_DOUBLE:
28219 if (mode != VOIDmode)
28220 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
28221 flen = 2;
28222 break;
28224 case CODE_LABEL:
28225 fidx = 3;
28226 break;
28228 default:
28229 break;
28232 for (; fidx < flen; fidx++)
28233 switch (format[fidx])
28235 case 's':
28237 unsigned i, len;
28238 const char *str = XSTR (k, fidx);
28239 len = strlen (str);
28240 result = result * 613 + len;
28241 for (i = 0; i < len; i++)
28242 result = result * 613 + (unsigned) str[i];
28243 break;
28245 case 'u':
28246 case 'e':
28247 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
28248 break;
28249 case 'i':
28250 case 'n':
28251 result = result * 613 + (unsigned) XINT (k, fidx);
28252 break;
28253 case 'w':
28254 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
28255 result = result * 613 + (unsigned) XWINT (k, fidx);
28256 else
28258 size_t i;
28259 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
28260 result = result * 613 + (unsigned) (XWINT (k, fidx)
28261 >> CHAR_BIT * i);
28263 break;
28264 case '0':
28265 break;
28266 default:
28267 gcc_unreachable ();
28270 return result;
28273 hashval_t
28274 toc_hasher::hash (toc_hash_struct *thc)
28276 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
28279 /* Compare H1 and H2 for equivalence. */
28281 bool
28282 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
28284 rtx r1 = h1->key;
28285 rtx r2 = h2->key;
28287 if (h1->key_mode != h2->key_mode)
28288 return 0;
28290 return rtx_equal_p (r1, r2);
28293 /* These are the names given by the C++ front-end to vtables, and
28294 vtable-like objects. Ideally, this logic should not be here;
28295 instead, there should be some programmatic way of inquiring as
28296 to whether or not an object is a vtable. */
28298 #define VTABLE_NAME_P(NAME) \
28299 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
28300 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
28301 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
28302 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
28303 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
28305 #ifdef NO_DOLLAR_IN_LABEL
28306 /* Return a GGC-allocated character string translating dollar signs in
28307 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
28309 const char *
28310 rs6000_xcoff_strip_dollar (const char *name)
28312 char *strip, *p;
28313 const char *q;
28314 size_t len;
28316 q = (const char *) strchr (name, '$');
28318 if (q == 0 || q == name)
28319 return name;
28321 len = strlen (name);
28322 strip = XALLOCAVEC (char, len + 1);
28323 strcpy (strip, name);
28324 p = strip + (q - name);
28325 while (p)
28327 *p = '_';
28328 p = strchr (p + 1, '$');
28331 return ggc_alloc_string (strip, len);
28333 #endif
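/* Example (illustrative): rs6000_xcoff_strip_dollar ("foo$bar$baz")
   returns "foo_bar_baz"; a name without '$', or one whose first
   character is '$', is returned unchanged.  */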
28335 void
28336 rs6000_output_symbol_ref (FILE *file, rtx x)
28338 /* Currently C++ toc references to vtables can be emitted before it
28339 is decided whether the vtable is public or private. If this is
28340 the case, then the linker will eventually complain that there is
28341 a reference to an unknown section. Thus, for vtables only,
28342 we emit the TOC reference to reference the symbol and not the
28343 section. */
28344 const char *name = XSTR (x, 0);
28346 tree decl = SYMBOL_REF_DECL (x);
28347 if (decl /* sync condition with assemble_external () */
28348 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28349 && (TREE_CODE (decl) == VAR_DECL
28350 || TREE_CODE (decl) == FUNCTION_DECL)
28351 && name[strlen (name) - 1] != ']')
28353 name = concat (name,
28354 (TREE_CODE (decl) == FUNCTION_DECL
28355 ? "[DS]" : "[UA]"),
28356 NULL);
28357 XSTR (x, 0) = name;
28360 if (VTABLE_NAME_P (name))
28362 RS6000_OUTPUT_BASENAME (file, name);
28364 else
28365 assemble_name (file, name);
28368 /* Output a TOC entry. We derive the entry name from what is being
28369 written. */
28371 void
28372 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
28374 char buf[256];
28375 const char *name = buf;
28376 rtx base = x;
28377 HOST_WIDE_INT offset = 0;
28379 gcc_assert (!TARGET_NO_TOC);
28381 /* When the linker won't eliminate them, don't output duplicate
28382 TOC entries (this happens on AIX if there is any kind of TOC,
28383 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
28384 CODE_LABELs. */
28385 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
28387 struct toc_hash_struct *h;
28389 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
28390 time because GGC is not initialized at that point. */
28391 if (toc_hash_table == NULL)
28392 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
28394 h = ggc_alloc<toc_hash_struct> ();
28395 h->key = x;
28396 h->key_mode = mode;
28397 h->labelno = labelno;
28399 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
28400 if (*found == NULL)
28401 *found = h;
28402 else /* This is indeed a duplicate.
28403 Set this label equal to that label. */
28405 fputs ("\t.set ", file);
28406 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28407 fprintf (file, "%d,", labelno);
28408 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
28409 fprintf (file, "%d\n", ((*found)->labelno));
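/* On AIX the internal label prefix expands to "LC..", so a duplicate
   constant emits e.g. "\t.set LC..7,LC..3" (illustrative label
   numbers).  */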
28411 #ifdef HAVE_AS_TLS
28412 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
28413 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
28414 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
28416 fputs ("\t.set ", file);
28417 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28418 fprintf (file, "%d,", labelno);
28419 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
28420 fprintf (file, "%d\n", ((*found)->labelno));
28422 #endif
28423 return;
28427 /* If we're going to put a double constant in the TOC, make sure it's
28428 aligned properly when strict alignment is on. */
28429 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
28430 && STRICT_ALIGNMENT
28431 && GET_MODE_BITSIZE (mode) >= 64
28432 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
28433 ASM_OUTPUT_ALIGN (file, 3);
28436 (*targetm.asm_out.internal_label) (file, "LC", labelno);
28438 /* Handle FP constants specially. Note that if we have a minimal
28439 TOC, things we put here aren't actually in the TOC, so we can allow
28440 FP constants. */
28441 if (GET_CODE (x) == CONST_DOUBLE
28442 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
28443 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
28445 long k[4];
28447 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28448 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
28449 else
28450 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28452 if (TARGET_64BIT)
28454 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28455 fputs (DOUBLE_INT_ASM_OP, file);
28456 else
28457 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28458 k[0] & 0xffffffff, k[1] & 0xffffffff,
28459 k[2] & 0xffffffff, k[3] & 0xffffffff);
28460 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
28461 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28462 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
28463 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
28464 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
28465 return;
28467 else
28469 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28470 fputs ("\t.long ", file);
28471 else
28472 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28473 k[0] & 0xffffffff, k[1] & 0xffffffff,
28474 k[2] & 0xffffffff, k[3] & 0xffffffff);
28475 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
28476 k[0] & 0xffffffff, k[1] & 0xffffffff,
28477 k[2] & 0xffffffff, k[3] & 0xffffffff);
28478 return;
28481 else if (GET_CODE (x) == CONST_DOUBLE
28482 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
28484 long k[2];
28486 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28487 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
28488 else
28489 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28491 if (TARGET_64BIT)
28493 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28494 fputs (DOUBLE_INT_ASM_OP, file);
28495 else
28496 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28497 k[0] & 0xffffffff, k[1] & 0xffffffff);
28498 fprintf (file, "0x%lx%08lx\n",
28499 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28500 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
28501 return;
28503 else
28505 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28506 fputs ("\t.long ", file);
28507 else
28508 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28509 k[0] & 0xffffffff, k[1] & 0xffffffff);
28510 fprintf (file, "0x%lx,0x%lx\n",
28511 k[0] & 0xffffffff, k[1] & 0xffffffff);
28512 return;
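/* Illustrative example: for the DFmode constant 1.0 (bit pattern
   0x3ff0000000000000) on 64-bit ELF this emits
   "\t.quad 0x3ff0000000000000" (assuming DOUBLE_INT_ASM_OP expands
   to .quad there), while a full AIX TOC gets
   "\t.tc FD_3ff00000_0[TC],0x3ff0000000000000".  */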
28515 else if (GET_CODE (x) == CONST_DOUBLE
28516 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
28518 long l;
28520 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28521 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
28522 else
28523 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
28525 if (TARGET_64BIT)
28527 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28528 fputs (DOUBLE_INT_ASM_OP, file);
28529 else
28530 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28531 if (WORDS_BIG_ENDIAN)
28532 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
28533 else
28534 fprintf (file, "0x%lx\n", l & 0xffffffff);
28535 return;
28537 else
28539 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28540 fputs ("\t.long ", file);
28541 else
28542 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28543 fprintf (file, "0x%lx\n", l & 0xffffffff);
28544 return;
28547 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
28549 unsigned HOST_WIDE_INT low;
28550 HOST_WIDE_INT high;
28552 low = INTVAL (x) & 0xffffffff;
28553 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
28555 /* TOC entries are always Pmode-sized, so when big-endian
28556 smaller integer constants in the TOC need to be padded.
28557 (This is still a win over putting the constants in
28558 a separate constant pool, because then we'd have
28559 to have both a TOC entry _and_ the actual constant.)
28561 For a 32-bit target, CONST_INT values are loaded and shifted
28562 entirely within `low' and can be stored in one TOC entry. */
28564 /* It would be easy to make this work, but it doesn't now. */
28565 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
28567 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
28569 low |= high << 32;
28570 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
28571 high = (HOST_WIDE_INT) low >> 32;
28572 low &= 0xffffffff;
28575 if (TARGET_64BIT)
28577 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28578 fputs (DOUBLE_INT_ASM_OP, file);
28579 else
28580 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28581 (long) high & 0xffffffff, (long) low & 0xffffffff);
28582 fprintf (file, "0x%lx%08lx\n",
28583 (long) high & 0xffffffff, (long) low & 0xffffffff);
28584 return;
28586 else
28588 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
28590 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28591 fputs ("\t.long ", file);
28592 else
28593 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28594 (long) high & 0xffffffff, (long) low & 0xffffffff);
28595 fprintf (file, "0x%lx,0x%lx\n",
28596 (long) high & 0xffffffff, (long) low & 0xffffffff);
28598 else
28600 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28601 fputs ("\t.long ", file);
28602 else
28603 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
28604 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
28606 return;
28610 if (GET_CODE (x) == CONST)
28612 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
28613 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
28615 base = XEXP (XEXP (x, 0), 0);
28616 offset = INTVAL (XEXP (XEXP (x, 0), 1));
28619 switch (GET_CODE (base))
28621 case SYMBOL_REF:
28622 name = XSTR (base, 0);
28623 break;
28625 case LABEL_REF:
28626 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
28627 CODE_LABEL_NUMBER (XEXP (base, 0)));
28628 break;
28630 case CODE_LABEL:
28631 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
28632 break;
28634 default:
28635 gcc_unreachable ();
28638 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28639 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
28640 else
28642 fputs ("\t.tc ", file);
28643 RS6000_OUTPUT_BASENAME (file, name);
28645 if (offset < 0)
28646 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
28647 else if (offset)
28648 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
28650 /* Mark large TOC symbols on AIX with [TE] so they are mapped
28651 after other TOC symbols, reducing overflow of small TOC access
28652 to [TC] symbols. */
28653 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
28654 ? "[TE]," : "[TC],", file);
28657 /* Currently C++ toc references to vtables can be emitted before it
28658 is decided whether the vtable is public or private. If this is
28659 the case, then the linker will eventually complain that there is
28660 a TOC reference to an unknown section. Thus, for vtables only,
28661 we emit the TOC reference to reference the symbol and not the
28662 section. */
28663 if (VTABLE_NAME_P (name))
28665 RS6000_OUTPUT_BASENAME (file, name);
28666 if (offset < 0)
28667 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
28668 else if (offset > 0)
28669 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
28671 else
28672 output_addr_const (file, x);
28674 #if HAVE_AS_TLS
28675 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
28677 switch (SYMBOL_REF_TLS_MODEL (base))
28679 case 0:
28680 break;
28681 case TLS_MODEL_LOCAL_EXEC:
28682 fputs ("@le", file);
28683 break;
28684 case TLS_MODEL_INITIAL_EXEC:
28685 fputs ("@ie", file);
28686 break;
28687 /* Use global-dynamic for local-dynamic. */
28688 case TLS_MODEL_GLOBAL_DYNAMIC:
28689 case TLS_MODEL_LOCAL_DYNAMIC:
28690 putc ('\n', file);
28691 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
28692 fputs ("\t.tc .", file);
28693 RS6000_OUTPUT_BASENAME (file, name);
28694 fputs ("[TC],", file);
28695 output_addr_const (file, x);
28696 fputs ("@m", file);
28697 break;
28698 default:
28699 gcc_unreachable ();
28702 #endif
28704 putc ('\n', file);
28707 /* Output an assembler pseudo-op to write an ASCII string of N characters
28708 starting at P to FILE.
28710 On the RS/6000, we have to do this using the .byte operation and
28711 write out special characters outside the quoted string.
28712 Also, the assembler is broken; very long strings are truncated,
28713 so we must artificially break them up early. */
28715 void
28716 output_ascii (FILE *file, const char *p, int n)
28718 char c;
28719 int i, count_string;
28720 const char *for_string = "\t.byte \"";
28721 const char *for_decimal = "\t.byte ";
28722 const char *to_close = NULL;
28724 count_string = 0;
28725 for (i = 0; i < n; i++)
28727 c = *p++;
28728 if (c >= ' ' && c < 0177)
28730 if (for_string)
28731 fputs (for_string, file);
28732 putc (c, file);
28734 /* Write two quotes to get one. */
28735 if (c == '"')
28737 putc (c, file);
28738 ++count_string;
28741 for_string = NULL;
28742 for_decimal = "\"\n\t.byte ";
28743 to_close = "\"\n";
28744 ++count_string;
28746 if (count_string >= 512)
28748 fputs (to_close, file);
28750 for_string = "\t.byte \"";
28751 for_decimal = "\t.byte ";
28752 to_close = NULL;
28753 count_string = 0;
28756 else
28758 if (for_decimal)
28759 fputs (for_decimal, file);
28760 fprintf (file, "%d", c);
28762 for_string = "\n\t.byte \"";
28763 for_decimal = ", ";
28764 to_close = "\n";
28765 count_string = 0;
28769 /* Now close the string if we have written one. Then end the line. */
28770 if (to_close)
28771 fputs (to_close, file);
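/* Example (illustrative): output_ascii (file, "Hi\n", 3) writes

	.byte "Hi"
	.byte 10

   printable characters are grouped into quoted runs, everything else
   is emitted as a decimal .byte value.  */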
28774 /* Generate a unique section name for FILENAME for a section type
28775 represented by SECTION_DESC. Output goes into BUF.
28777 SECTION_DESC can be any string, as long as it is different for each
28778 possible section type.
28780 We name the section in the same manner as xlc. The name begins with an
28781 underscore followed by the filename (after stripping any leading directory
28782 names) with the last period replaced by the string SECTION_DESC. If
28783 FILENAME does not contain a period, SECTION_DESC is appended to the end of
28784 the name. */
28786 void
28787 rs6000_gen_section_name (char **buf, const char *filename,
28788 const char *section_desc)
28790 const char *q, *after_last_slash, *last_period = 0;
28791 char *p;
28792 int len;
28794 after_last_slash = filename;
28795 for (q = filename; *q; q++)
28797 if (*q == '/')
28798 after_last_slash = q + 1;
28799 else if (*q == '.')
28800 last_period = q;
28803 len = strlen (after_last_slash) + strlen (section_desc) + 2;
28804 *buf = (char *) xmalloc (len);
28806 p = *buf;
28807 *p++ = '_';
28809 for (q = after_last_slash; *q; q++)
28811 if (q == last_period)
28813 strcpy (p, section_desc);
28814 p += strlen (section_desc);
28815 break;
28818 else if (ISALNUM (*q))
28819 *p++ = *q;
28822 if (last_period == 0)
28823 strcpy (p, section_desc);
28824 else
28825 *p = '\0';
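/* Example (illustrative, with a made-up SECTION_DESC):
   rs6000_gen_section_name (&buf, "src/foo.c", "ro_") sets *buf to
   "_fooro_": the directory part is stripped, the last period is
   replaced by SECTION_DESC, and anything after that period is
   dropped.  */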
28828 /* Emit profile function. */
28830 void
28831 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
28833 /* Non-standard profiling for kernels, which just saves LR then calls
28834 _mcount without worrying about arg saves. The idea is to change
28835 the function prologue as little as possible as it isn't easy to
28836 account for arg save/restore code added just for _mcount. */
28837 if (TARGET_PROFILE_KERNEL)
28838 return;
28840 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28842 #ifndef NO_PROFILE_COUNTERS
28843 # define NO_PROFILE_COUNTERS 0
28844 #endif
28845 if (NO_PROFILE_COUNTERS)
28846 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28847 LCT_NORMAL, VOIDmode, 0);
28848 else
28850 char buf[30];
28851 const char *label_name;
28852 rtx fun;
28854 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28855 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
28856 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
28858 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28859 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
28862 else if (DEFAULT_ABI == ABI_DARWIN)
28864 const char *mcount_name = RS6000_MCOUNT;
28865 int caller_addr_regno = LR_REGNO;
28867 /* Be conservative and always set this, at least for now. */
28868 crtl->uses_pic_offset_table = 1;
28870 #if TARGET_MACHO
28871 /* For PIC code, set up a stub and collect the caller's address
28872 from r0, which is where the prologue puts it. */
28873 if (MACHOPIC_INDIRECT
28874 && crtl->uses_pic_offset_table)
28875 caller_addr_regno = 0;
28876 #endif
28877 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
28878 LCT_NORMAL, VOIDmode, 1,
28879 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
28883 /* Write function profiler code. */
28885 void
28886 output_function_profiler (FILE *file, int labelno)
28888 char buf[100];
28890 switch (DEFAULT_ABI)
28892 default:
28893 gcc_unreachable ();
28895 case ABI_V4:
28896 if (!TARGET_32BIT)
28898 warning (0, "no profiling of 64-bit code for this ABI");
28899 return;
28901 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28902 fprintf (file, "\tmflr %s\n", reg_names[0]);
28903 if (NO_PROFILE_COUNTERS)
28905 asm_fprintf (file, "\tstw %s,4(%s)\n",
28906 reg_names[0], reg_names[1]);
28908 else if (TARGET_SECURE_PLT && flag_pic)
28910 if (TARGET_LINK_STACK)
28912 char name[32];
28913 get_ppc476_thunk_name (name);
28914 asm_fprintf (file, "\tbl %s\n", name);
28916 else
28917 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
28918 asm_fprintf (file, "\tstw %s,4(%s)\n",
28919 reg_names[0], reg_names[1]);
28920 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28921 asm_fprintf (file, "\taddis %s,%s,",
28922 reg_names[12], reg_names[12]);
28923 assemble_name (file, buf);
28924 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
28925 assemble_name (file, buf);
28926 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
28928 else if (flag_pic == 1)
28930 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
28931 asm_fprintf (file, "\tstw %s,4(%s)\n",
28932 reg_names[0], reg_names[1]);
28933 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28934 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
28935 assemble_name (file, buf);
28936 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
28938 else if (flag_pic > 1)
28940 asm_fprintf (file, "\tstw %s,4(%s)\n",
28941 reg_names[0], reg_names[1]);
28942 /* Now, we need to get the address of the label. */
28943 if (TARGET_LINK_STACK)
28945 char name[32];
28946 get_ppc476_thunk_name (name);
28947 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
28948 assemble_name (file, buf);
28949 fputs ("-.\n1:", file);
28950 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28951 asm_fprintf (file, "\taddi %s,%s,4\n",
28952 reg_names[11], reg_names[11]);
28954 else
28956 fputs ("\tbcl 20,31,1f\n\t.long ", file);
28957 assemble_name (file, buf);
28958 fputs ("-.\n1:", file);
28959 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28961 asm_fprintf (file, "\tlwz %s,0(%s)\n",
28962 reg_names[0], reg_names[11]);
28963 asm_fprintf (file, "\tadd %s,%s,%s\n",
28964 reg_names[0], reg_names[0], reg_names[11]);
28966 else
28968 asm_fprintf (file, "\tlis %s,", reg_names[12]);
28969 assemble_name (file, buf);
28970 fputs ("@ha\n", file);
28971 asm_fprintf (file, "\tstw %s,4(%s)\n",
28972 reg_names[0], reg_names[1]);
28973 asm_fprintf (file, "\tla %s,", reg_names[0]);
28974 assemble_name (file, buf);
28975 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
28978 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
28979 fprintf (file, "\tbl %s%s\n",
28980 RS6000_MCOUNT, flag_pic ? "@plt" : "");
28981 break;
28983 case ABI_AIX:
28984 case ABI_ELFv2:
28985 case ABI_DARWIN:
28986 /* Don't do anything, done in output_profile_hook (). */
28987 break;
28993 /* The following variable holds the last insn issued by the scheduler. */
28995 static rtx last_scheduled_insn;
28997 /* The following variable helps to balance issuing of load and
28998 store instructions.  */
29000 static int load_store_pendulum;
29002 /* Power4 load update and store update instructions are cracked into a
29003 load or store and an integer insn which are executed in the same cycle.
29004 Branches have their own dispatch slot which does not count against the
29005 GCC issue rate, but it changes the program flow so there are no other
29006 instructions to issue in this cycle. */
29008 static int
29009 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29011 last_scheduled_insn = insn;
29012 if (GET_CODE (PATTERN (insn)) == USE
29013 || GET_CODE (PATTERN (insn)) == CLOBBER)
29015 cached_can_issue_more = more;
29016 return cached_can_issue_more;
29019 if (insn_terminates_group_p (insn, current_group))
29021 cached_can_issue_more = 0;
29022 return cached_can_issue_more;
29025 /* If the insn has no reservation, don't count it against the issue rate. */
29026 if (recog_memoized (insn) < 0)
29027 return more;
29029 if (rs6000_sched_groups)
29031 if (is_microcoded_insn (insn))
29032 cached_can_issue_more = 0;
29033 else if (is_cracked_insn (insn))
29034 cached_can_issue_more = more > 2 ? more - 2 : 0;
29035 else
29036 cached_can_issue_more = more - 1;
29038 return cached_can_issue_more;
29041 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
29042 return 0;
29044 cached_can_issue_more = more - 1;
29045 return cached_can_issue_more;
29048 static int
29049 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
29051 int r = rs6000_variable_issue_1 (insn, more);
29052 if (verbose)
29053 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
29054 return r;
29057 /* Adjust the cost of a scheduling dependency. Return the new cost of
29058 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
29060 static int
29061 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
29063 enum attr_type attr_type;
29065 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
29066 return cost;
29068 switch (REG_NOTE_KIND (link))
29070 case REG_DEP_TRUE:
29072 /* Data dependency; DEP_INSN writes a register that INSN reads
29073 some cycles later. */
29075 /* Separate a load from a narrower, dependent store. */
29076 if (rs6000_sched_groups
29077 && GET_CODE (PATTERN (insn)) == SET
29078 && GET_CODE (PATTERN (dep_insn)) == SET
29079 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
29080 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
29081 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
29082 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
29083 return cost + 14;
29085 attr_type = get_attr_type (insn);
29087 switch (attr_type)
29089 case TYPE_JMPREG:
29090 /* Tell the first scheduling pass about the latency between
29091 a mtctr and bctr (and mtlr and br/blr). The first
29092 scheduling pass will not know about this latency since
29093 the mtctr instruction, which has the latency associated
29094 to it, will be generated by reload. */
29095 return 4;
29096 case TYPE_BRANCH:
29097 /* Leave some extra cycles between a compare and its
29098 dependent branch, to inhibit expensive mispredicts. */
29099 if ((rs6000_cpu_attr == CPU_PPC603
29100 || rs6000_cpu_attr == CPU_PPC604
29101 || rs6000_cpu_attr == CPU_PPC604E
29102 || rs6000_cpu_attr == CPU_PPC620
29103 || rs6000_cpu_attr == CPU_PPC630
29104 || rs6000_cpu_attr == CPU_PPC750
29105 || rs6000_cpu_attr == CPU_PPC7400
29106 || rs6000_cpu_attr == CPU_PPC7450
29107 || rs6000_cpu_attr == CPU_PPCE5500
29108 || rs6000_cpu_attr == CPU_PPCE6500
29109 || rs6000_cpu_attr == CPU_POWER4
29110 || rs6000_cpu_attr == CPU_POWER5
29111 || rs6000_cpu_attr == CPU_POWER7
29112 || rs6000_cpu_attr == CPU_POWER8
29113 || rs6000_cpu_attr == CPU_POWER9
29114 || rs6000_cpu_attr == CPU_CELL)
29115 && recog_memoized (dep_insn)
29116 && (INSN_CODE (dep_insn) >= 0))
29118 switch (get_attr_type (dep_insn))
29120 case TYPE_CMP:
29121 case TYPE_FPCOMPARE:
29122 case TYPE_CR_LOGICAL:
29123 case TYPE_DELAYED_CR:
29124 return cost + 2;
29125 case TYPE_EXTS:
29126 case TYPE_MUL:
29127 if (get_attr_dot (dep_insn) == DOT_YES)
29128 return cost + 2;
29129 else
29130 break;
29131 case TYPE_SHIFT:
29132 if (get_attr_dot (dep_insn) == DOT_YES
29133 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
29134 return cost + 2;
29135 else
29136 break;
29137 default:
29138 break;
29140 break;
29142 case TYPE_STORE:
29143 case TYPE_FPSTORE:
29144 if ((rs6000_cpu == PROCESSOR_POWER6)
29145 && recog_memoized (dep_insn)
29146 && (INSN_CODE (dep_insn) >= 0))
29149 if (GET_CODE (PATTERN (insn)) != SET)
29150 /* If this happens, we have to extend this to schedule
29151 optimally. Return default for now. */
29152 return cost;
29154 /* Adjust the cost for the case where the value written
29155 by a fixed point operation is used as the address
29156 gen value on a store. */
29157 switch (get_attr_type (dep_insn))
29159 case TYPE_LOAD:
29160 case TYPE_CNTLZ:
29162 if (! store_data_bypass_p (dep_insn, insn))
29163 return get_attr_sign_extend (dep_insn)
29164 == SIGN_EXTEND_YES ? 6 : 4;
29165 break;
29167 case TYPE_SHIFT:
29169 if (! store_data_bypass_p (dep_insn, insn))
29170 return (get_attr_var_shift (dep_insn) == VAR_SHIFT_YES
29171 ? 6 : 3);
29172 break;
29174 case TYPE_INTEGER:
29175 case TYPE_ADD:
29176 case TYPE_LOGICAL:
29177 case TYPE_EXTS:
29178 case TYPE_INSERT:
29180 if (! store_data_bypass_p (dep_insn, insn))
29181 return 3;
29182 break;
29184 case TYPE_STORE:
29185 case TYPE_FPLOAD:
29186 case TYPE_FPSTORE:
29188 if (get_attr_update (dep_insn) == UPDATE_YES
29189 && ! store_data_bypass_p (dep_insn, insn))
29190 return 3;
29191 break;
29193 case TYPE_MUL:
29195 if (! store_data_bypass_p (dep_insn, insn))
29196 return 17;
29197 break;
29199 case TYPE_DIV:
29201 if (! store_data_bypass_p (dep_insn, insn))
29202 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29203 break;
29205 default:
29206 break;
29209 break;
29211 case TYPE_LOAD:
29212 if ((rs6000_cpu == PROCESSOR_POWER6)
29213 && recog_memoized (dep_insn)
29214 && (INSN_CODE (dep_insn) >= 0))
29217 /* Adjust the cost for the case where the value written
29218 by a fixed point instruction is used within the address
29219 gen portion of a subsequent load(u)(x) */
29220 switch (get_attr_type (dep_insn))
29222 case TYPE_LOAD:
29223 case TYPE_CNTLZ:
29225 if (set_to_load_agen (dep_insn, insn))
29226 return get_attr_sign_extend (dep_insn)
29227 == SIGN_EXTEND_YES ? 6 : 4;
29228 break;
29230 case TYPE_SHIFT:
29232 if (set_to_load_agen (dep_insn, insn))
29233 return (get_attr_var_shift (dep_insn) == VAR_SHIFT_YES
29234 ? 6 : 3);
29235 break;
29237 case TYPE_INTEGER:
29238 case TYPE_ADD:
29239 case TYPE_LOGICAL:
29240 case TYPE_EXTS:
29241 case TYPE_INSERT:
29243 if (set_to_load_agen (dep_insn, insn))
29244 return 3;
29245 break;
29247 case TYPE_STORE:
29248 case TYPE_FPLOAD:
29249 case TYPE_FPSTORE:
29251 if (get_attr_update (dep_insn) == UPDATE_YES
29252 && set_to_load_agen (dep_insn, insn))
29253 return 3;
29254 break;
29256 case TYPE_MUL:
29258 if (set_to_load_agen (dep_insn, insn))
29259 return 17;
29260 break;
29262 case TYPE_DIV:
29264 if (set_to_load_agen (dep_insn, insn))
29265 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
29266 break;
29268 default:
29269 break;
29272 break;
29274 case TYPE_FPLOAD:
29275 if ((rs6000_cpu == PROCESSOR_POWER6)
29276 && get_attr_update (insn) == UPDATE_NO
29277 && recog_memoized (dep_insn)
29278 && (INSN_CODE (dep_insn) >= 0)
29279 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
29280 return 2;
29282 default:
29283 break;
29286 /* Fall out to return default cost. */
29288 break;
29290 case REG_DEP_OUTPUT:
29291 /* Output dependency; DEP_INSN writes a register that INSN writes some
29292 cycles later. */
29293 if ((rs6000_cpu == PROCESSOR_POWER6)
29294 && recog_memoized (dep_insn)
29295 && (INSN_CODE (dep_insn) >= 0))
29297 attr_type = get_attr_type (insn);
29299 switch (attr_type)
29301 case TYPE_FP:
29302 if (get_attr_type (dep_insn) == TYPE_FP)
29303 return 1;
29304 break;
29305 case TYPE_FPLOAD:
29306 if (get_attr_update (insn) == UPDATE_NO
29307 && get_attr_type (dep_insn) == TYPE_MFFGPR)
29308 return 2;
29309 break;
29310 default:
29311 break;
29314 case REG_DEP_ANTI:
29315 /* Anti dependency; DEP_INSN reads a register that INSN writes some
29316 cycles later. */
29317 return 0;
29319 default:
29320 gcc_unreachable ();
29323 return cost;
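/* A concrete instance of the POWER6 store bias above: if DEP_INSN is a
   TYPE_ADD whose result feeds the address of a store INSN and there is
   no store-data bypass, the dependence cost is raised to 3 cycles so
   the scheduler keeps the address-generating add away from the store
   that consumes it.  */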
29326 /* Debug version of rs6000_adjust_cost. */
29328 static int
29329 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
29330 int cost)
29332 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
29334 if (ret != cost)
29336 const char *dep;
29338 switch (REG_NOTE_KIND (link))
29340 default: dep = "unknown dependency"; break;
29341 case REG_DEP_TRUE: dep = "data dependency"; break;
29342 case REG_DEP_OUTPUT: dep = "output dependency"; break;
29343 case REG_DEP_ANTI: dep = "anti dependency"; break;
29346 fprintf (stderr,
29347 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
29348 "%s, insn:\n", ret, cost, dep);
29350 debug_rtx (insn);
29353 return ret;
29356 /* The function returns true if INSN is microcoded.
29357 Return false otherwise. */
29359 static bool
29360 is_microcoded_insn (rtx_insn *insn)
29362 if (!insn || !NONDEBUG_INSN_P (insn)
29363 || GET_CODE (PATTERN (insn)) == USE
29364 || GET_CODE (PATTERN (insn)) == CLOBBER)
29365 return false;
29367 if (rs6000_cpu_attr == CPU_CELL)
29368 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
29370 if (rs6000_sched_groups
29371 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
29373 enum attr_type type = get_attr_type (insn);
29374 if ((type == TYPE_LOAD
29375 && get_attr_update (insn) == UPDATE_YES
29376 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
29377 || ((type == TYPE_LOAD || type == TYPE_STORE)
29378 && get_attr_update (insn) == UPDATE_YES
29379 && get_attr_indexed (insn) == INDEXED_YES)
29380 || type == TYPE_MFCR)
29381 return true;
29384 return false;
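/* On Power4/Power5, for example, a load with both update and indexed
   addressing (such as lwzux) matches the TYPE_LOAD / UPDATE_YES /
   INDEXED_YES test above and is treated as microcoded, so the group
   logic below gives it a dispatch group of its own.  */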
29387 /* The function returns true if INSN is cracked into 2 instructions
29388 by the processor (and therefore occupies 2 issue slots). */
29390 static bool
29391 is_cracked_insn (rtx_insn *insn)
29393 if (!insn || !NONDEBUG_INSN_P (insn)
29394 || GET_CODE (PATTERN (insn)) == USE
29395 || GET_CODE (PATTERN (insn)) == CLOBBER)
29396 return false;
29398 if (rs6000_sched_groups
29399 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
29401 enum attr_type type = get_attr_type (insn);
29402 if ((type == TYPE_LOAD
29403 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29404 && get_attr_update (insn) == UPDATE_NO)
29405 || (type == TYPE_LOAD
29406 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
29407 && get_attr_update (insn) == UPDATE_YES
29408 && get_attr_indexed (insn) == INDEXED_NO)
29409 || (type == TYPE_STORE
29410 && get_attr_update (insn) == UPDATE_YES
29411 && get_attr_indexed (insn) == INDEXED_NO)
29412 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
29413 && get_attr_update (insn) == UPDATE_YES)
29414 || type == TYPE_DELAYED_CR
29415 || (type == TYPE_EXTS
29416 && get_attr_dot (insn) == DOT_YES)
29417 || (type == TYPE_SHIFT
29418 && get_attr_dot (insn) == DOT_YES
29419 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
29420 || (type == TYPE_MUL
29421 && get_attr_dot (insn) == DOT_YES)
29422 || type == TYPE_DIV
29423 || (type == TYPE_INSERT
29424 && get_attr_size (insn) == SIZE_32))
29425 return true;
29428 return false;
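/* For example, a non-indexed load with update (such as lwzu) satisfies
   the TYPE_LOAD / UPDATE_YES / INDEXED_NO test above: the hardware
   cracks it into a load and an add, so it consumes two issue slots in
   its dispatch group.  */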
29431 /* The function returns true if INSN can be issued only from
29432 the branch slot. */
29434 static bool
29435 is_branch_slot_insn (rtx_insn *insn)
29437 if (!insn || !NONDEBUG_INSN_P (insn)
29438 || GET_CODE (PATTERN (insn)) == USE
29439 || GET_CODE (PATTERN (insn)) == CLOBBER)
29440 return false;
29442 if (rs6000_sched_groups)
29444 enum attr_type type = get_attr_type (insn);
29445 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
29446 return true;
29447 return false;
29450 return false;
29453 /* The function returns true if OUT_INSN sets a value that is
29454 used in the address generation computation of IN_INSN.  */
29455 static bool
29456 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
29458 rtx out_set, in_set;
29460 /* For performance reasons, only handle the simple case where
29461 both insns are a single_set.  */
29462 out_set = single_set (out_insn);
29463 if (out_set)
29465 in_set = single_set (in_insn);
29466 if (in_set)
29467 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
29470 return false;
29473 /* Try to determine base/offset/size parts of the given MEM.
29474 Return true if successful, false if any of the values couldn't
29475 be determined.
29477 This function only looks for REG or REG+CONST address forms.
29478 REG+REG address form will return false. */
29480 static bool
29481 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
29482 HOST_WIDE_INT *size)
29484 rtx addr_rtx;
29485 if (MEM_SIZE_KNOWN_P (mem))
29486 *size = MEM_SIZE (mem);
29487 else
29488 return false;
29490 addr_rtx = XEXP (mem, 0);
29491 if (GET_CODE (addr_rtx) == PRE_MODIFY)
29492 addr_rtx = XEXP (addr_rtx, 1);
29494 *offset = 0;
29495 while (GET_CODE (addr_rtx) == PLUS
29496 && CONST_INT_P (XEXP (addr_rtx, 1)))
29498 *offset += INTVAL (XEXP (addr_rtx, 1));
29499 addr_rtx = XEXP (addr_rtx, 0);
29501 if (!REG_P (addr_rtx))
29502 return false;
29504 *base = addr_rtx;
29505 return true;
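/* For example, for (mem:DI (plus:DI (reg 1) (const_int 64))) with a
   known size of 8, the loop above peels the PLUS and yields base = r1,
   offset = 64, size = 8.  An indexed (reg+reg) address fails the final
   REG_P test, so the function returns false for it.  */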
29508 /* Return true if the target storage location of MEM1 is adjacent
29509 to the target storage location of MEM2.  */
29512 static bool
29513 adjacent_mem_locations (rtx mem1, rtx mem2)
29515 rtx reg1, reg2;
29516 HOST_WIDE_INT off1, size1, off2, size2;
29518 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29519 && get_memref_parts (mem2, &reg2, &off2, &size2))
29520 return ((REGNO (reg1) == REGNO (reg2))
29521 && ((off1 + size1 == off2)
29522 || (off2 + size2 == off1)));
29524 return false;
29527 /* This function returns true if it can be determined that the two MEM
29528 locations overlap by at least 1 byte based on base reg/offset/size. */
29530 static bool
29531 mem_locations_overlap (rtx mem1, rtx mem2)
29533 rtx reg1, reg2;
29534 HOST_WIDE_INT off1, size1, off2, size2;
29536 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29537 && get_memref_parts (mem2, &reg2, &off2, &size2))
29538 return ((REGNO (reg1) == REGNO (reg2))
29539 && (((off1 <= off2) && (off1 + size1 > off2))
29540 || ((off2 <= off1) && (off2 + size2 > off1))));
29542 return false;
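/* Worked example for the two predicates above: 8-byte stores to 8(r1)
   and 16(r1) are adjacent (off1 + size1 == off2) but do not overlap,
   while 8-byte stores to 8(r1) and 12(r1) overlap by four bytes.  Both
   predicates insist on the same base register, so reg+reg addresses
   never match.  */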
29545 /* A C statement (sans semicolon) to update the integer scheduling
29546 priority INSN_PRIORITY (INSN). Increase the priority to execute the
29547 INSN earlier, reduce the priority to execute INSN later. Do not
29548 define this macro if you do not need to adjust the scheduling
29549 priorities of insns. */
29551 static int
29552 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
29554 rtx load_mem, str_mem;
29555 /* On machines (like the 750) which have asymmetric integer units,
29556 where one integer unit can do multiply and divides and the other
29557 can't, reduce the priority of multiply/divide so it is scheduled
29558 after other integer operations.  */
29560 #if 0
29561 if (! INSN_P (insn))
29562 return priority;
29564 if (GET_CODE (PATTERN (insn)) == USE)
29565 return priority;
29567 switch (rs6000_cpu_attr) {
29568 case CPU_PPC750:
29569 switch (get_attr_type (insn))
29571 default:
29572 break;
29574 case TYPE_MUL:
29575 case TYPE_DIV:
29576 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
29577 priority, priority);
29578 if (priority >= 0 && priority < 0x01000000)
29579 priority >>= 3;
29580 break;
29583 #endif
29585 if (insn_must_be_first_in_group (insn)
29586 && reload_completed
29587 && current_sched_info->sched_max_insns_priority
29588 && rs6000_sched_restricted_insns_priority)
29591 /* Prioritize insns that can be dispatched only in the first
29592 dispatch slot. */
29593 if (rs6000_sched_restricted_insns_priority == 1)
29594 /* Attach highest priority to insn. This means that in
29595 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
29596 precede 'priority' (critical path) considerations. */
29597 return current_sched_info->sched_max_insns_priority;
29598 else if (rs6000_sched_restricted_insns_priority == 2)
29599 /* Increase priority of insn by a minimal amount. This means that in
29600 haifa-sched.c:ready_sort(), only 'priority' (critical path)
29601 considerations precede dispatch-slot restriction considerations. */
29602 return (priority + 1);
29605 if (rs6000_cpu == PROCESSOR_POWER6
29606 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
29607 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
29608 /* Attach highest priority to insn if the scheduler has just issued two
29609 stores and this instruction is a load, or two loads and this instruction
29610 is a store. Power6 wants loads and stores scheduled alternately
29611 when possible */
29612 return current_sched_info->sched_max_insns_priority;
29614 return priority;
29617 /* Return true if the instruction is nonpipelined on the Cell. */
29618 static bool
29619 is_nonpipeline_insn (rtx_insn *insn)
29621 enum attr_type type;
29622 if (!insn || !NONDEBUG_INSN_P (insn)
29623 || GET_CODE (PATTERN (insn)) == USE
29624 || GET_CODE (PATTERN (insn)) == CLOBBER)
29625 return false;
29627 type = get_attr_type (insn);
29628 if (type == TYPE_MUL
29629 || type == TYPE_DIV
29630 || type == TYPE_SDIV
29631 || type == TYPE_DDIV
29632 || type == TYPE_SSQRT
29633 || type == TYPE_DSQRT
29634 || type == TYPE_MFCR
29635 || type == TYPE_MFCRF
29636 || type == TYPE_MFJMPR)
29638 return true;
29640 return false;
29644 /* Return how many instructions the machine can issue per cycle. */
29646 static int
29647 rs6000_issue_rate (void)
29649 /* Unless scheduling for register pressure, use issue rate of 1 for
29650 first scheduling pass to decrease degradation. */
29651 if (!reload_completed && !flag_sched_pressure)
29652 return 1;
29654 switch (rs6000_cpu_attr) {
29655 case CPU_RS64A:
29656 case CPU_PPC601: /* ? */
29657 case CPU_PPC7450:
29658 return 3;
29659 case CPU_PPC440:
29660 case CPU_PPC603:
29661 case CPU_PPC750:
29662 case CPU_PPC7400:
29663 case CPU_PPC8540:
29664 case CPU_PPC8548:
29665 case CPU_CELL:
29666 case CPU_PPCE300C2:
29667 case CPU_PPCE300C3:
29668 case CPU_PPCE500MC:
29669 case CPU_PPCE500MC64:
29670 case CPU_PPCE5500:
29671 case CPU_PPCE6500:
29672 case CPU_TITAN:
29673 return 2;
29674 case CPU_PPC476:
29675 case CPU_PPC604:
29676 case CPU_PPC604E:
29677 case CPU_PPC620:
29678 case CPU_PPC630:
29679 return 4;
29680 case CPU_POWER4:
29681 case CPU_POWER5:
29682 case CPU_POWER6:
29683 case CPU_POWER7:
29684 return 5;
29685 case CPU_POWER8:
29686 case CPU_POWER9:
29687 return 7;
29688 default:
29689 return 1;
29693 /* Return how many instructions to look ahead for better insn
29694 scheduling. */
29696 static int
29697 rs6000_use_sched_lookahead (void)
29699 switch (rs6000_cpu_attr)
29701 case CPU_PPC8540:
29702 case CPU_PPC8548:
29703 return 4;
29705 case CPU_CELL:
29706 return (reload_completed ? 8 : 0);
29708 default:
29709 return 0;
29713 /* We are choosing insn from the ready queue. Return zero if INSN can be
29714 chosen. */
29715 static int
29716 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
29718 if (ready_index == 0)
29719 return 0;
29721 if (rs6000_cpu_attr != CPU_CELL)
29722 return 0;
29724 gcc_assert (insn != NULL_RTX && INSN_P (insn));
29726 if (!reload_completed
29727 || is_nonpipeline_insn (insn)
29728 || is_microcoded_insn (insn))
29729 return 1;
29731 return 0;
29734 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
29735 and return true. */
29737 static bool
29738 find_mem_ref (rtx pat, rtx *mem_ref)
29740 const char * fmt;
29741 int i, j;
29743 /* stack_tie does not produce any real memory traffic. */
29744 if (tie_operand (pat, VOIDmode))
29745 return false;
29747 if (GET_CODE (pat) == MEM)
29749 *mem_ref = pat;
29750 return true;
29753 /* Recursively process the pattern. */
29754 fmt = GET_RTX_FORMAT (GET_CODE (pat));
29756 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
29758 if (fmt[i] == 'e')
29760 if (find_mem_ref (XEXP (pat, i), mem_ref))
29761 return true;
29763 else if (fmt[i] == 'E')
29764 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
29766 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
29767 return true;
29771 return false;
29774 /* Determine if PAT is a PATTERN of a load insn. */
29776 static bool
29777 is_load_insn1 (rtx pat, rtx *load_mem)
29779 if (!pat)
29780 return false;
29782 if (GET_CODE (pat) == SET)
29783 return find_mem_ref (SET_SRC (pat), load_mem);
29785 if (GET_CODE (pat) == PARALLEL)
29787 int i;
29789 for (i = 0; i < XVECLEN (pat, 0); i++)
29790 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
29791 return true;
29794 return false;
29797 /* Determine if INSN loads from memory. */
29799 static bool
29800 is_load_insn (rtx insn, rtx *load_mem)
29802 if (!insn || !INSN_P (insn))
29803 return false;
29805 if (CALL_P (insn))
29806 return false;
29808 return is_load_insn1 (PATTERN (insn), load_mem);
29811 /* Determine if PAT is a PATTERN of a store insn. */
29813 static bool
29814 is_store_insn1 (rtx pat, rtx *str_mem)
29816 if (!pat)
29817 return false;
29819 if (GET_CODE (pat) == SET)
29820 return find_mem_ref (SET_DEST (pat), str_mem);
29822 if (GET_CODE (pat) == PARALLEL)
29824 int i;
29826 for (i = 0; i < XVECLEN (pat, 0); i++)
29827 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
29828 return true;
29831 return false;
29834 /* Determine if INSN stores to memory. */
29836 static bool
29837 is_store_insn (rtx insn, rtx *str_mem)
29839 if (!insn || !INSN_P (insn))
29840 return false;
29842 return is_store_insn1 (PATTERN (insn), str_mem);
29845 /* Returns whether the dependence between INSN and NEXT is considered
29846 costly by the given target. */
29848 static bool
29849 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
29851 rtx insn;
29852 rtx next;
29853 rtx load_mem, str_mem;
29855 /* If the flag is not enabled - no dependence is considered costly;
29856 allow all dependent insns in the same group.
29857 This is the most aggressive option. */
29858 if (rs6000_sched_costly_dep == no_dep_costly)
29859 return false;
29861 /* If the flag is set to 1 - a dependence is always considered costly;
29862 do not allow dependent instructions in the same group.
29863 This is the most conservative option. */
29864 if (rs6000_sched_costly_dep == all_deps_costly)
29865 return true;
29867 insn = DEP_PRO (dep);
29868 next = DEP_CON (dep);
29870 if (rs6000_sched_costly_dep == store_to_load_dep_costly
29871 && is_load_insn (next, &load_mem)
29872 && is_store_insn (insn, &str_mem))
29873 /* Prevent load after store in the same group. */
29874 return true;
29876 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
29877 && is_load_insn (next, &load_mem)
29878 && is_store_insn (insn, &str_mem)
29879 && DEP_TYPE (dep) == REG_DEP_TRUE
29880 && mem_locations_overlap(str_mem, load_mem))
29881 /* Prevent load after store in the same group if it is a true
29882 dependence. */
29883 return true;
29885 /* The flag is set to X; dependences with latency >= X are considered costly,
29886 and will not be scheduled in the same group. */
29887 if (rs6000_sched_costly_dep <= max_dep_latency
29888 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
29889 return true;
29891 return false;
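/* As an illustration: with rs6000_sched_costly_dep set to
   store_to_load_dep_costly, any store followed by a dependent load is
   kept out of the store's dispatch group, whereas
   true_store_to_load_dep_costly separates the pair only when the two
   locations provably overlap.  A numeric setting instead separates any
   dependence whose remaining latency (cost - distance) reaches that
   value.  */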
29894 /* Return the next insn after INSN that is found before TAIL is reached,
29895 skipping any "non-active" insns - insns that will not actually occupy
29896 an issue slot. Return NULL_RTX if such an insn is not found. */
29898 static rtx_insn *
29899 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
29901 if (insn == NULL_RTX || insn == tail)
29902 return NULL;
29904 while (1)
29906 insn = NEXT_INSN (insn);
29907 if (insn == NULL_RTX || insn == tail)
29908 return NULL;
29910 if (CALL_P (insn)
29911 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
29912 || (NONJUMP_INSN_P (insn)
29913 && GET_CODE (PATTERN (insn)) != USE
29914 && GET_CODE (PATTERN (insn)) != CLOBBER
29915 && INSN_CODE (insn) != CODE_FOR_stack_tie))
29916 break;
29918 return insn;
29921 /* We are about to begin issuing insns for this clock cycle. */
29923 static int
29924 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
29925 rtx_insn **ready ATTRIBUTE_UNUSED,
29926 int *pn_ready ATTRIBUTE_UNUSED,
29927 int clock_var ATTRIBUTE_UNUSED)
29929 int n_ready = *pn_ready;
29931 if (sched_verbose)
29932 fprintf (dump, "// rs6000_sched_reorder :\n");
29934 /* Reorder the ready list, if the second to last ready insn
29935 is a nonpipelined insn. */
29936 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
29938 if (is_nonpipeline_insn (ready[n_ready - 1])
29939 && (recog_memoized (ready[n_ready - 2]) > 0))
29940 /* Simply swap first two insns. */
29941 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
29944 if (rs6000_cpu == PROCESSOR_POWER6)
29945 load_store_pendulum = 0;
29947 return rs6000_issue_rate ();
29950 /* Like rs6000_sched_reorder, but called after issuing each insn. */
29952 static int
29953 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
29954 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
29956 if (sched_verbose)
29957 fprintf (dump, "// rs6000_sched_reorder2 :\n");
29959 /* For Power6, we need to handle some special cases to try and keep the
29960 store queue from overflowing and triggering expensive flushes.
29962 This code monitors how load and store instructions are being issued
29963 and skews the ready list one way or the other to increase the likelihood
29964 that a desired instruction is issued at the proper time.
29966 A couple of things are done. First, we maintain a "load_store_pendulum"
29967 to track the current state of load/store issue.
29969 - If the pendulum is at zero, then no loads or stores have been
29970 issued in the current cycle so we do nothing.
29972 - If the pendulum is 1, then a single load has been issued in this
29973 cycle and we attempt to locate another load in the ready list to
29974 issue with it.
29976 - If the pendulum is -2, then two stores have already been
29977 issued in this cycle, so we increase the priority of the first load
29978 in the ready list to increase its likelihood of being chosen first
29979 in the next cycle.
29981 - If the pendulum is -1, then a single store has been issued in this
29982 cycle and we attempt to locate another store in the ready list to
29983 issue with it, preferring a store to an adjacent memory location to
29984 facilitate store pairing in the store queue.
29986 - If the pendulum is 2, then two loads have already been
29987 issued in this cycle, so we increase the priority of the first store
29988 in the ready list to increase its likelihood of being chosen first
29989 in the next cycle.
29991 - If the pendulum < -2 or > 2, then do nothing.
29993 Note: This code covers the most common scenarios.  There exist
29994 non-load/store instructions which make use of the LSU and which
29995 would need to be accounted for to strictly model the behavior
29996 of the machine. Those instructions are currently unaccounted
29997 for to help minimize compile time overhead of this code.
29999 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
30001 int pos;
30002 int i;
30003 rtx_insn *tmp;
30004 rtx load_mem, str_mem;
30006 if (is_store_insn (last_scheduled_insn, &str_mem))
30007 /* Issuing a store, swing the load_store_pendulum to the left */
30008 load_store_pendulum--;
30009 else if (is_load_insn (last_scheduled_insn, &load_mem))
30010 /* Issuing a load, swing the load_store_pendulum to the right */
30011 load_store_pendulum++;
30012 else
30013 return cached_can_issue_more;
30015 /* If the pendulum is balanced, or there is only one instruction on
30016 the ready list, then all is well, so return. */
30017 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
30018 return cached_can_issue_more;
30020 if (load_store_pendulum == 1)
30022 /* A load has been issued in this cycle. Scan the ready list
30023 for another load to issue with it */
30024 pos = *pn_ready - 1;
30026 while (pos >= 0)
30028 if (is_load_insn (ready[pos], &load_mem))
30030 /* Found a load. Move it to the head of the ready list,
30031 and adjust its priority so that it is more likely to
30032 stay there */
30033 tmp = ready[pos];
30034 for (i = pos; i < *pn_ready - 1; i++)
30035 ready[i] = ready[i + 1];
30036 ready[*pn_ready - 1] = tmp;
30038 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30039 INSN_PRIORITY (tmp)++;
30040 break;
30042 pos--;
30045 else if (load_store_pendulum == -2)
30047 /* Two stores have been issued in this cycle. Increase the
30048 priority of the first load in the ready list to favor it for
30049 issuing in the next cycle. */
30050 pos = *pn_ready - 1;
30052 while (pos >= 0)
30054 if (is_load_insn (ready[pos], &load_mem)
30055 && !sel_sched_p ()
30056 && INSN_PRIORITY_KNOWN (ready[pos]))
30058 INSN_PRIORITY (ready[pos])++;
30060 /* Adjust the pendulum to account for the fact that a load
30061 was found and increased in priority. This is to prevent
30062 increasing the priority of multiple loads */
30063 load_store_pendulum--;
30065 break;
30067 pos--;
30070 else if (load_store_pendulum == -1)
30072 /* A store has been issued in this cycle. Scan the ready list for
30073 another store to issue with it, preferring a store to an adjacent
30074 memory location */
30075 int first_store_pos = -1;
30077 pos = *pn_ready - 1;
30079 while (pos >= 0)
30081 if (is_store_insn (ready[pos], &str_mem))
30083 rtx str_mem2;
30084 /* Maintain the index of the first store found on the
30085 list */
30086 if (first_store_pos == -1)
30087 first_store_pos = pos;
30089 if (is_store_insn (last_scheduled_insn, &str_mem2)
30090 && adjacent_mem_locations (str_mem, str_mem2))
30092 /* Found an adjacent store. Move it to the head of the
30093 ready list, and adjust its priority so that it is
30094 more likely to stay there */
30095 tmp = ready[pos];
30096 for (i = pos; i < *pn_ready - 1; i++)
30097 ready[i] = ready[i + 1];
30098 ready[*pn_ready - 1] = tmp;
30100 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30101 INSN_PRIORITY (tmp)++;
30103 first_store_pos = -1;
30105 break;
30108 pos--;
30111 if (first_store_pos >= 0)
30113 /* An adjacent store wasn't found, but a non-adjacent store was,
30114 so move the non-adjacent store to the front of the ready
30115 list, and adjust its priority so that it is more likely to
30116 stay there. */
30117 tmp = ready[first_store_pos];
30118 for (i = first_store_pos; i < *pn_ready - 1; i++)
30119 ready[i] = ready[i + 1];
30120 ready[*pn_ready - 1] = tmp;
30121 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
30122 INSN_PRIORITY (tmp)++;
30125 else if (load_store_pendulum == 2)
30127 /* Two loads have been issued in this cycle. Increase the priority
30128 of the first store in the ready list to favor it for issuing in
30129 the next cycle. */
30130 pos = *pn_ready - 1;
30132 while (pos >= 0)
30134 if (is_store_insn (ready[pos], &str_mem)
30135 && !sel_sched_p ()
30136 && INSN_PRIORITY_KNOWN (ready[pos]))
30138 INSN_PRIORITY (ready[pos])++;
30140 /* Adjust the pendulum to account for the fact that a store
30141 was found and increased in priority. This is to prevent
30142 increasing the priority of multiple stores */
30143 load_store_pendulum++;
30145 break;
30147 pos--;
30152 return cached_can_issue_more;
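/* Sample pendulum walk on POWER6: a load issues (pendulum 0 -> 1) and
   the code above hoists another load to the head of the ready list; a
   second load issues (1 -> 2) and the first store on the list gets a
   priority bump so a store is favored next cycle, at which point the
   pendulum starts swinging back.  */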
30155 /* Return whether the presence of INSN causes a dispatch group termination
30156 of group WHICH_GROUP.
30158 If WHICH_GROUP == current_group, this function will return true if INSN
30159 causes the termination of the current group (i.e., the dispatch group to
30160 which INSN belongs). This means that INSN will be the last insn in the
30161 group it belongs to.
30163 If WHICH_GROUP == previous_group, this function will return true if INSN
30164 causes the termination of the previous group (i.e., the dispatch group that
30165 precedes the group to which INSN belongs). This means that INSN will be
30166 the first insn in the group it belongs to.  */
30168 static bool
30169 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
30171 bool first, last;
30173 if (! insn)
30174 return false;
30176 first = insn_must_be_first_in_group (insn);
30177 last = insn_must_be_last_in_group (insn);
30179 if (first && last)
30180 return true;
30182 if (which_group == current_group)
30183 return last;
30184 else if (which_group == previous_group)
30185 return first;
30187 return false;
30191 static bool
30192 insn_must_be_first_in_group (rtx_insn *insn)
30194 enum attr_type type;
30196 if (!insn
30197 || NOTE_P (insn)
30198 || DEBUG_INSN_P (insn)
30199 || GET_CODE (PATTERN (insn)) == USE
30200 || GET_CODE (PATTERN (insn)) == CLOBBER)
30201 return false;
30203 switch (rs6000_cpu)
30205 case PROCESSOR_POWER5:
30206 if (is_cracked_insn (insn))
30207 return true;
30208 case PROCESSOR_POWER4:
30209 if (is_microcoded_insn (insn))
30210 return true;
30212 if (!rs6000_sched_groups)
30213 return false;
30215 type = get_attr_type (insn);
30217 switch (type)
30219 case TYPE_MFCR:
30220 case TYPE_MFCRF:
30221 case TYPE_MTCR:
30222 case TYPE_DELAYED_CR:
30223 case TYPE_CR_LOGICAL:
30224 case TYPE_MTJMPR:
30225 case TYPE_MFJMPR:
30226 case TYPE_DIV:
30227 case TYPE_LOAD_L:
30228 case TYPE_STORE_C:
30229 case TYPE_ISYNC:
30230 case TYPE_SYNC:
30231 return true;
30232 default:
30233 break;
30235 break;
30236 case PROCESSOR_POWER6:
30237 type = get_attr_type (insn);
30239 switch (type)
30241 case TYPE_EXTS:
30242 case TYPE_CNTLZ:
30243 case TYPE_TRAP:
30244 case TYPE_MUL:
30245 case TYPE_INSERT:
30246 case TYPE_FPCOMPARE:
30247 case TYPE_MFCR:
30248 case TYPE_MTCR:
30249 case TYPE_MFJMPR:
30250 case TYPE_MTJMPR:
30251 case TYPE_ISYNC:
30252 case TYPE_SYNC:
30253 case TYPE_LOAD_L:
30254 case TYPE_STORE_C:
30255 return true;
30256 case TYPE_SHIFT:
30257 if (get_attr_dot (insn) == DOT_NO
30258 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
30259 return true;
30260 else
30261 break;
30262 case TYPE_DIV:
30263 if (get_attr_size (insn) == SIZE_32)
30264 return true;
30265 else
30266 break;
30267 case TYPE_LOAD:
30268 case TYPE_STORE:
30269 case TYPE_FPLOAD:
30270 case TYPE_FPSTORE:
30271 if (get_attr_update (insn) == UPDATE_YES)
30272 return true;
30273 else
30274 break;
30275 default:
30276 break;
30278 break;
30279 case PROCESSOR_POWER7:
30280 type = get_attr_type (insn);
30282 switch (type)
30284 case TYPE_CR_LOGICAL:
30285 case TYPE_MFCR:
30286 case TYPE_MFCRF:
30287 case TYPE_MTCR:
30288 case TYPE_DIV:
30289 case TYPE_ISYNC:
30290 case TYPE_LOAD_L:
30291 case TYPE_STORE_C:
30292 case TYPE_MFJMPR:
30293 case TYPE_MTJMPR:
30294 return true;
30295 case TYPE_MUL:
30296 case TYPE_SHIFT:
30297 case TYPE_EXTS:
30298 if (get_attr_dot (insn) == DOT_YES)
30299 return true;
30300 else
30301 break;
30302 case TYPE_LOAD:
30303 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30304 || get_attr_update (insn) == UPDATE_YES)
30305 return true;
30306 else
30307 break;
30308 case TYPE_STORE:
30309 case TYPE_FPLOAD:
30310 case TYPE_FPSTORE:
30311 if (get_attr_update (insn) == UPDATE_YES)
30312 return true;
30313 else
30314 break;
30315 default:
30316 break;
30318 break;
30319 case PROCESSOR_POWER8:
30320 case PROCESSOR_POWER9:
30321 type = get_attr_type (insn);
30323 switch (type)
30325 case TYPE_CR_LOGICAL:
30326 case TYPE_DELAYED_CR:
30327 case TYPE_MFCR:
30328 case TYPE_MFCRF:
30329 case TYPE_MTCR:
30330 case TYPE_SYNC:
30331 case TYPE_ISYNC:
30332 case TYPE_LOAD_L:
30333 case TYPE_STORE_C:
30334 case TYPE_VECSTORE:
30335 case TYPE_MFJMPR:
30336 case TYPE_MTJMPR:
30337 return true;
30338 case TYPE_SHIFT:
30339 case TYPE_EXTS:
30340 case TYPE_MUL:
30341 if (get_attr_dot (insn) == DOT_YES)
30342 return true;
30343 else
30344 break;
30345 case TYPE_LOAD:
30346 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30347 || get_attr_update (insn) == UPDATE_YES)
30348 return true;
30349 else
30350 break;
30351 case TYPE_STORE:
30352 if (get_attr_update (insn) == UPDATE_YES
30353 && get_attr_indexed (insn) == INDEXED_YES)
30354 return true;
30355 else
30356 break;
30357 default:
30358 break;
30360 break;
30361 default:
30362 break;
30365 return false;
30368 static bool
30369 insn_must_be_last_in_group (rtx_insn *insn)
30371 enum attr_type type;
30373 if (!insn
30374 || NOTE_P (insn)
30375 || DEBUG_INSN_P (insn)
30376 || GET_CODE (PATTERN (insn)) == USE
30377 || GET_CODE (PATTERN (insn)) == CLOBBER)
30378 return false;
30380 switch (rs6000_cpu) {
30381 case PROCESSOR_POWER4:
30382 case PROCESSOR_POWER5:
30383 if (is_microcoded_insn (insn))
30384 return true;
30386 if (is_branch_slot_insn (insn))
30387 return true;
30389 break;
30390 case PROCESSOR_POWER6:
30391 type = get_attr_type (insn);
30393 switch (type)
30395 case TYPE_EXTS:
30396 case TYPE_CNTLZ:
30397 case TYPE_TRAP:
30398 case TYPE_MUL:
30399 case TYPE_FPCOMPARE:
30400 case TYPE_MFCR:
30401 case TYPE_MTCR:
30402 case TYPE_MFJMPR:
30403 case TYPE_MTJMPR:
30404 case TYPE_ISYNC:
30405 case TYPE_SYNC:
30406 case TYPE_LOAD_L:
30407 case TYPE_STORE_C:
30408 return true;
30409 case TYPE_SHIFT:
30410 if (get_attr_dot (insn) == DOT_NO
30411 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
30412 return true;
30413 else
30414 break;
30415 case TYPE_DIV:
30416 if (get_attr_size (insn) == SIZE_32)
30417 return true;
30418 else
30419 break;
30420 default:
30421 break;
30423 break;
30424 case PROCESSOR_POWER7:
30425 type = get_attr_type (insn);
30427 switch (type)
30429 case TYPE_ISYNC:
30430 case TYPE_SYNC:
30431 case TYPE_LOAD_L:
30432 case TYPE_STORE_C:
30433 return true;
30434 case TYPE_LOAD:
30435 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30436 && get_attr_update (insn) == UPDATE_YES)
30437 return true;
30438 else
30439 break;
30440 case TYPE_STORE:
30441 if (get_attr_update (insn) == UPDATE_YES
30442 && get_attr_indexed (insn) == INDEXED_YES)
30443 return true;
30444 else
30445 break;
30446 default:
30447 break;
30449 break;
30450 case PROCESSOR_POWER8:
30451 case PROCESSOR_POWER9:
30452 type = get_attr_type (insn);
30454 switch (type)
30456 case TYPE_MFCR:
30457 case TYPE_MTCR:
30458 case TYPE_ISYNC:
30459 case TYPE_SYNC:
30460 case TYPE_LOAD_L:
30461 case TYPE_STORE_C:
30462 return true;
30463 case TYPE_LOAD:
30464 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30465 && get_attr_update (insn) == UPDATE_YES)
30466 return true;
30467 else
30468 break;
30469 case TYPE_STORE:
30470 if (get_attr_update (insn) == UPDATE_YES
30471 && get_attr_indexed (insn) == INDEXED_YES)
30472 return true;
30473 else
30474 break;
30475 default:
30476 break;
30478 break;
30479 default:
30480 break;
30483 return false;
30486 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
30487 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
30489 static bool
30490 is_costly_group (rtx *group_insns, rtx next_insn)
30492 int i;
30493 int issue_rate = rs6000_issue_rate ();
30495 for (i = 0; i < issue_rate; i++)
30497 sd_iterator_def sd_it;
30498 dep_t dep;
30499 rtx insn = group_insns[i];
30501 if (!insn)
30502 continue;
30504 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
30506 rtx next = DEP_CON (dep);
30508 if (next == next_insn
30509 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
30510 return true;
30514 return false;
30517 /* Utility of the function redefine_groups.
30518 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
30519 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
30520 to keep it "far" (in a separate group) from GROUP_INSNS, following
30521 one of the following schemes, depending on the value of the flag
30522 -minsert-sched-nops = X:
30523 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
30524 in order to force NEXT_INSN into a separate group.
30525 (2) X < sched_finish_regroup_exact: insert exactly X nops.
30526 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
30527 insertion (has a group just ended, how many vacant issue slots remain in the
30528 last group, and how many dispatch groups were encountered so far). */
30530 static int
30531 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
30532 rtx_insn *next_insn, bool *group_end, int can_issue_more,
30533 int *group_count)
30535 rtx nop;
30536 bool force;
30537 int issue_rate = rs6000_issue_rate ();
30538 bool end = *group_end;
30539 int i;
30541 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
30542 return can_issue_more;
30544 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
30545 return can_issue_more;
30547 force = is_costly_group (group_insns, next_insn);
30548 if (!force)
30549 return can_issue_more;
30551 if (sched_verbose > 6)
30552 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
30553 *group_count ,can_issue_more);
30555 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
30557 if (*group_end)
30558 can_issue_more = 0;
30560 /* Since only a branch can be issued in the last issue_slot, it is
30561 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
30562 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
30563 in this case the last nop will start a new group and the branch
30564 will be forced to the new group. */
30565 if (can_issue_more && !is_branch_slot_insn (next_insn))
30566 can_issue_more--;
30568 /* Do we have a special group ending nop? */
30569 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
30570 || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
30572 nop = gen_group_ending_nop ();
30573 emit_insn_before (nop, next_insn);
30574 can_issue_more = 0;
30576 else
30577 while (can_issue_more > 0)
30579 nop = gen_nop ();
30580 emit_insn_before (nop, next_insn);
30581 can_issue_more--;
30584 *group_end = true;
30585 return 0;
30588 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
30590 int n_nops = rs6000_sched_insert_nops;
30592 /* Nops can't be issued from the branch slot, so the effective
30593 issue_rate for nops is 'issue_rate - 1'. */
30594 if (can_issue_more == 0)
30595 can_issue_more = issue_rate;
30596 can_issue_more--;
30597 if (can_issue_more == 0)
30599 can_issue_more = issue_rate - 1;
30600 (*group_count)++;
30601 end = true;
30602 for (i = 0; i < issue_rate; i++)
30604 group_insns[i] = 0;
30608 while (n_nops > 0)
30610 nop = gen_nop ();
30611 emit_insn_before (nop, next_insn);
30612 if (can_issue_more == issue_rate - 1) /* new group begins */
30613 end = false;
30614 can_issue_more--;
30615 if (can_issue_more == 0)
30617 can_issue_more = issue_rate - 1;
30618 (*group_count)++;
30619 end = true;
30620 for (i = 0; i < issue_rate; i++)
30622 group_insns[i] = 0;
30625 n_nops--;
30628 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
30629 can_issue_more++;
30631 /* Is next_insn going to start a new group? */
30632 *group_end
30633 = (end
30634 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30635 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30636 || (can_issue_more < issue_rate &&
30637 insn_terminates_group_p (next_insn, previous_group)));
30638 if (*group_end && end)
30639 (*group_count)--;
30641 if (sched_verbose > 6)
30642 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
30643 *group_count, can_issue_more);
30644 return can_issue_more;
30647 return can_issue_more;
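/* Numeric example for the sched_finish_regroup_exact scheme above:
   with 3 vacant slots before a non-branch NEXT_INSN, can_issue_more is
   first decremented to 2 (only a branch may use the last slot) and two
   nops are emitted; on POWER6 and later a single group-ending nop is
   emitted instead.  */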
30650 /* This function tries to synch the dispatch groups that the compiler "sees"
30651 with the dispatch groups that the processor dispatcher is expected to
30652 form in practice. It tries to achieve this synchronization by forcing the
30653 estimated processor grouping on the compiler (as opposed to the function
30654 'pad_groups' which tries to force the scheduler's grouping on the processor).
30656 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
30657 examines the (estimated) dispatch groups that will be formed by the processor
30658 dispatcher. It marks these group boundaries to reflect the estimated
30659 processor grouping, overriding the grouping that the scheduler had marked.
30660 Depending on the value of the flag '-minsert-sched-nops' this function can
30661 force certain insns into separate groups or force a certain distance between
30662 them by inserting nops, for example, if there exists a "costly dependence"
30663 between the insns.
30665 The function estimates the group boundaries that the processor will form as
30666 follows: It keeps track of how many vacant issue slots are available after
30667 each insn. A subsequent insn will start a new group if one of the following
30668 4 cases applies:
30669 - no more vacant issue slots remain in the current dispatch group.
30670 - only the last issue slot, which is the branch slot, is vacant, but the next
30671 insn is not a branch.
30672 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
30673 which means that a cracked insn (which occupies two issue slots) can't be
30674 issued in this group.
30675 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
30676 start a new group. */
30678 static int
30679 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30680 rtx_insn *tail)
30682 rtx_insn *insn, *next_insn;
30683 int issue_rate;
30684 int can_issue_more;
30685 int slot, i;
30686 bool group_end;
30687 int group_count = 0;
30688 rtx *group_insns;
30690 /* Initialize. */
30691 issue_rate = rs6000_issue_rate ();
30692 group_insns = XALLOCAVEC (rtx, issue_rate);
30693 for (i = 0; i < issue_rate; i++)
30695 group_insns[i] = 0;
30697 can_issue_more = issue_rate;
30698 slot = 0;
30699 insn = get_next_active_insn (prev_head_insn, tail);
30700 group_end = false;
30702 while (insn != NULL_RTX)
30704 slot = (issue_rate - can_issue_more);
30705 group_insns[slot] = insn;
30706 can_issue_more =
30707 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30708 if (insn_terminates_group_p (insn, current_group))
30709 can_issue_more = 0;
30711 next_insn = get_next_active_insn (insn, tail);
30712 if (next_insn == NULL_RTX)
30713 return group_count + 1;
30715 /* Is next_insn going to start a new group? */
30716 group_end
30717 = (can_issue_more == 0
30718 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30719 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30720 || (can_issue_more < issue_rate &&
30721 insn_terminates_group_p (next_insn, previous_group)));
30723 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
30724 next_insn, &group_end, can_issue_more,
30725 &group_count);
30727 if (group_end)
30729 group_count++;
30730 can_issue_more = 0;
30731 for (i = 0; i < issue_rate; i++)
30733 group_insns[i] = 0;
30737 if (GET_MODE (next_insn) == TImode && can_issue_more)
30738 PUT_MODE (next_insn, VOIDmode);
30739 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
30740 PUT_MODE (next_insn, TImode);
30742 insn = next_insn;
30743 if (can_issue_more == 0)
30744 can_issue_more = issue_rate;
30745 } /* while */
30747 return group_count;
30750 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
30751 dispatch group boundaries that the scheduler had marked. Pad with nops
30752 any dispatch groups which have vacant issue slots, in order to force the
30753 scheduler's grouping on the processor dispatcher. The function
30754 returns the number of dispatch groups found. */
30756 static int
30757 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30758 rtx_insn *tail)
30760 rtx_insn *insn, *next_insn;
30761 rtx nop;
30762 int issue_rate;
30763 int can_issue_more;
30764 int group_end;
30765 int group_count = 0;
30767 /* Initialize issue_rate. */
30768 issue_rate = rs6000_issue_rate ();
30769 can_issue_more = issue_rate;
30771 insn = get_next_active_insn (prev_head_insn, tail);
30772 next_insn = get_next_active_insn (insn, tail);
30774 while (insn != NULL_RTX)
30776 can_issue_more =
30777 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30779 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
30781 if (next_insn == NULL_RTX)
30782 break;
30784 if (group_end)
30786 /* If the scheduler had marked group termination at this location
30787 (between insn and next_insn), and neither insn nor next_insn will
30788 force group termination, pad the group with nops to force group
30789 termination. */
30790 if (can_issue_more
30791 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
30792 && !insn_terminates_group_p (insn, current_group)
30793 && !insn_terminates_group_p (next_insn, previous_group))
30795 if (!is_branch_slot_insn (next_insn))
30796 can_issue_more--;
30798 while (can_issue_more)
30800 nop = gen_nop ();
30801 emit_insn_before (nop, next_insn);
30802 can_issue_more--;
30806 can_issue_more = issue_rate;
30807 group_count++;
30810 insn = next_insn;
30811 next_insn = get_next_active_insn (insn, tail);
30814 return group_count;
30817 /* We're beginning a new block. Initialize data structures as necessary. */
30819 static void
30820 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
30821 int sched_verbose ATTRIBUTE_UNUSED,
30822 int max_ready ATTRIBUTE_UNUSED)
30824 last_scheduled_insn = NULL_RTX;
30825 load_store_pendulum = 0;
30828 /* The following function is called at the end of scheduling BB.
30829 After reload, it inserts nops to enforce the insn group bundling.  */
30831 static void
30832 rs6000_sched_finish (FILE *dump, int sched_verbose)
30834 int n_groups;
30836 if (sched_verbose)
30837 fprintf (dump, "=== Finishing schedule.\n");
30839 if (reload_completed && rs6000_sched_groups)
30841 /* Do not run sched_finish hook when selective scheduling enabled. */
30842 if (sel_sched_p ())
30843 return;
30845 if (rs6000_sched_insert_nops == sched_finish_none)
30846 return;
30848 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
30849 n_groups = pad_groups (dump, sched_verbose,
30850 current_sched_info->prev_head,
30851 current_sched_info->next_tail);
30852 else
30853 n_groups = redefine_groups (dump, sched_verbose,
30854 current_sched_info->prev_head,
30855 current_sched_info->next_tail);
30857 if (sched_verbose >= 6)
30859 fprintf (dump, "ngroups = %d\n", n_groups);
30860 print_rtl (dump, current_sched_info->prev_head);
30861 fprintf (dump, "Done finish_sched\n");
30866 struct _rs6000_sched_context
30868 short cached_can_issue_more;
30869 rtx last_scheduled_insn;
30870 int load_store_pendulum;
30873 typedef struct _rs6000_sched_context rs6000_sched_context_def;
30874 typedef rs6000_sched_context_def *rs6000_sched_context_t;
30876 /* Allocate store for new scheduling context. */
30877 static void *
30878 rs6000_alloc_sched_context (void)
30880 return xmalloc (sizeof (rs6000_sched_context_def));
30883 /* If CLEAN_P is true, initialize _SC with clean data;
30884 otherwise initialize it from the global context.  */
30885 static void
30886 rs6000_init_sched_context (void *_sc, bool clean_p)
30888 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30890 if (clean_p)
30892 sc->cached_can_issue_more = 0;
30893 sc->last_scheduled_insn = NULL_RTX;
30894 sc->load_store_pendulum = 0;
30896 else
30898 sc->cached_can_issue_more = cached_can_issue_more;
30899 sc->last_scheduled_insn = last_scheduled_insn;
30900 sc->load_store_pendulum = load_store_pendulum;
30904 /* Sets the global scheduling context to the one pointed to by _SC. */
30905 static void
30906 rs6000_set_sched_context (void *_sc)
30908 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30910 gcc_assert (sc != NULL);
30912 cached_can_issue_more = sc->cached_can_issue_more;
30913 last_scheduled_insn = sc->last_scheduled_insn;
30914 load_store_pendulum = sc->load_store_pendulum;
30917 /* Free _SC. */
30918 static void
30919 rs6000_free_sched_context (void *_sc)
30921 gcc_assert (_sc != NULL);
30923 free (_sc);
30927 /* Length in units of the trampoline for entering a nested function. */
30929 int
30930 rs6000_trampoline_size (void)
30932 int ret = 0;
30934 switch (DEFAULT_ABI)
30936 default:
30937 gcc_unreachable ();
30939 case ABI_AIX:
30940 ret = (TARGET_32BIT) ? 12 : 24;
30941 break;
30943 case ABI_ELFv2:
30944 gcc_assert (!TARGET_32BIT);
30945 ret = 32;
30946 break;
30948 case ABI_DARWIN:
30949 case ABI_V4:
30950 ret = (TARGET_32BIT) ? 40 : 48;
30951 break;
30954 return ret;
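/* For instance, 64-bit AIX needs 24 bytes, i.e. a function descriptor
   of three 8-byte words (entry point, TOC, static chain), while 32-bit
   V.4/eabi reserves 40 bytes for the code sequence that
   __trampoline_setup writes into the trampoline.  */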
30957 /* Emit RTL insns to initialize the variable parts of a trampoline.
30958 FNADDR is an RTX for the address of the function's pure code.
30959 CXT is an RTX for the static chain value for the function. */
30961 static void
30962 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
30964 int regsize = (TARGET_32BIT) ? 4 : 8;
30965 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
30966 rtx ctx_reg = force_reg (Pmode, cxt);
30967 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
30969 switch (DEFAULT_ABI)
30971 default:
30972 gcc_unreachable ();
30974 /* Under AIX, just build the 3-word function descriptor.  */
30975 case ABI_AIX:
30977 rtx fnmem, fn_reg, toc_reg;
30979 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30980 error ("You cannot take the address of a nested function if you use "
30981 "the -mno-pointers-to-nested-functions option.");
30983 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
30984 fn_reg = gen_reg_rtx (Pmode);
30985 toc_reg = gen_reg_rtx (Pmode);
30987 /* Macro to shorten the code expansions below. */
30988 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
30990 m_tramp = replace_equiv_address (m_tramp, addr);
30992 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
30993 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
30994 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
30995 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
30996 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
30998 # undef MEM_PLUS
31000 break;
31002 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
31003 case ABI_ELFv2:
31004 case ABI_DARWIN:
31005 case ABI_V4:
31006 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
31007 LCT_NORMAL, VOIDmode, 4,
31008 addr, Pmode,
31009 GEN_INT (rs6000_trampoline_size ()), SImode,
31010 fnaddr, Pmode,
31011 ctx_reg, Pmode);
31012 break;
31017 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
31018 identifier as an argument, so the front end shouldn't look it up. */
31020 static bool
31021 rs6000_attribute_takes_identifier_p (const_tree attr_id)
31023 return is_attribute_p ("altivec", attr_id);
31026 /* Handle the "altivec" attribute. The attribute may have
31027 arguments as follows:
31029 __attribute__((altivec(vector__)))
31030 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
31031 __attribute__((altivec(bool__))) (always followed by 'unsigned')
31033 and may appear more than once (e.g., 'vector bool char') in a
31034 given declaration. */
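/* Illustrative example, not part of the original source: with the
   usual AltiVec keyword expansion, a declaration such as

       vector bool int vbi;

   reaches this handler as roughly

       __attribute__((altivec(bool__))) unsigned int vbi;

   so ALTIVEC_TYPE below is 'b' and MODE is SImode.  */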
31036 static tree
31037 rs6000_handle_altivec_attribute (tree *node,
31038 tree name ATTRIBUTE_UNUSED,
31039 tree args,
31040 int flags ATTRIBUTE_UNUSED,
31041 bool *no_add_attrs)
31043 tree type = *node, result = NULL_TREE;
31044 machine_mode mode;
31045 int unsigned_p;
31046 char altivec_type
31047 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
31048 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
31049 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
31050 : '?');
31052 while (POINTER_TYPE_P (type)
31053 || TREE_CODE (type) == FUNCTION_TYPE
31054 || TREE_CODE (type) == METHOD_TYPE
31055 || TREE_CODE (type) == ARRAY_TYPE)
31056 type = TREE_TYPE (type);
31058 mode = TYPE_MODE (type);
31060 /* Check for invalid AltiVec type qualifiers. */
31061 if (type == long_double_type_node)
31062 error ("use of %<long double%> in AltiVec types is invalid");
31063 else if (type == boolean_type_node)
31064 error ("use of boolean types in AltiVec types is invalid");
31065 else if (TREE_CODE (type) == COMPLEX_TYPE)
31066 error ("use of %<complex%> in AltiVec types is invalid");
31067 else if (DECIMAL_FLOAT_MODE_P (mode))
31068 error ("use of decimal floating point types in AltiVec types is invalid");
31069 else if (!TARGET_VSX)
31071 if (type == long_unsigned_type_node || type == long_integer_type_node)
31073 if (TARGET_64BIT)
31074 error ("use of %<long%> in AltiVec types is invalid for "
31075 "64-bit code without -mvsx");
31076 else if (rs6000_warn_altivec_long)
31077 warning (0, "use of %<long%> in AltiVec types is deprecated; "
31078 "use %<int%>");
31080 else if (type == long_long_unsigned_type_node
31081 || type == long_long_integer_type_node)
31082 error ("use of %<long long%> in AltiVec types is invalid without "
31083 "-mvsx");
31084 else if (type == double_type_node)
31085 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
31088 switch (altivec_type)
31090 case 'v':
31091 unsigned_p = TYPE_UNSIGNED (type);
31092 switch (mode)
31094 case TImode:
31095 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
31096 break;
31097 case DImode:
31098 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
31099 break;
31100 case SImode:
31101 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
31102 break;
31103 case HImode:
31104 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
31105 break;
31106 case QImode:
31107 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
31108 break;
31109 case SFmode: result = V4SF_type_node; break;
31110 case DFmode: result = V2DF_type_node; break;
31111 /* If the user says 'vector int bool', we may be handed the 'bool'
31112 attribute _before_ the 'vector' attribute, and so select the
31113 proper type in the 'b' case below. */
31114 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
31115 case V2DImode: case V2DFmode:
31116 result = type;
31117 default: break;
31119 break;
31120 case 'b':
31121 switch (mode)
31123 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
31124 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
31125 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
31126 case QImode: case V16QImode: result = bool_V16QI_type_node;
31127 default: break;
31129 break;
31130 case 'p':
31131 switch (mode)
31133 case V8HImode: result = pixel_V8HI_type_node;
31134 default: break;
31136 default: break;
31139 /* Propagate qualifiers attached to the element type
31140 onto the vector type. */
31141 if (result && result != type && TYPE_QUALS (type))
31142 result = build_qualified_type (result, TYPE_QUALS (type));
31144 *no_add_attrs = true; /* No need to hang on to the attribute. */
31146 if (result)
31147 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
31149 return NULL_TREE;
31152 /* AltiVec defines four built-in scalar types that serve as vector
31153 elements; we must teach the compiler how to mangle them. */
31155 static const char *
31156 rs6000_mangle_type (const_tree type)
31158 type = TYPE_MAIN_VARIANT (type);
31160 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31161 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31162 return NULL;
31164 if (type == bool_char_type_node) return "U6__boolc";
31165 if (type == bool_short_type_node) return "U6__bools";
31166 if (type == pixel_type_node) return "u7__pixel";
31167 if (type == bool_int_type_node) return "U6__booli";
31168 if (type == bool_long_type_node) return "U6__booll";
31170 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
31171 "g" for IBM extended double, no matter whether it is long double (using
31172 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
31173 if (TARGET_FLOAT128)
31175 if (type == ieee128_float_type_node)
31176 return "U10__float128";
31178 if (type == ibm128_float_type_node)
31179 return "g";
31181 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
31182 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
31185 /* Mangle IBM extended float long double as `g' (__float128) on
31186 powerpc*-linux where long-double-64 previously was the default. */
31187 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
31188 && TARGET_ELF
31189 && TARGET_LONG_DOUBLE_128
31190 && !TARGET_IEEEQUAD)
31191 return "g";
31193 /* For all other types, use normal C++ mangling. */
31194 return NULL;
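/* Illustrative examples, derived from the table above: the element
   type __pixel contributes "u7__pixel" to a mangled name and
   __bool int contributes "U6__booli", while IBM extended double
   (and long double with -mabi=ibmlongdouble -mlong-double-128)
   mangles as "g".  */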
31197 /* Handle a "longcall" or "shortcall" attribute; arguments as in
31198 struct attribute_spec.handler. */
31200 static tree
31201 rs6000_handle_longcall_attribute (tree *node, tree name,
31202 tree args ATTRIBUTE_UNUSED,
31203 int flags ATTRIBUTE_UNUSED,
31204 bool *no_add_attrs)
31206 if (TREE_CODE (*node) != FUNCTION_TYPE
31207 && TREE_CODE (*node) != FIELD_DECL
31208 && TREE_CODE (*node) != TYPE_DECL)
31210 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31211 name);
31212 *no_add_attrs = true;
31215 return NULL_TREE;
31218 /* Set longcall attributes on all functions declared when
31219 rs6000_default_long_calls is true. */
31220 static void
31221 rs6000_set_default_type_attributes (tree type)
31223 if (rs6000_default_long_calls
31224 && (TREE_CODE (type) == FUNCTION_TYPE
31225 || TREE_CODE (type) == METHOD_TYPE))
31226 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
31227 NULL_TREE,
31228 TYPE_ATTRIBUTES (type));
31230 #if TARGET_MACHO
31231 darwin_set_default_type_attributes (type);
31232 #endif
31235 /* Return a reference suitable for calling a function with the
31236 longcall attribute. */
31239 rs6000_longcall_ref (rtx call_ref)
31241 const char *call_name;
31242 tree node;
31244 if (GET_CODE (call_ref) != SYMBOL_REF)
31245 return call_ref;
31247 /* System V adds '.' to the internal name, so skip any leading dots. */
31248 call_name = XSTR (call_ref, 0);
31249 if (*call_name == '.')
31251 while (*call_name == '.')
31252 call_name++;
31254 node = get_identifier (call_name);
31255 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
31258 return force_reg (Pmode, call_ref);
31261 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
31262 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
31263 #endif
31265 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31266 struct attribute_spec.handler. */
31267 static tree
31268 rs6000_handle_struct_attribute (tree *node, tree name,
31269 tree args ATTRIBUTE_UNUSED,
31270 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31272 tree *type = NULL;
31273 if (DECL_P (*node))
31275 if (TREE_CODE (*node) == TYPE_DECL)
31276 type = &TREE_TYPE (*node);
31278 else
31279 type = node;
31281 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
31282 || TREE_CODE (*type) == UNION_TYPE)))
31284 warning (OPT_Wattributes, "%qE attribute ignored", name);
31285 *no_add_attrs = true;
31288 else if ((is_attribute_p ("ms_struct", name)
31289 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
31290 || ((is_attribute_p ("gcc_struct", name)
31291 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
31293 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
31294 name);
31295 *no_add_attrs = true;
31298 return NULL_TREE;
31301 static bool
31302 rs6000_ms_bitfield_layout_p (const_tree record_type)
31304 return ((TARGET_USE_MS_BITFIELD_LAYOUT
31305 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
31306 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
31309 #ifdef USING_ELFOS_H
31311 /* A get_unnamed_section callback, used for switching to toc_section. */
31313 static void
31314 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
31316 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31317 && TARGET_MINIMAL_TOC
31318 && !TARGET_RELOCATABLE)
31320 if (!toc_initialized)
31322 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
31323 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31324 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
31325 fprintf (asm_out_file, "\t.tc ");
31326 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
31327 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31328 fprintf (asm_out_file, "\n");
31330 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31331 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31332 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31333 fprintf (asm_out_file, " = .+32768\n");
31334 toc_initialized = 1;
31336 else
31337 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31339 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31340 && !TARGET_RELOCATABLE)
31342 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
31343 if (!toc_initialized)
31345 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31346 toc_initialized = 1;
31349 else
31351 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31352 if (!toc_initialized)
31354 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
31355 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
31356 fprintf (asm_out_file, " = .+32768\n");
31357 toc_initialized = 1;
31362 /* Implement TARGET_ASM_INIT_SECTIONS. */
31364 static void
31365 rs6000_elf_asm_init_sections (void)
31367 toc_section
31368 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
31370 sdata2_section
31371 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
31372 SDATA2_SECTION_ASM_OP);
31375 /* Implement TARGET_SELECT_RTX_SECTION. */
31377 static section *
31378 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
31379 unsigned HOST_WIDE_INT align)
31381 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
31382 return toc_section;
31383 else
31384 return default_elf_select_rtx_section (mode, x, align);
31387 /* For a SYMBOL_REF, set generic flags and then perform some
31388 target-specific processing.
31390 When the AIX ABI is requested on a non-AIX system, replace the
31391 function name with the real name (with a leading .) rather than the
31392 function descriptor name. This avoids a lot of overriding code that
31393 would otherwise be needed to strip the prefixes.  */
31395 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
31396 static void
31397 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
31399 default_encode_section_info (decl, rtl, first);
31401 if (first
31402 && TREE_CODE (decl) == FUNCTION_DECL
31403 && !TARGET_AIX
31404 && DEFAULT_ABI == ABI_AIX)
31406 rtx sym_ref = XEXP (rtl, 0);
31407 size_t len = strlen (XSTR (sym_ref, 0));
31408 char *str = XALLOCAVEC (char, len + 2);
31409 str[0] = '.';
31410 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
31411 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
31415 static inline bool
31416 compare_section_name (const char *section, const char *templ)
31418 int len;
31420 len = strlen (templ);
31421 return (strncmp (section, templ, len) == 0
31422 && (section[len] == 0 || section[len] == '.'));
31425 bool
31426 rs6000_elf_in_small_data_p (const_tree decl)
31428 if (rs6000_sdata == SDATA_NONE)
31429 return false;
31431 /* We want to merge strings, so we never consider them small data. */
31432 if (TREE_CODE (decl) == STRING_CST)
31433 return false;
31435 /* Functions are never in the small data area. */
31436 if (TREE_CODE (decl) == FUNCTION_DECL)
31437 return false;
31439 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
31441 const char *section = DECL_SECTION_NAME (decl);
31442 if (compare_section_name (section, ".sdata")
31443 || compare_section_name (section, ".sdata2")
31444 || compare_section_name (section, ".gnu.linkonce.s")
31445 || compare_section_name (section, ".sbss")
31446 || compare_section_name (section, ".sbss2")
31447 || compare_section_name (section, ".gnu.linkonce.sb")
31448 || strcmp (section, ".PPC.EMB.sdata0") == 0
31449 || strcmp (section, ".PPC.EMB.sbss0") == 0)
31450 return true;
31452 else
31454 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
31456 if (size > 0
31457 && size <= g_switch_value
31458 /* If it's not public, and we're not going to reference it there,
31459 there's no need to put it in the small data section. */
31460 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
31461 return true;
31464 return false;
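/* Illustrative example, not part of the original source: assuming the
   usual -G 8 default, a global 'int x;' (size 4 <= g_switch_value)
   is treated as small data by the size check above, and a variable
   placed with __attribute__((section (".sdata"))) is recognized by
   the section-name check.  */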
31467 #endif /* USING_ELFOS_H */
31469 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
31471 static bool
31472 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
31474 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
31477 /* Do not place thread-local symbols refs in the object blocks. */
31479 static bool
31480 rs6000_use_blocks_for_decl_p (const_tree decl)
31482 return !DECL_THREAD_LOCAL_P (decl);
31485 /* Return a REG that occurs in ADDR with coefficient 1.
31486 ADDR can be effectively incremented by incrementing REG.
31488 r0 is special and we must not select it as an address
31489 register by this routine since our caller will try to
31490 increment the returned register via an "la" instruction. */
31493 find_addr_reg (rtx addr)
31495 while (GET_CODE (addr) == PLUS)
31497 if (GET_CODE (XEXP (addr, 0)) == REG
31498 && REGNO (XEXP (addr, 0)) != 0)
31499 addr = XEXP (addr, 0);
31500 else if (GET_CODE (XEXP (addr, 1)) == REG
31501 && REGNO (XEXP (addr, 1)) != 0)
31502 addr = XEXP (addr, 1);
31503 else if (CONSTANT_P (XEXP (addr, 0)))
31504 addr = XEXP (addr, 1);
31505 else if (CONSTANT_P (XEXP (addr, 1)))
31506 addr = XEXP (addr, 0);
31507 else
31508 gcc_unreachable ();
31510 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
31511 return addr;
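/* Illustrative example, not part of the original source: for ADDR of
   the form (plus (reg r9) (const_int 8)), the loop above discards the
   constant term and returns the r9 REG, which the caller can then
   bump with an "la" instruction.  */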
31514 void
31515 rs6000_fatal_bad_address (rtx op)
31517 fatal_insn ("bad address", op);
31520 #if TARGET_MACHO
31522 typedef struct branch_island_d {
31523 tree function_name;
31524 tree label_name;
31525 int line_number;
31526 } branch_island;
31529 static vec<branch_island, va_gc> *branch_islands;
31531 /* Remember to generate a branch island for far calls to the given
31532 function. */
31534 static void
31535 add_compiler_branch_island (tree label_name, tree function_name,
31536 int line_number)
31538 branch_island bi = {function_name, label_name, line_number};
31539 vec_safe_push (branch_islands, bi);
31542 /* Generate far-jump branch islands for everything recorded in
31543 branch_islands. Invoked immediately after the last instruction of
31544 the epilogue has been emitted; the branch islands must be appended
31545 to, and contiguous with, the function body. Mach-O stubs are
31546 generated in machopic_output_stub(). */
31548 static void
31549 macho_branch_islands (void)
31551 char tmp_buf[512];
31553 while (!vec_safe_is_empty (branch_islands))
31555 branch_island *bi = &branch_islands->last ();
31556 const char *label = IDENTIFIER_POINTER (bi->label_name);
31557 const char *name = IDENTIFIER_POINTER (bi->function_name);
31558 char name_buf[512];
31559 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
31560 if (name[0] == '*' || name[0] == '&')
31561 strcpy (name_buf, name+1);
31562 else
31564 name_buf[0] = '_';
31565 strcpy (name_buf+1, name);
31567 strcpy (tmp_buf, "\n");
31568 strcat (tmp_buf, label);
31569 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31570 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31571 dbxout_stabd (N_SLINE, bi->line_number);
31572 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31573 if (flag_pic)
31575 if (TARGET_LINK_STACK)
31577 char name[32];
31578 get_ppc476_thunk_name (name);
31579 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
31580 strcat (tmp_buf, name);
31581 strcat (tmp_buf, "\n");
31582 strcat (tmp_buf, label);
31583 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31585 else
31587 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
31588 strcat (tmp_buf, label);
31589 strcat (tmp_buf, "_pic\n");
31590 strcat (tmp_buf, label);
31591 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31594 strcat (tmp_buf, "\taddis r11,r11,ha16(");
31595 strcat (tmp_buf, name_buf);
31596 strcat (tmp_buf, " - ");
31597 strcat (tmp_buf, label);
31598 strcat (tmp_buf, "_pic)\n");
31600 strcat (tmp_buf, "\tmtlr r0\n");
31602 strcat (tmp_buf, "\taddi r12,r11,lo16(");
31603 strcat (tmp_buf, name_buf);
31604 strcat (tmp_buf, " - ");
31605 strcat (tmp_buf, label);
31606 strcat (tmp_buf, "_pic)\n");
31608 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
31610 else
31612 strcat (tmp_buf, ":\nlis r12,hi16(");
31613 strcat (tmp_buf, name_buf);
31614 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
31615 strcat (tmp_buf, name_buf);
31616 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
31618 output_asm_insn (tmp_buf, 0);
31619 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31620 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31621 dbxout_stabd (N_SLINE, bi->line_number);
31622 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31623 branch_islands->pop ();
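/* Illustrative output, derived from the non-PIC arm above and not
   part of the original source: an island labelled L42 for function
   "foo" expands to roughly

       L42:    lis r12,hi16(_foo)
               ori r12,r12,lo16(_foo)
               mtctr r12
               bctr                                                  */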
31627 /* NO_PREVIOUS_DEF checks in the branch island list whether the
31628 function name is already there.  */
31630 static int
31631 no_previous_def (tree function_name)
31633 branch_island *bi;
31634 unsigned ix;
31636 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31637 if (function_name == bi->function_name)
31638 return 0;
31639 return 1;
31642 /* GET_PREV_LABEL gets the label name from the previous definition of
31643 the function. */
31645 static tree
31646 get_prev_label (tree function_name)
31648 branch_island *bi;
31649 unsigned ix;
31651 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31652 if (function_name == bi->function_name)
31653 return bi->label_name;
31654 return NULL_TREE;
31657 /* INSN is either a function call or a millicode call. It may have an
31658 unconditional jump in its delay slot.
31660 CALL_DEST is the routine we are calling. */
31662 char *
31663 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
31664 int cookie_operand_number)
31666 static char buf[256];
31667 if (darwin_emit_branch_islands
31668 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
31669 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
31671 tree labelname;
31672 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
31674 if (no_previous_def (funname))
31676 rtx label_rtx = gen_label_rtx ();
31677 char *label_buf, temp_buf[256];
31678 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
31679 CODE_LABEL_NUMBER (label_rtx));
31680 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
31681 labelname = get_identifier (label_buf);
31682 add_compiler_branch_island (labelname, funname, insn_line (insn));
31684 else
31685 labelname = get_prev_label (funname);
31687 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
31688 instruction will reach 'foo', otherwise link as 'bl L42'".
31689 "L42" should be a 'branch island', that will do a far jump to
31690 'foo'. Branch islands are generated in
31691 macho_branch_islands(). */
31692 sprintf (buf, "jbsr %%z%d,%.246s",
31693 dest_operand_number, IDENTIFIER_POINTER (labelname));
31695 else
31696 sprintf (buf, "bl %%z%d", dest_operand_number);
31697 return buf;
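/* Illustrative example, not part of the original source: a CALL_LONG
   call to "foo" whose island got label L42 prints as "jbsr %z0,L42"
   (for destination operand 0), which the Mach-O assembler links as
   either "bl _foo" or "bl L42" depending on reach, per the comment
   above.  */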
31700 /* Generate PIC and indirect symbol stubs. */
31702 void
31703 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31705 unsigned int length;
31706 char *symbol_name, *lazy_ptr_name;
31707 char *local_label_0;
31708 static int label = 0;
31710 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31711 symb = (*targetm.strip_name_encoding) (symb);
31714 length = strlen (symb);
31715 symbol_name = XALLOCAVEC (char, length + 32);
31716 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31718 lazy_ptr_name = XALLOCAVEC (char, length + 32);
31719 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
31721 if (flag_pic == 2)
31722 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
31723 else
31724 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
31726 if (flag_pic == 2)
31728 fprintf (file, "\t.align 5\n");
31730 fprintf (file, "%s:\n", stub);
31731 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31733 label++;
31734 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
31735 sprintf (local_label_0, "\"L%011d$spb\"", label);
31737 fprintf (file, "\tmflr r0\n");
31738 if (TARGET_LINK_STACK)
31740 char name[32];
31741 get_ppc476_thunk_name (name);
31742 fprintf (file, "\tbl %s\n", name);
31743 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31745 else
31747 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
31748 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31750 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
31751 lazy_ptr_name, local_label_0);
31752 fprintf (file, "\tmtlr r0\n");
31753 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
31754 (TARGET_64BIT ? "ldu" : "lwzu"),
31755 lazy_ptr_name, local_label_0);
31756 fprintf (file, "\tmtctr r12\n");
31757 fprintf (file, "\tbctr\n");
31759 else
31761 fprintf (file, "\t.align 4\n");
31763 fprintf (file, "%s:\n", stub);
31764 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31766 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
31767 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
31768 (TARGET_64BIT ? "ldu" : "lwzu"),
31769 lazy_ptr_name);
31770 fprintf (file, "\tmtctr r12\n");
31771 fprintf (file, "\tbctr\n");
31774 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
31775 fprintf (file, "%s:\n", lazy_ptr_name);
31776 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31777 fprintf (file, "%sdyld_stub_binding_helper\n",
31778 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
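/* Illustrative non-PIC stub for "foo", derived from the flag_pic != 2
   arm above (32-bit case); the exact label spellings are assumptions:

       Lfoo$stub:
               .indirect_symbol _foo
               lis r11,ha16(Lfoo$lazy_ptr)
               lwzu r12,lo16(Lfoo$lazy_ptr)(r11)
               mtctr r12
               bctr

   followed by the lazy pointer, which initially resolves through
   dyld_stub_binding_helper.  */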
31781 /* Legitimize PIC addresses. If the address is already
31782 position-independent, we return ORIG. Newly generated
31783 position-independent addresses go into a reg. This is REG if non
31784 zero, otherwise we allocate register(s) as necessary. */
31786 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
31789 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
31790 rtx reg)
31792 rtx base, offset;
31794 if (reg == NULL && ! reload_in_progress && ! reload_completed)
31795 reg = gen_reg_rtx (Pmode);
31797 if (GET_CODE (orig) == CONST)
31799 rtx reg_temp;
31801 if (GET_CODE (XEXP (orig, 0)) == PLUS
31802 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
31803 return orig;
31805 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
31807 /* Use a different reg for the intermediate value, as
31808 it will be marked UNCHANGING. */
31809 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
31810 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
31811 Pmode, reg_temp);
31812 offset =
31813 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
31814 Pmode, reg);
31816 if (GET_CODE (offset) == CONST_INT)
31818 if (SMALL_INT (offset))
31819 return plus_constant (Pmode, base, INTVAL (offset));
31820 else if (! reload_in_progress && ! reload_completed)
31821 offset = force_reg (Pmode, offset);
31822 else
31824 rtx mem = force_const_mem (Pmode, orig);
31825 return machopic_legitimize_pic_address (mem, Pmode, reg);
31828 return gen_rtx_PLUS (Pmode, base, offset);
31831 /* Fall back on generic machopic code. */
31832 return machopic_legitimize_pic_address (orig, mode, reg);
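/* Illustrative example, not part of the original source: legitimizing
   (const (plus (symbol_ref "x") (const_int 4))) first legitimizes "x"
   into a base register and then, since 4 satisfies SMALL_INT, returns
   (plus BASE (const_int 4)) directly.  */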
31835 /* Output a .machine directive for the Darwin assembler, and call
31836 the generic start_file routine. */
31838 static void
31839 rs6000_darwin_file_start (void)
31841 static const struct
31843 const char *arg;
31844 const char *name;
31845 HOST_WIDE_INT if_set;
31846 } mapping[] = {
31847 { "ppc64", "ppc64", MASK_64BIT },
31848 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
31849 { "power4", "ppc970", 0 },
31850 { "G5", "ppc970", 0 },
31851 { "7450", "ppc7450", 0 },
31852 { "7400", "ppc7400", MASK_ALTIVEC },
31853 { "G4", "ppc7400", 0 },
31854 { "750", "ppc750", 0 },
31855 { "740", "ppc750", 0 },
31856 { "G3", "ppc750", 0 },
31857 { "604e", "ppc604e", 0 },
31858 { "604", "ppc604", 0 },
31859 { "603e", "ppc603", 0 },
31860 { "603", "ppc603", 0 },
31861 { "601", "ppc601", 0 },
31862 { NULL, "ppc", 0 } };
31863 const char *cpu_id = "";
31864 size_t i;
31866 rs6000_file_start ();
31867 darwin_file_start ();
31869 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
31871 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
31872 cpu_id = rs6000_default_cpu;
31874 if (global_options_set.x_rs6000_cpu_index)
31875 cpu_id = processor_target_table[rs6000_cpu_index].name;
31877 /* Look through the mapping array. Pick the first name that either
31878 matches the argument, has a bit set in IF_SET that is also set
31879 in the target flags, or has a NULL name. */
31881 i = 0;
31882 while (mapping[i].arg != NULL
31883 && strcmp (mapping[i].arg, cpu_id) != 0
31884 && (mapping[i].if_set & rs6000_isa_flags) == 0)
31885 i++;
31887 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
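/* Illustrative examples, derived from the mapping above and not part
   of the original source: -mcpu=G4 matches the "G4" entry and emits
   "\t.machine ppc7400", while no -mcpu and no 64-bit/AltiVec flags
   fall through to the NULL entry and emit "\t.machine ppc".  */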
31890 #endif /* TARGET_MACHO */
31892 #if TARGET_ELF
31893 static int
31894 rs6000_elf_reloc_rw_mask (void)
31896 if (flag_pic)
31897 return 3;
31898 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31899 return 2;
31900 else
31901 return 0;
31904 /* Record an element in the table of global constructors. SYMBOL is
31905 a SYMBOL_REF of the function to be called; PRIORITY is a number
31906 between 0 and MAX_INIT_PRIORITY.
31908 This differs from default_named_section_asm_out_constructor in
31909 that we have special handling for -mrelocatable. */
31911 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
31912 static void
31913 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
31915 const char *section = ".ctors";
31916 char buf[16];
31918 if (priority != DEFAULT_INIT_PRIORITY)
31920 sprintf (buf, ".ctors.%.5u",
31921 /* Invert the numbering so the linker puts us in the proper
31922 order; constructors are run from right to left, and the
31923 linker sorts in increasing order. */
31924 MAX_INIT_PRIORITY - priority);
31925 section = buf;
31928 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31929 assemble_align (POINTER_SIZE);
31931 if (TARGET_RELOCATABLE)
31933 fputs ("\t.long (", asm_out_file);
31934 output_addr_const (asm_out_file, symbol);
31935 fputs (")@fixup\n", asm_out_file);
31937 else
31938 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
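/* Illustrative example, not part of the original source: assuming the
   usual MAX_INIT_PRIORITY of 65535, a constructor with priority 100
   lands in section ".ctors.65435", so the linker's increasing sort
   yields the desired right-to-left execution order.  */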
31941 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
31942 static void
31943 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
31945 const char *section = ".dtors";
31946 char buf[16];
31948 if (priority != DEFAULT_INIT_PRIORITY)
31950 sprintf (buf, ".dtors.%.5u",
31951 /* Invert the numbering so the linker puts us in the proper
31952 order; constructors are run from right to left, and the
31953 linker sorts in increasing order. */
31954 MAX_INIT_PRIORITY - priority);
31955 section = buf;
31958 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31959 assemble_align (POINTER_SIZE);
31961 if (TARGET_RELOCATABLE)
31963 fputs ("\t.long (", asm_out_file);
31964 output_addr_const (asm_out_file, symbol);
31965 fputs (")@fixup\n", asm_out_file);
31967 else
31968 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31971 void
31972 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
31974 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
31976 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
31977 ASM_OUTPUT_LABEL (file, name);
31978 fputs (DOUBLE_INT_ASM_OP, file);
31979 rs6000_output_function_entry (file, name);
31980 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
31981 if (DOT_SYMBOLS)
31983 fputs ("\t.size\t", file);
31984 assemble_name (file, name);
31985 fputs (",24\n\t.type\t.", file);
31986 assemble_name (file, name);
31987 fputs (",@function\n", file);
31988 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
31990 fputs ("\t.globl\t.", file);
31991 assemble_name (file, name);
31992 putc ('\n', file);
31995 else
31996 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
31997 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
31998 rs6000_output_function_entry (file, name);
31999 fputs (":\n", file);
32000 return;
32003 if (TARGET_RELOCATABLE
32004 && !TARGET_SECURE_PLT
32005 && (get_pool_size () != 0 || crtl->profile)
32006 && uses_TOC ())
32008 char buf[256];
32010 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32012 fprintf (file, "\t.long ");
32013 assemble_name (file, toc_label_name);
32014 need_toc_init = 1;
32015 putc ('-', file);
32016 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32017 assemble_name (file, buf);
32018 putc ('\n', file);
32021 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
32022 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
32024 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
32026 char buf[256];
32028 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
32030 fprintf (file, "\t.quad .TOC.-");
32031 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
32032 assemble_name (file, buf);
32033 putc ('\n', file);
32036 if (DEFAULT_ABI == ABI_AIX)
32038 const char *desc_name, *orig_name;
32040 orig_name = (*targetm.strip_name_encoding) (name);
32041 desc_name = orig_name;
32042 while (*desc_name == '.')
32043 desc_name++;
32045 if (TREE_PUBLIC (decl))
32046 fprintf (file, "\t.globl %s\n", desc_name);
32048 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32049 fprintf (file, "%s:\n", desc_name);
32050 fprintf (file, "\t.long %s\n", orig_name);
32051 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
32052 fputs ("\t.long 0\n", file);
32053 fprintf (file, "\t.previous\n");
32055 ASM_OUTPUT_LABEL (file, name);
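/* Illustrative 64-bit ELFv1 output for a function "foo", derived from
   the TARGET_64BIT path above (with DOT_SYMBOLS, where ".foo" is the
   code entry point); not part of the original source:

       .section ".opd","aw"
       .align 3
       foo:    .quad .foo,.TOC.@tocbase,0
       .previous                                                     */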
32058 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
32059 static void
32060 rs6000_elf_file_end (void)
32062 #ifdef HAVE_AS_GNU_ATTRIBUTE
32063 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
32065 if (rs6000_passes_float)
32066 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
32067 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
32068 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
32069 : 2));
32070 if (rs6000_passes_vector)
32071 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
32072 (TARGET_ALTIVEC_ABI ? 2
32073 : TARGET_SPE_ABI ? 3
32074 : 1));
32075 if (rs6000_returns_struct)
32076 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
32077 aix_struct_return ? 2 : 1);
32079 #endif
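/* Illustrative example, derived from the values above and not part of
   the original source: a 32-bit SysV object that passes
   double-precision values in FPRs emits "\t.gnu_attribute 4, 1",
   a single-float one "\t.gnu_attribute 4, 3", and a soft-float one
   "\t.gnu_attribute 4, 2".  */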
32080 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
32081 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
32082 file_end_indicate_exec_stack ();
32083 #endif
32085 if (flag_split_stack)
32086 file_end_indicate_split_stack ();
32088 if (cpu_builtin_p)
32090 /* We have expanded a CPU builtin, so we need to emit a reference to
32091 the special symbol that LIBC uses to declare it supports the
32092 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
32093 switch_to_section (data_section);
32094 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
32095 fprintf (asm_out_file, "\t%s %s\n",
32096 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
32099 #endif
32101 #if TARGET_XCOFF
32103 #ifndef HAVE_XCOFF_DWARF_EXTRAS
32104 #define HAVE_XCOFF_DWARF_EXTRAS 0
32105 #endif
32107 static enum unwind_info_type
32108 rs6000_xcoff_debug_unwind_info (void)
32110 return UI_NONE;
32113 static void
32114 rs6000_xcoff_asm_output_anchor (rtx symbol)
32116 char buffer[100];
32118 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
32119 SYMBOL_REF_BLOCK_OFFSET (symbol));
32120 fprintf (asm_out_file, "%s", SET_ASM_OP);
32121 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
32122 fprintf (asm_out_file, ",");
32123 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
32124 fprintf (asm_out_file, "\n");
32127 static void
32128 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
32130 fputs (GLOBAL_ASM_OP, stream);
32131 RS6000_OUTPUT_BASENAME (stream, name);
32132 putc ('\n', stream);
32135 /* A get_unnamed_decl callback, used for read-only sections. PTR
32136 points to the section string variable. */
32138 static void
32139 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
32141 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
32142 *(const char *const *) directive,
32143 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32146 /* Likewise for read-write sections. */
32148 static void
32149 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
32151 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
32152 *(const char *const *) directive,
32153 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32156 static void
32157 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
32159 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
32160 *(const char *const *) directive,
32161 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
32164 /* A get_unnamed_section callback, used for switching to toc_section. */
32166 static void
32167 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32169 if (TARGET_MINIMAL_TOC)
32171 /* toc_section is always selected at least once from
32172 rs6000_xcoff_file_start, so this is guaranteed to be
32173 defined exactly once in each file.  */
32174 if (!toc_initialized)
32176 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
32177 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
32178 toc_initialized = 1;
32180 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
32181 (TARGET_32BIT ? "" : ",3"));
32183 else
32184 fputs ("\t.toc\n", asm_out_file);
32187 /* Implement TARGET_ASM_INIT_SECTIONS. */
32189 static void
32190 rs6000_xcoff_asm_init_sections (void)
32192 read_only_data_section
32193 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32194 &xcoff_read_only_section_name);
32196 private_data_section
32197 = get_unnamed_section (SECTION_WRITE,
32198 rs6000_xcoff_output_readwrite_section_asm_op,
32199 &xcoff_private_data_section_name);
32201 tls_data_section
32202 = get_unnamed_section (SECTION_TLS,
32203 rs6000_xcoff_output_tls_section_asm_op,
32204 &xcoff_tls_data_section_name);
32206 tls_private_data_section
32207 = get_unnamed_section (SECTION_TLS,
32208 rs6000_xcoff_output_tls_section_asm_op,
32209 &xcoff_private_data_section_name);
32211 read_only_private_data_section
32212 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
32213 &xcoff_private_data_section_name);
32215 toc_section
32216 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
32218 readonly_data_section = read_only_data_section;
32221 static int
32222 rs6000_xcoff_reloc_rw_mask (void)
32224 return 3;
32227 static void
32228 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
32229 tree decl ATTRIBUTE_UNUSED)
32231 int smclass;
32232 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
32234 if (flags & SECTION_EXCLUDE)
32235 smclass = 4;
32236 else if (flags & SECTION_DEBUG)
32238 fprintf (asm_out_file, "\t.dwsect %s\n", name);
32239 return;
32241 else if (flags & SECTION_CODE)
32242 smclass = 0;
32243 else if (flags & SECTION_TLS)
32244 smclass = 3;
32245 else if (flags & SECTION_WRITE)
32246 smclass = 2;
32247 else
32248 smclass = 1;
32250 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
32251 (flags & SECTION_CODE) ? "." : "",
32252 name, suffix[smclass], flags & SECTION_ENTSIZE);
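/* Illustrative example, not part of the original source: a writable
   named section "mydata" is emitted as "\t.csect mydata[RW],N", where
   N is the entsize field of FLAGS, while a code section additionally
   gets a leading '.' and the "PR" storage mapping class.  */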
32255 #define IN_NAMED_SECTION(DECL) \
32256 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
32257 && DECL_SECTION_NAME (DECL) != NULL)
32259 static section *
32260 rs6000_xcoff_select_section (tree decl, int reloc,
32261 unsigned HOST_WIDE_INT align)
32263 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
32264 named section. */
32265 if (align > BIGGEST_ALIGNMENT)
32267 resolve_unique_section (decl, reloc, true);
32268 if (IN_NAMED_SECTION (decl))
32269 return get_named_section (decl, NULL, reloc);
32272 if (decl_readonly_section (decl, reloc))
32274 if (TREE_PUBLIC (decl))
32275 return read_only_data_section;
32276 else
32277 return read_only_private_data_section;
32279 else
32281 #if HAVE_AS_TLS
32282 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32284 if (TREE_PUBLIC (decl))
32285 return tls_data_section;
32286 else if (bss_initializer_p (decl))
32288 /* Convert to COMMON to emit in BSS. */
32289 DECL_COMMON (decl) = 1;
32290 return tls_comm_section;
32292 else
32293 return tls_private_data_section;
32295 else
32296 #endif
32297 if (TREE_PUBLIC (decl))
32298 return data_section;
32299 else
32300 return private_data_section;
32304 static void
32305 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
32307 const char *name;
32309 /* Use select_section for private data and uninitialized data with
32310 alignment <= BIGGEST_ALIGNMENT. */
32311 if (!TREE_PUBLIC (decl)
32312 || DECL_COMMON (decl)
32313 || (DECL_INITIAL (decl) == NULL_TREE
32314 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
32315 || DECL_INITIAL (decl) == error_mark_node
32316 || (flag_zero_initialized_in_bss
32317 && initializer_zerop (DECL_INITIAL (decl))))
32318 return;
32320 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32321 name = (*targetm.strip_name_encoding) (name);
32322 set_decl_section_name (decl, name);
32325 /* Select section for constant in constant pool.
32327 On RS/6000, all constants are in the private read-only data area.
32328 However, if this is being placed in the TOC it must be output as a
32329 toc entry. */
32331 static section *
32332 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
32333 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
32335 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32336 return toc_section;
32337 else
32338 return read_only_private_data_section;
32341 /* Remove any trailing [DS] or the like from the symbol name. */
32343 static const char *
32344 rs6000_xcoff_strip_name_encoding (const char *name)
32346 size_t len;
32347 if (*name == '*')
32348 name++;
32349 len = strlen (name);
32350 if (name[len - 1] == ']')
32351 return ggc_alloc_string (name, len - 4);
32352 else
32353 return name;
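/* Illustrative example, not part of the original source: a name such
   as "foo[DS]" is returned as "foo" (the last four characters are
   dropped whenever the name ends in ']'), after skipping any leading
   '*' marker.  */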
32356 /* Section attributes. AIX is always PIC. */
32358 static unsigned int
32359 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
32361 unsigned int align;
32362 unsigned int flags = default_section_type_flags (decl, name, reloc);
32364 /* Align to at least UNIT size. */
32365 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
32366 align = MIN_UNITS_PER_WORD;
32367 else
32368 /* Increase alignment of large objects if not already stricter. */
32369 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
32370 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
32371 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
32373 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
32376 /* Output at beginning of assembler file.
32378 Initialize the section names for the RS/6000 at this point.
32380 Specify filename, including full path, to assembler.
32382 We want to go into the TOC section so at least one .toc will be emitted.
32383 Also, in order to output proper .bs/.es pairs, we need at least one static
32384 [RW] section emitted.
32386 Finally, declare mcount when profiling to make the assembler happy. */
32388 static void
32389 rs6000_xcoff_file_start (void)
32391 rs6000_gen_section_name (&xcoff_bss_section_name,
32392 main_input_filename, ".bss_");
32393 rs6000_gen_section_name (&xcoff_private_data_section_name,
32394 main_input_filename, ".rw_");
32395 rs6000_gen_section_name (&xcoff_read_only_section_name,
32396 main_input_filename, ".ro_");
32397 rs6000_gen_section_name (&xcoff_tls_data_section_name,
32398 main_input_filename, ".tls_");
32399 rs6000_gen_section_name (&xcoff_tbss_section_name,
32400 main_input_filename, ".tbss_[UL]");
32402 fputs ("\t.file\t", asm_out_file);
32403 output_quoted_string (asm_out_file, main_input_filename);
32404 fputc ('\n', asm_out_file);
32405 if (write_symbols != NO_DEBUG)
32406 switch_to_section (private_data_section);
32407 switch_to_section (toc_section);
32408 switch_to_section (text_section);
32409 if (profile_flag)
32410 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
32411 rs6000_file_start ();
32414 /* Output at end of assembler file.
32415 On the RS/6000, referencing data should automatically pull in text. */
32417 static void
32418 rs6000_xcoff_file_end (void)
32420 switch_to_section (text_section);
32421 fputs ("_section_.text:\n", asm_out_file);
32422 switch_to_section (data_section);
32423 fputs (TARGET_32BIT
32424 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
32425 asm_out_file);
32428 struct declare_alias_data
32430 FILE *file;
32431 bool function_descriptor;
32434 /* Declare alias N.  A callback for call_for_symbol_and_aliases.  */
32436 static bool
32437 rs6000_declare_alias (struct symtab_node *n, void *d)
32439 struct declare_alias_data *data = (struct declare_alias_data *)d;
32440 /* The main symbol is output specially, because the varasm machinery does
32441 part of the job for us; we do not need to declare .globl/.lglobl and such. */
32442 if (!n->alias || n->weakref)
32443 return false;
32445 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
32446 return false;
32448 /* Prevent assemble_alias from trying to use .set pseudo operation
32449 that does not behave as expected by the middle-end. */
32450 TREE_ASM_WRITTEN (n->decl) = true;
32452 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
32453 char *buffer = (char *) alloca (strlen (name) + 2);
32454 char *p;
32455 int dollar_inside = 0;
32457 strcpy (buffer, name);
32458 p = strchr (buffer, '$');
32459 while (p) {
32460 *p = '_';
32461 dollar_inside++;
32462 p = strchr (p + 1, '$');
32464 if (TREE_PUBLIC (n->decl))
32466 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
32468 if (dollar_inside) {
32469 if (data->function_descriptor)
32470 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
32471 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
32473 if (data->function_descriptor)
32475 fputs ("\t.globl .", data->file);
32476 RS6000_OUTPUT_BASENAME (data->file, buffer);
32477 putc ('\n', data->file);
32479 fputs ("\t.globl ", data->file);
32480 RS6000_OUTPUT_BASENAME (data->file, buffer);
32481 putc ('\n', data->file);
32483 #ifdef ASM_WEAKEN_DECL
32484 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
32485 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
32486 #endif
32488 else
32490 if (dollar_inside)
32492 if (data->function_descriptor)
32493 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
32494 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
32496 if (data->function_descriptor)
32498 fputs ("\t.lglobl .", data->file);
32499 RS6000_OUTPUT_BASENAME (data->file, buffer);
32500 putc ('\n', data->file);
32502 fputs ("\t.lglobl ", data->file);
32503 RS6000_OUTPUT_BASENAME (data->file, buffer);
32504 putc ('\n', data->file);
32506 if (data->function_descriptor)
32507 fputs (".", data->file);
32508 RS6000_OUTPUT_BASENAME (data->file, buffer);
32509 fputs (":\n", data->file);
32510 return false;
32513 /* This macro produces the initial definition of a function name.
32514 On the RS/6000, we need to place an extra '.' in the function name and
32515 output the function descriptor.
32516 Dollar signs are converted to underscores.
32518 The csect for the function will have already been created when
32519 text_section was selected. We do have to go back to that csect, however.
32521 The third and fourth parameters to the .function pseudo-op (16 and 044)
32522 are placeholders which no longer have any use.
32524 Because the AIX assembler's .set command has unexpected semantics, we output
32525 all aliases as alternative labels in front of the definition. */
32527 void
32528 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
32530 char *buffer = (char *) alloca (strlen (name) + 1);
32531 char *p;
32532 int dollar_inside = 0;
32533 struct declare_alias_data data = {file, false};
32535 strcpy (buffer, name);
32536 p = strchr (buffer, '$');
32537 while (p) {
32538 *p = '_';
32539 dollar_inside++;
32540 p = strchr (p + 1, '$');
32542 if (TREE_PUBLIC (decl))
32544 if (!RS6000_WEAK || !DECL_WEAK (decl))
32546 if (dollar_inside) {
32547 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32548 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32550 fputs ("\t.globl .", file);
32551 RS6000_OUTPUT_BASENAME (file, buffer);
32552 putc ('\n', file);
32555 else
32557 if (dollar_inside) {
32558 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32559 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32561 fputs ("\t.lglobl .", file);
32562 RS6000_OUTPUT_BASENAME (file, buffer);
32563 putc ('\n', file);
32565 fputs ("\t.csect ", file);
32566 RS6000_OUTPUT_BASENAME (file, buffer);
32567 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
32568 RS6000_OUTPUT_BASENAME (file, buffer);
32569 fputs (":\n", file);
32570 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32571 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
32572 RS6000_OUTPUT_BASENAME (file, buffer);
32573 fputs (", TOC[tc0], 0\n", file);
32574 in_section = NULL;
32575 switch_to_section (function_section (decl));
32576 putc ('.', file);
32577 RS6000_OUTPUT_BASENAME (file, buffer);
32578 fputs (":\n", file);
32579 data.function_descriptor = true;
32580 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32581 if (!DECL_IGNORED_P (decl))
32583 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32584 xcoffout_declare_function (file, decl, buffer);
32585 else if (write_symbols == DWARF2_DEBUG)
32587 name = (*targetm.strip_name_encoding) (name);
32588 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
32591 return;
32594 /* This macro produces the initial definition of an object (variable) name.
32595 Because the AIX assembler's .set command has unexpected semantics, we output
32596 all aliases as alternative labels in front of the definition. */
32598 void
32599 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
32601 struct declare_alias_data data = {file, false};
32602 RS6000_OUTPUT_BASENAME (file, name);
32603 fputs (":\n", file);
32604 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32607 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
32609 void
32610 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
32612 fputs (integer_asm_op (size, FALSE), file);
32613 assemble_name (file, label);
32614 fputs ("-$", file);
32617 /* Output a symbol offset relative to the dbase for the current object.
32618 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
32619 signed offsets.
32621 __gcc_unwind_dbase is embedded in all executables/libraries through
32622 libgcc/config/rs6000/crtdbase.S. */
32624 void
32625 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
32627 fputs (integer_asm_op (size, FALSE), file);
32628 assemble_name (file, label);
32629 fputs("-__gcc_unwind_dbase", file);
32632 #ifdef HAVE_AS_TLS
32633 static void
32634 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
32636 rtx symbol;
32637 int flags;
32639 default_encode_section_info (decl, rtl, first);
32641 /* Careful not to prod global register variables. */
32642 if (!MEM_P (rtl))
32643 return;
32644 symbol = XEXP (rtl, 0);
32645 if (GET_CODE (symbol) != SYMBOL_REF)
32646 return;
32648 flags = SYMBOL_REF_FLAGS (symbol);
32650 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32651 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
32653 SYMBOL_REF_FLAGS (symbol) = flags;
32655 #endif /* HAVE_AS_TLS */
32656 #endif /* TARGET_XCOFF */
32658 /* Return true if INSN should not be copied. */
32660 static bool
32661 rs6000_cannot_copy_insn_p (rtx_insn *insn)
32663 return recog_memoized (insn) >= 0
32664 && get_attr_cannot_copy (insn);
32667 /* Compute a (partial) cost for rtx X. Return true if the complete
32668 cost has been computed, and false if subexpressions should be
32669 scanned. In either case, *TOTAL contains the cost result. */
32671 static bool
32672 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
32673 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
32675 int code = GET_CODE (x);
32677 switch (code)
32679 /* On the RS/6000, if it is valid in the insn, it is free. */
32680 case CONST_INT:
32681 if (((outer_code == SET
32682 || outer_code == PLUS
32683 || outer_code == MINUS)
32684 && (satisfies_constraint_I (x)
32685 || satisfies_constraint_L (x)))
32686 || (outer_code == AND
32687 && (satisfies_constraint_K (x)
32688 || (mode == SImode
32689 ? satisfies_constraint_L (x)
32690 : satisfies_constraint_J (x))))
32691 || ((outer_code == IOR || outer_code == XOR)
32692 && (satisfies_constraint_K (x)
32693 || (mode == SImode
32694 ? satisfies_constraint_L (x)
32695 : satisfies_constraint_J (x))))
32696 || outer_code == ASHIFT
32697 || outer_code == ASHIFTRT
32698 || outer_code == LSHIFTRT
32699 || outer_code == ROTATE
32700 || outer_code == ROTATERT
32701 || outer_code == ZERO_EXTRACT
32702 || (outer_code == MULT
32703 && satisfies_constraint_I (x))
32704 || ((outer_code == DIV || outer_code == UDIV
32705 || outer_code == MOD || outer_code == UMOD)
32706 && exact_log2 (INTVAL (x)) >= 0)
32707 || (outer_code == COMPARE
32708 && (satisfies_constraint_I (x)
32709 || satisfies_constraint_K (x)))
32710 || ((outer_code == EQ || outer_code == NE)
32711 && (satisfies_constraint_I (x)
32712 || satisfies_constraint_K (x)
32713 || (mode == SImode
32714 ? satisfies_constraint_L (x)
32715 : satisfies_constraint_J (x))))
32716 || (outer_code == GTU
32717 && satisfies_constraint_I (x))
32718 || (outer_code == LTU
32719 && satisfies_constraint_P (x)))
32721 *total = 0;
32722 return true;
32724 else if ((outer_code == PLUS
32725 && reg_or_add_cint_operand (x, VOIDmode))
32726 || (outer_code == MINUS
32727 && reg_or_sub_cint_operand (x, VOIDmode))
32728 || ((outer_code == SET
32729 || outer_code == IOR
32730 || outer_code == XOR)
32731 && (INTVAL (x)
32732 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
32734 *total = COSTS_N_INSNS (1);
32735 return true;
32737 /* FALLTHRU */
32739 case CONST_DOUBLE:
32740 case CONST_WIDE_INT:
32741 case CONST:
32742 case HIGH:
32743 case SYMBOL_REF:
32744 case MEM:
32745 /* When optimizing for size, MEM should be slightly more expensive
32746 than generating an address, e.g., (plus (reg) (const)).
32747 L1 cache latency is about two instructions. */
32748 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
32749 return true;
32751 case LABEL_REF:
32752 *total = 0;
32753 return true;
32755 case PLUS:
32756 case MINUS:
32757 if (FLOAT_MODE_P (mode))
32758 *total = rs6000_cost->fp;
32759 else
32760 *total = COSTS_N_INSNS (1);
32761 return false;
32763 case MULT:
32764 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32765 && satisfies_constraint_I (XEXP (x, 1)))
32767 if (INTVAL (XEXP (x, 1)) >= -256
32768 && INTVAL (XEXP (x, 1)) <= 255)
32769 *total = rs6000_cost->mulsi_const9;
32770 else
32771 *total = rs6000_cost->mulsi_const;
32773 else if (mode == SFmode)
32774 *total = rs6000_cost->fp;
32775 else if (FLOAT_MODE_P (mode))
32776 *total = rs6000_cost->dmul;
32777 else if (mode == DImode)
32778 *total = rs6000_cost->muldi;
32779 else
32780 *total = rs6000_cost->mulsi;
32781 return false;
32783 case FMA:
32784 if (mode == SFmode)
32785 *total = rs6000_cost->fp;
32786 else
32787 *total = rs6000_cost->dmul;
32788 break;
32790 case DIV:
32791 case MOD:
32792 if (FLOAT_MODE_P (mode))
32794 *total = mode == DFmode ? rs6000_cost->ddiv
32795 : rs6000_cost->sdiv;
32796 return false;
32798 /* FALLTHRU */
32800 case UDIV:
32801 case UMOD:
32802 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32803 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
32805 if (code == DIV || code == MOD)
32806 /* Shift, addze */
32807 *total = COSTS_N_INSNS (2);
32808 else
32809 /* Shift */
32810 *total = COSTS_N_INSNS (1);
32812 else
32814 if (GET_MODE (XEXP (x, 1)) == DImode)
32815 *total = rs6000_cost->divdi;
32816 else
32817 *total = rs6000_cost->divsi;
32819 /* Add in shift and subtract for MOD unless we have a mod instruction. */
32820 if (!TARGET_MODULO && (code == MOD || code == UMOD))
32821 *total += COSTS_N_INSNS (2);
32822 return false;
32824 case CTZ:
32825 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
32826 return false;
32828 case FFS:
32829 *total = COSTS_N_INSNS (4);
32830 return false;
32832 case POPCOUNT:
32833 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
32834 return false;
32836 case PARITY:
32837 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
32838 return false;
32840 case NOT:
32841 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
32842 *total = 0;
32843 else
32844 *total = COSTS_N_INSNS (1);
32845 return false;
32847 case AND:
32848 if (CONST_INT_P (XEXP (x, 1)))
32850 rtx left = XEXP (x, 0);
32851 rtx_code left_code = GET_CODE (left);
32853 /* rotate-and-mask: 1 insn. */
32854 if ((left_code == ROTATE
32855 || left_code == ASHIFT
32856 || left_code == LSHIFTRT)
32857 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
32859 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
32860 if (!CONST_INT_P (XEXP (left, 1)))
32861 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
32862 *total += COSTS_N_INSNS (1);
32863 return true;
32866 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
32867 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
32868 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
32869 || (val & 0xffff) == val
32870 || (val & 0xffff0000) == val
32871 || ((val & 0xffff) == 0 && mode == SImode))
32873 *total = rtx_cost (left, mode, AND, 0, speed);
32874 *total += COSTS_N_INSNS (1);
32875 return true;
32878 /* 2 insns. */
32879 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
32881 *total = rtx_cost (left, mode, AND, 0, speed);
32882 *total += COSTS_N_INSNS (2);
32883 return true;
32887 *total = COSTS_N_INSNS (1);
32888 return false;
32890 case IOR:
32891 /* FIXME */
32892 *total = COSTS_N_INSNS (1);
32893 return true;
32895 case CLZ:
32896 case XOR:
32897 case ZERO_EXTRACT:
32898 *total = COSTS_N_INSNS (1);
32899 return false;
32901 case ASHIFT:
32902 /* EXTSWSLI is a combined instruction, so don't count the sign
32903 extension and the shift separately within the insn. */
32904 if (TARGET_EXTSWSLI && mode == DImode
32905 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
32906 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
32908 *total = 0;
32909 return false;
32911 /* fall through */
32913 case ASHIFTRT:
32914 case LSHIFTRT:
32915 case ROTATE:
32916 case ROTATERT:
32917 /* Handle mul_highpart. */
32918 if (outer_code == TRUNCATE
32919 && GET_CODE (XEXP (x, 0)) == MULT)
32921 if (mode == DImode)
32922 *total = rs6000_cost->muldi;
32923 else
32924 *total = rs6000_cost->mulsi;
32925 return true;
32927 else if (outer_code == AND)
32928 *total = 0;
32929 else
32930 *total = COSTS_N_INSNS (1);
32931 return false;
32933 case SIGN_EXTEND:
32934 case ZERO_EXTEND:
32935 if (GET_CODE (XEXP (x, 0)) == MEM)
32936 *total = 0;
32937 else
32938 *total = COSTS_N_INSNS (1);
32939 return false;
32941 case COMPARE:
32942 case NEG:
32943 case ABS:
32944 if (!FLOAT_MODE_P (mode))
32946 *total = COSTS_N_INSNS (1);
32947 return false;
32949 /* FALLTHRU */
32951 case FLOAT:
32952 case UNSIGNED_FLOAT:
32953 case FIX:
32954 case UNSIGNED_FIX:
32955 case FLOAT_TRUNCATE:
32956 *total = rs6000_cost->fp;
32957 return false;
32959 case FLOAT_EXTEND:
32960 if (mode == DFmode)
32961 *total = rs6000_cost->sfdf_convert;
32962 else
32963 *total = rs6000_cost->fp;
32964 return false;
32966 case UNSPEC:
32967 switch (XINT (x, 1))
32969 case UNSPEC_FRSP:
32970 *total = rs6000_cost->fp;
32971 return true;
32973 default:
32974 break;
32976 break;
32978 case CALL:
32979 case IF_THEN_ELSE:
32980 if (!speed)
32982 *total = COSTS_N_INSNS (1);
32983 return true;
32985 else if (FLOAT_MODE_P (mode)
32986 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
32988 *total = rs6000_cost->fp;
32989 return false;
32991 break;
32993 case NE:
32994 case EQ:
32995 case GTU:
32996 case LTU:
32997 /* Carry bit requires mode == Pmode.
32998 NEG or PLUS already counted so only add one. */
32999 if (mode == Pmode
33000 && (outer_code == NEG || outer_code == PLUS))
33002 *total = COSTS_N_INSNS (1);
33003 return true;
33005 if (outer_code == SET)
33007 if (XEXP (x, 1) == const0_rtx)
33009 if (TARGET_ISEL && !TARGET_MFCRF)
33010 *total = COSTS_N_INSNS (8);
33011 else
33012 *total = COSTS_N_INSNS (2);
33013 return true;
33015 else
33017 *total = COSTS_N_INSNS (3);
33018 return false;
33021 /* FALLTHRU */
33023 case GT:
33024 case LT:
33025 case UNORDERED:
33026 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
33028 if (TARGET_ISEL && !TARGET_MFCRF)
33029 *total = COSTS_N_INSNS (8);
33030 else
33031 *total = COSTS_N_INSNS (2);
33032 return true;
33034 /* CC COMPARE. */
33035 if (outer_code == COMPARE)
33037 *total = 0;
33038 return true;
33040 break;
33042 default:
33043 break;
33046 return false;
33049 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
33051 static bool
33052 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
33053 int opno, int *total, bool speed)
33055 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
33057 fprintf (stderr,
33058 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
33059 "opno = %d, total = %d, speed = %s, x:\n",
33060 ret ? "complete" : "scan inner",
33061 GET_MODE_NAME (mode),
33062 GET_RTX_NAME (outer_code),
33063 opno,
33064 *total,
33065 speed ? "true" : "false");
33067 debug_rtx (x);
33069 return ret;
33072 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
33074 static int
33075 rs6000_debug_address_cost (rtx x, machine_mode mode,
33076 addr_space_t as, bool speed)
33078 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
33080 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
33081 ret, speed ? "true" : "false");
33082 debug_rtx (x);
33084 return ret;
33088 /* A C expression returning the cost of moving data from a register of class
33089 CLASS1 to one of CLASS2. */
33091 static int
33092 rs6000_register_move_cost (machine_mode mode,
33093 reg_class_t from, reg_class_t to)
33095 int ret;
33097 if (TARGET_DEBUG_COST)
33098 dbg_cost_ctrl++;
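/* Note: hard_regno_nregs[0][mode] below is the number of GPR-sized
   registers MODE occupies (register 0 is a GPR); index 32 is the
   first FPR/VSX register. */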
33100 /* Moves from/to GENERAL_REGS. */
33101 if (reg_classes_intersect_p (to, GENERAL_REGS)
33102 || reg_classes_intersect_p (from, GENERAL_REGS))
33104 reg_class_t rclass = from;
33106 if (! reg_classes_intersect_p (to, GENERAL_REGS))
33107 rclass = to;
33109 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
33110 ret = (rs6000_memory_move_cost (mode, rclass, false)
33111 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
33113 /* It's more expensive to move CR_REGS than CR0_REGS because of the
33114 shift. */
33115 else if (rclass == CR_REGS)
33116 ret = 4;
33118 /* For those processors that have slow LR/CTR moves, make them more
33119 expensive than memory in order to bias spills to memory. */
33120 else if ((rs6000_cpu == PROCESSOR_POWER6
33121 || rs6000_cpu == PROCESSOR_POWER7
33122 || rs6000_cpu == PROCESSOR_POWER8
33123 || rs6000_cpu == PROCESSOR_POWER9)
33124 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
33125 ret = 6 * hard_regno_nregs[0][mode];
33127 else
33128 /* A move will cost one instruction per GPR moved. */
33129 ret = 2 * hard_regno_nregs[0][mode];
33132 /* If we have VSX, we can easily move between FPR or Altivec registers. */
33133 else if (VECTOR_MEM_VSX_P (mode)
33134 && reg_classes_intersect_p (to, VSX_REGS)
33135 && reg_classes_intersect_p (from, VSX_REGS))
33136 ret = 2 * hard_regno_nregs[32][mode];
33138 /* Moving between two similar registers is just one instruction. */
33139 else if (reg_classes_intersect_p (to, from))
33140 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
33142 /* Everything else has to go through GENERAL_REGS. */
33143 else
33144 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
33145 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
33147 if (TARGET_DEBUG_COST)
33149 if (dbg_cost_ctrl == 1)
33150 fprintf (stderr,
33151 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
33152 ret, GET_MODE_NAME (mode), reg_class_names[from],
33153 reg_class_names[to]);
33154 dbg_cost_ctrl--;
33157 return ret;
33160 /* A C expression returning the cost of moving data of MODE from a register to
33161 or from memory. */
33163 static int
33164 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
33165 bool in ATTRIBUTE_UNUSED)
33167 int ret;
33169 if (TARGET_DEBUG_COST)
33170 dbg_cost_ctrl++;
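/* Cost one memory access per hard register needed to hold MODE, at a
   nominal cost of 4 each. */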
33172 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
33173 ret = 4 * hard_regno_nregs[0][mode];
33174 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
33175 || reg_classes_intersect_p (rclass, VSX_REGS)))
33176 ret = 4 * hard_regno_nregs[32][mode];
33177 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
33178 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
33179 else
33180 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
33182 if (TARGET_DEBUG_COST)
33184 if (dbg_cost_ctrl == 1)
33185 fprintf (stderr,
33186 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
33187 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
33188 dbg_cost_ctrl--;
33191 return ret;
33194 /* Returns a code for a target-specific builtin that implements
33195 reciprocal of the function, or NULL_TREE if not available. */
33197 static tree
33198 rs6000_builtin_reciprocal (tree fndecl)
33200 switch (DECL_FUNCTION_CODE (fndecl))
33202 case VSX_BUILTIN_XVSQRTDP:
33203 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
33204 return NULL_TREE;
33206 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
33208 case VSX_BUILTIN_XVSQRTSP:
33209 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
33210 return NULL_TREE;
33212 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
33214 default:
33215 return NULL_TREE;
33219 /* Load up a constant. If the mode is a vector mode, splat the value across
33220 all of the vector elements. */
33222 static rtx
33223 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
33225 rtx reg;
33227 if (mode == SFmode || mode == DFmode)
33229 rtx d = const_double_from_real_value (dconst, mode);
33230 reg = force_reg (mode, d);
33232 else if (mode == V4SFmode)
33234 rtx d = const_double_from_real_value (dconst, SFmode);
33235 rtvec v = gen_rtvec (4, d, d, d, d);
33236 reg = gen_reg_rtx (mode);
33237 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
33239 else if (mode == V2DFmode)
33241 rtx d = const_double_from_real_value (dconst, DFmode);
33242 rtvec v = gen_rtvec (2, d, d);
33243 reg = gen_reg_rtx (mode);
33244 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
33246 else
33247 gcc_unreachable ();
33249 return reg;
33252 /* Generate an FMA instruction. */
33254 static void
33255 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
33257 machine_mode mode = GET_MODE (target);
33258 rtx dst;
33260 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
33261 gcc_assert (dst != NULL);
33263 if (dst != target)
33264 emit_move_insn (target, dst);
33267 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
33269 static void
33270 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
33272 machine_mode mode = GET_MODE (dst);
33273 rtx r;
33275 /* This is a tad more complicated, since the fnma_optab is for
33276 a different expression: fma(-m1, m2, a), which is the same
33277 thing except in the case of signed zeros.
33279 Fortunately we know that if FMA is supported that FNMSUB is
33280 also supported in the ISA. Just expand it directly. */
33282 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
33284 r = gen_rtx_NEG (mode, a);
33285 r = gen_rtx_FMA (mode, m1, m2, r);
33286 r = gen_rtx_NEG (mode, r);
33287 emit_insn (gen_rtx_SET (dst, r));
33290 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
33291 add a reg_note saying that this was a division. Support both scalar and
33292 vector divide. Assumes no trapping math and finite arguments. */
33294 void
33295 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
33297 machine_mode mode = GET_MODE (dst);
33298 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
33299 int i;
33301 /* Low precision estimates guarantee 5 bits of accuracy. High
33302 precision estimates guarantee 14 bits of accuracy. SFmode
33303 requires 23 bits of accuracy. DFmode requires 52 bits of
33304 accuracy. Each pass at least doubles the accuracy, leading
33305 to the following. */
33306 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
33307 if (mode == DFmode || mode == V2DFmode)
33308 passes++;
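/* For example, a 5-bit estimate refined 3 times yields 10, 20, then 40
   good bits (enough for SFmode's 23); the extra DFmode pass yields 80
   bits (enough for 52). A 14-bit estimate needs only 1 pass for SFmode
   (28 bits) and 2 for DFmode (56 bits). */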
33310 enum insn_code code = optab_handler (smul_optab, mode);
33311 insn_gen_fn gen_mul = GEN_FCN (code);
33313 gcc_assert (code != CODE_FOR_nothing);
33315 one = rs6000_load_constant_and_splat (mode, dconst1);
33317 /* x0 = 1./d estimate */
33318 x0 = gen_reg_rtx (mode);
33319 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
33320 UNSPEC_FRES)));
33322 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
33323 if (passes > 1)
{
33325 /* e0 = 1. - d * x0 */
33326 e0 = gen_reg_rtx (mode);
33327 rs6000_emit_nmsub (e0, d, x0, one);
33329 /* x1 = x0 + e0 * x0 */
33330 x1 = gen_reg_rtx (mode);
33331 rs6000_emit_madd (x1, e0, x0, x0);
33333 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
33334 ++i, xprev = xnext, eprev = enext)
{
33336 /* enext = eprev * eprev */
33337 enext = gen_reg_rtx (mode);
33338 emit_insn (gen_mul (enext, eprev, eprev));
33340 /* xnext = xprev + enext * xprev */
33341 xnext = gen_reg_rtx (mode);
33342 rs6000_emit_madd (xnext, enext, xprev, xprev);
}
33345 else
33346 xprev = x0;
33348 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
33350 /* u = n * xprev */
33351 u = gen_reg_rtx (mode);
33352 emit_insn (gen_mul (u, n, xprev));
33354 /* v = n - (d * u) */
33355 v = gen_reg_rtx (mode);
33356 rs6000_emit_nmsub (v, d, u, n);
33358 /* dst = (v * xprev) + u */
33359 rs6000_emit_madd (dst, v, xprev, u);
33361 if (note_p)
33362 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
33365 /* Goldschmidt's Algorithm for single/double-precision floating point
33366 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
33368 void
33369 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
33371 machine_mode mode = GET_MODE (src);
33372 rtx e = gen_reg_rtx (mode);
33373 rtx g = gen_reg_rtx (mode);
33374 rtx h = gen_reg_rtx (mode);
33376 /* Low precision estimates guarantee 5 bits of accuracy. High
33377 precision estimates guarantee 14 bits of accuracy. SFmode
33378 requires 23 bits of accuracy. DFmode requires 52 bits of
33379 accuracy. Each pass at least doubles the accuracy, leading
33380 to the following. */
33381 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
33382 if (mode == DFmode || mode == V2DFmode)
33383 passes++;
33385 int i;
33386 rtx mhalf;
33387 enum insn_code code = optab_handler (smul_optab, mode);
33388 insn_gen_fn gen_mul = GEN_FCN (code);
33390 gcc_assert (code != CODE_FOR_nothing);
33392 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
33394 /* e = rsqrt estimate */
33395 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
33396 UNSPEC_RSQRT)));
33398 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
33399 if (!recip)
33401 rtx zero = force_reg (mode, CONST0_RTX (mode));
33403 if (mode == SFmode)
33405 rtx target = emit_conditional_move (e, GT, src, zero, mode,
33406 e, zero, mode, 0);
33407 if (target != e)
33408 emit_move_insn (e, target);
33410 else
33412 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
33413 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
33417 /* g = sqrt estimate. */
33418 emit_insn (gen_mul (g, e, src));
33419 /* h = 1/(2*sqrt) estimate. */
33420 emit_insn (gen_mul (h, e, mhalf));
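/* Goldschmidt refinement: with g ~= sqrt(src) and h ~= 1/(2*sqrt(src)),
   t = 1/2 - g*h tends to zero, and g*(1 + t) and h*(1 + t) converge
   quadratically to sqrt(src) and 1/(2*sqrt(src)) respectively. */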
33422 if (recip)
33424 if (passes == 1)
33426 rtx t = gen_reg_rtx (mode);
33427 rs6000_emit_nmsub (t, g, h, mhalf);
33428 /* Apply correction directly to 1/rsqrt estimate. */
33429 rs6000_emit_madd (dst, e, t, e);
33431 else
33433 for (i = 0; i < passes; i++)
33435 rtx t1 = gen_reg_rtx (mode);
33436 rtx g1 = gen_reg_rtx (mode);
33437 rtx h1 = gen_reg_rtx (mode);
33439 rs6000_emit_nmsub (t1, g, h, mhalf);
33440 rs6000_emit_madd (g1, g, t1, g);
33441 rs6000_emit_madd (h1, h, t1, h);
33443 g = g1;
33444 h = h1;
33446 /* Multiply by 2 for 1/rsqrt. */
33447 emit_insn (gen_add3_insn (dst, h, h));
33450 else
33452 rtx t = gen_reg_rtx (mode);
33453 rs6000_emit_nmsub (t, g, h, mhalf);
33454 rs6000_emit_madd (dst, g, t, g);
33457 return;
33460 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
33461 (Power7) targets. DST is the target, and SRC is the argument operand. */
33463 void
33464 rs6000_emit_popcount (rtx dst, rtx src)
33466 machine_mode mode = GET_MODE (dst);
33467 rtx tmp1, tmp2;
33469 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
33470 if (TARGET_POPCNTD)
33472 if (mode == SImode)
33473 emit_insn (gen_popcntdsi2 (dst, src));
33474 else
33475 emit_insn (gen_popcntddi2 (dst, src));
33476 return;
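/* Otherwise fall back to popcntb, which counts the 1-bits within each
   byte in parallel. Multiplying the per-byte counts by 0x01010101 (or
   its 64-bit equivalent) accumulates all byte sums into the most
   significant byte, which the final right shift extracts. */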
33479 tmp1 = gen_reg_rtx (mode);
33481 if (mode == SImode)
33483 emit_insn (gen_popcntbsi2 (tmp1, src));
33484 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
33485 NULL_RTX, 0);
33486 tmp2 = force_reg (SImode, tmp2);
33487 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
33489 else
33491 emit_insn (gen_popcntbdi2 (tmp1, src));
33492 tmp2 = expand_mult (DImode, tmp1,
33493 GEN_INT ((HOST_WIDE_INT)
33494 0x01010101 << 32 | 0x01010101),
33495 NULL_RTX, 0);
33496 tmp2 = force_reg (DImode, tmp2);
33497 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
33502 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
33503 target, and SRC is the argument operand. */
33505 void
33506 rs6000_emit_parity (rtx dst, rtx src)
33508 machine_mode mode = GET_MODE (dst);
33509 rtx tmp;
33511 tmp = gen_reg_rtx (mode);
33513 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
33514 if (TARGET_CMPB)
33516 if (mode == SImode)
33518 emit_insn (gen_popcntbsi2 (tmp, src));
33519 emit_insn (gen_paritysi2_cmpb (dst, tmp));
33521 else
33523 emit_insn (gen_popcntbdi2 (tmp, src));
33524 emit_insn (gen_paritydi2_cmpb (dst, tmp));
33526 return;
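/* Otherwise reduce per-byte popcounts (or a full popcount) to a single
   bit: repeatedly xor-folding the upper half onto the lower half leaves
   the overall parity in the least significant bit, which the final AND
   with 1 extracts. */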
33529 if (mode == SImode)
33531 /* Is mult+shift >= shift+xor+shift+xor? */
33532 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
33534 rtx tmp1, tmp2, tmp3, tmp4;
33536 tmp1 = gen_reg_rtx (SImode);
33537 emit_insn (gen_popcntbsi2 (tmp1, src));
33539 tmp2 = gen_reg_rtx (SImode);
33540 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
33541 tmp3 = gen_reg_rtx (SImode);
33542 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
33544 tmp4 = gen_reg_rtx (SImode);
33545 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
33546 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
33548 else
33549 rs6000_emit_popcount (tmp, src);
33550 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
33552 else
33554 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
33555 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
33557 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
33559 tmp1 = gen_reg_rtx (DImode);
33560 emit_insn (gen_popcntbdi2 (tmp1, src));
33562 tmp2 = gen_reg_rtx (DImode);
33563 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
33564 tmp3 = gen_reg_rtx (DImode);
33565 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
33567 tmp4 = gen_reg_rtx (DImode);
33568 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
33569 tmp5 = gen_reg_rtx (DImode);
33570 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
33572 tmp6 = gen_reg_rtx (DImode);
33573 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
33574 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
33576 else
33577 rs6000_emit_popcount (tmp, src);
33578 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
33582 /* Expand an Altivec constant permutation for little endian mode.
33583 There are two issues: First, the two input operands must be
33584 swapped so that together they form a double-wide array in LE
33585 order. Second, the vperm instruction has surprising behavior
33586 in LE mode: it interprets the elements of the source vectors
33587 in BE mode ("left to right") and interprets the elements of
33588 the destination vector in LE mode ("right to left"). To
33589 correct for this, we must subtract each element of the permute
33590 control vector from 31.
33592 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
33593 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
33594 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
33595 serve as the permute control vector. Then, in BE mode,
33597 vperm 9,10,11,12
33599 places the desired result in vr9. However, in LE mode the
33600 vector contents will be
33602 vr10 = 00000003 00000002 00000001 00000000
33603 vr11 = 00000007 00000006 00000005 00000004
33605 The result of the vperm using the same permute control vector is
33607 vr9 = 05000000 07000000 01000000 03000000
33609 That is, the leftmost 4 bytes of vr10 are interpreted as the
33610 source for the rightmost 4 bytes of vr9, and so on.
33612 If we change the permute control vector to
33614 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
33616 and issue
33618 vperm 9,11,10,12
33620 we get the desired
33622 vr9 = 00000006 00000004 00000002 00000000. */
33624 void
33625 altivec_expand_vec_perm_const_le (rtx operands[4])
33627 unsigned int i;
33628 rtx perm[16];
33629 rtx constv, unspec;
33630 rtx target = operands[0];
33631 rtx op0 = operands[1];
33632 rtx op1 = operands[2];
33633 rtx sel = operands[3];
33635 /* Unpack and adjust the constant selector. */
33636 for (i = 0; i < 16; ++i)
33638 rtx e = XVECEXP (sel, 0, i);
33639 unsigned int elt = 31 - (INTVAL (e) & 31);
33640 perm[i] = GEN_INT (elt);
33643 /* Expand to a permute, swapping the inputs and using the
33644 adjusted selector. */
33645 if (!REG_P (op0))
33646 op0 = force_reg (V16QImode, op0);
33647 if (!REG_P (op1))
33648 op1 = force_reg (V16QImode, op1);
33650 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
33651 constv = force_reg (V16QImode, constv);
33652 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
33653 UNSPEC_VPERM);
33654 if (!REG_P (target))
33656 rtx tmp = gen_reg_rtx (V16QImode);
33657 emit_move_insn (tmp, unspec);
33658 unspec = tmp;
33661 emit_move_insn (target, unspec);
33664 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
33665 permute control vector. But here it's not a constant, so we must
33666 generate a vector NAND or NOR to do the adjustment. */
33668 void
33669 altivec_expand_vec_perm_le (rtx operands[4])
33671 rtx notx, iorx, unspec;
33672 rtx target = operands[0];
33673 rtx op0 = operands[1];
33674 rtx op1 = operands[2];
33675 rtx sel = operands[3];
33676 rtx tmp = target;
33677 rtx norreg = gen_reg_rtx (V16QImode);
33678 machine_mode mode = GET_MODE (target);
33680 /* Get everything in regs so the pattern matches. */
33681 if (!REG_P (op0))
33682 op0 = force_reg (mode, op0);
33683 if (!REG_P (op1))
33684 op1 = force_reg (mode, op1);
33685 if (!REG_P (sel))
33686 sel = force_reg (V16QImode, sel);
33687 if (!REG_P (target))
33688 tmp = gen_reg_rtx (mode);
33690 /* Invert the selector with a VNAND if available, else a VNOR.
33691 The VNAND is preferred for future fusion opportunities. */
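/* By De Morgan, (ior (not sel) (not sel)) is the canonical RTL for
   vnand (sel, sel) and (and (not sel) (not sel)) for vnor (sel, sel);
   either computes ~sel, i.e. 31 - sel in each 5-bit selector field. */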
33692 notx = gen_rtx_NOT (V16QImode, sel);
33693 iorx = (TARGET_P8_VECTOR
33694 ? gen_rtx_IOR (V16QImode, notx, notx)
33695 : gen_rtx_AND (V16QImode, notx, notx));
33696 emit_insn (gen_rtx_SET (norreg, iorx));
33698 /* Permute with operands reversed and adjusted selector. */
33699 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
33700 UNSPEC_VPERM);
33702 /* Copy into target, possibly by way of a register. */
33703 if (!REG_P (target))
33705 emit_move_insn (tmp, unspec);
33706 unspec = tmp;
33709 emit_move_insn (target, unspec);
33712 /* Expand an Altivec constant permutation. Return true if we match
33713 an efficient implementation; false to fall back to VPERM. */
33715 bool
33716 altivec_expand_vec_perm_const (rtx operands[4])
33718 struct altivec_perm_insn {
33719 HOST_WIDE_INT mask;
33720 enum insn_code impl;
33721 unsigned char perm[16];
33723 static const struct altivec_perm_insn patterns[] = {
33724 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
33725 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
33726 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
33727 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
33728 { OPTION_MASK_ALTIVEC,
33729 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
33730 : CODE_FOR_altivec_vmrglb_direct),
33731 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
33732 { OPTION_MASK_ALTIVEC,
33733 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
33734 : CODE_FOR_altivec_vmrglh_direct),
33735 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
33736 { OPTION_MASK_ALTIVEC,
33737 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
33738 : CODE_FOR_altivec_vmrglw_direct),
33739 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
33740 { OPTION_MASK_ALTIVEC,
33741 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
33742 : CODE_FOR_altivec_vmrghb_direct),
33743 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
33744 { OPTION_MASK_ALTIVEC,
33745 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
33746 : CODE_FOR_altivec_vmrghh_direct),
33747 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
33748 { OPTION_MASK_ALTIVEC,
33749 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
33750 : CODE_FOR_altivec_vmrghw_direct),
33751 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
33752 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
33753 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
33754 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
33755 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
33758 unsigned int i, j, elt, which;
33759 unsigned char perm[16];
33760 rtx target, op0, op1, sel, x;
33761 bool one_vec;
33763 target = operands[0];
33764 op0 = operands[1];
33765 op1 = operands[2];
33766 sel = operands[3];
33768 /* Unpack the constant selector. */
33769 for (i = which = 0; i < 16; ++i)
33771 rtx e = XVECEXP (sel, 0, i);
33772 elt = INTVAL (e) & 31;
33773 which |= (elt < 16 ? 1 : 2);
33774 perm[i] = elt;
33777 /* Simplify the constant selector based on operands. */
33778 switch (which)
33780 default:
33781 gcc_unreachable ();
33783 case 3:
33784 one_vec = false;
33785 if (!rtx_equal_p (op0, op1))
33786 break;
33787 /* FALLTHRU */
33789 case 2:
33790 for (i = 0; i < 16; ++i)
33791 perm[i] &= 15;
33792 op0 = op1;
33793 one_vec = true;
33794 break;
33796 case 1:
33797 op1 = op0;
33798 one_vec = true;
33799 break;
33802 /* Look for splat patterns. */
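/* A selector that repeats a single element, or an aligned pair or quad
   of consecutive elements, is a byte, halfword or word splat
   respectively. */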
33803 if (one_vec)
33805 elt = perm[0];
33807 for (i = 0; i < 16; ++i)
33808 if (perm[i] != elt)
33809 break;
33810 if (i == 16)
33812 if (!BYTES_BIG_ENDIAN)
33813 elt = 15 - elt;
33814 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
33815 return true;
33818 if (elt % 2 == 0)
33820 for (i = 0; i < 16; i += 2)
33821 if (perm[i] != elt || perm[i + 1] != elt + 1)
33822 break;
33823 if (i == 16)
33825 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
33826 x = gen_reg_rtx (V8HImode);
33827 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
33828 GEN_INT (field)));
33829 emit_move_insn (target, gen_lowpart (V16QImode, x));
33830 return true;
33834 if (elt % 4 == 0)
33836 for (i = 0; i < 16; i += 4)
33837 if (perm[i] != elt
33838 || perm[i + 1] != elt + 1
33839 || perm[i + 2] != elt + 2
33840 || perm[i + 3] != elt + 3)
33841 break;
33842 if (i == 16)
33844 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
33845 x = gen_reg_rtx (V4SImode);
33846 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
33847 GEN_INT (field)));
33848 emit_move_insn (target, gen_lowpart (V16QImode, x));
33849 return true;
33854 /* Look for merge and pack patterns. */
33855 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
33857 bool swapped;
33859 if ((patterns[j].mask & rs6000_isa_flags) == 0)
33860 continue;
33862 elt = patterns[j].perm[0];
33863 if (perm[0] == elt)
33864 swapped = false;
33865 else if (perm[0] == elt + 16)
33866 swapped = true;
33867 else
33868 continue;
33869 for (i = 1; i < 16; ++i)
33871 elt = patterns[j].perm[i];
33872 if (swapped)
33873 elt = (elt >= 16 ? elt - 16 : elt + 16);
33874 else if (one_vec && elt >= 16)
33875 elt -= 16;
33876 if (perm[i] != elt)
33877 break;
33879 if (i == 16)
33881 enum insn_code icode = patterns[j].impl;
33882 machine_mode omode = insn_data[icode].operand[0].mode;
33883 machine_mode imode = insn_data[icode].operand[1].mode;
33885 /* For little-endian, don't use vpkuwum and vpkuhum if the
33886 underlying vector type is not V4SI and V8HI, respectively.
33887 For example, using vpkuwum with a V8HI picks up the even
33888 halfwords (BE numbering) when the even halfwords (LE
33889 numbering) are what we need. */
33890 if (!BYTES_BIG_ENDIAN
33891 && icode == CODE_FOR_altivec_vpkuwum_direct
33892 && ((GET_CODE (op0) == REG
33893 && GET_MODE (op0) != V4SImode)
33894 || (GET_CODE (op0) == SUBREG
33895 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
33896 continue;
33897 if (!BYTES_BIG_ENDIAN
33898 && icode == CODE_FOR_altivec_vpkuhum_direct
33899 && ((GET_CODE (op0) == REG
33900 && GET_MODE (op0) != V8HImode)
33901 || (GET_CODE (op0) == SUBREG
33902 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
33903 continue;
33905 /* For little-endian, the two input operands must be swapped
33906 (or swapped back) to ensure proper right-to-left numbering
33907 from 0 to 2N-1. */
33908 if (swapped ^ !BYTES_BIG_ENDIAN)
33909 std::swap (op0, op1);
33910 if (imode != V16QImode)
33912 op0 = gen_lowpart (imode, op0);
33913 op1 = gen_lowpart (imode, op1);
33915 if (omode == V16QImode)
33916 x = target;
33917 else
33918 x = gen_reg_rtx (omode);
33919 emit_insn (GEN_FCN (icode) (x, op0, op1));
33920 if (omode != V16QImode)
33921 emit_move_insn (target, gen_lowpart (V16QImode, x));
33922 return true;
33926 if (!BYTES_BIG_ENDIAN)
33928 altivec_expand_vec_perm_const_le (operands);
33929 return true;
33932 return false;
33935 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
33936 Return true if we match an efficient implementation. */
33938 static bool
33939 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
33940 unsigned char perm0, unsigned char perm1)
33942 rtx x;
33944 /* If both selectors come from the same operand, fold to single op. */
33945 if ((perm0 & 2) == (perm1 & 2))
33947 if (perm0 & 2)
33948 op0 = op1;
33949 else
33950 op1 = op0;
33952 /* If both operands are equal, fold to simpler permutation. */
33953 if (rtx_equal_p (op0, op1))
33955 perm0 = perm0 & 1;
33956 perm1 = (perm1 & 1) + 2;
33958 /* If the first selector comes from the second operand, swap. */
33959 else if (perm0 & 2)
33961 if (perm1 & 2)
33962 return false;
33963 perm0 -= 2;
33964 perm1 += 2;
33965 std::swap (op0, op1);
33967 /* If the second selector does not come from the second operand, fail. */
33968 else if ((perm1 & 2) == 0)
33969 return false;
33971 /* Success! */
33972 if (target != NULL)
33974 machine_mode vmode, dmode;
33975 rtvec v;
33977 vmode = GET_MODE (target);
33978 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
33979 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
33980 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
33981 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
33982 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
33983 emit_insn (gen_rtx_SET (target, x));
33985 return true;
33988 bool
33989 rs6000_expand_vec_perm_const (rtx operands[4])
33991 rtx target, op0, op1, sel;
33992 unsigned char perm0, perm1;
33994 target = operands[0];
33995 op0 = operands[1];
33996 op1 = operands[2];
33997 sel = operands[3];
33999 /* Unpack the constant selector. */
34000 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
34001 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
34003 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
34006 /* Test whether a constant permutation is supported. */
34008 static bool
34009 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
34010 const unsigned char *sel)
34012 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
34013 if (TARGET_ALTIVEC)
34014 return true;
34016 /* Check for ps_merge* or evmerge* insns. */
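/* Trial-match using dummy registers: with a NULL target,
   rs6000_expand_vec_perm_const_1 only checks the selector and emits
   no code. */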
34017 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
34018 || (TARGET_SPE && vmode == V2SImode))
34020 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
34021 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
34022 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
34025 return false;
34028 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
34030 static void
34031 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
34032 machine_mode vmode, unsigned nelt, rtx perm[])
34034 machine_mode imode;
34035 rtx x;
34037 imode = vmode;
34038 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
34040 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
34041 imode = mode_for_vector (imode, nelt);
34044 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
34045 x = expand_vec_perm (vmode, op0, op1, x, target);
34046 if (x != target)
34047 emit_move_insn (target, x);
34050 /* Expand an extract even operation. */
34052 void
34053 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
34055 machine_mode vmode = GET_MODE (target);
34056 unsigned i, nelt = GET_MODE_NUNITS (vmode);
34057 rtx perm[16];
34059 for (i = 0; i < nelt; i++)
34060 perm[i] = GEN_INT (i * 2);
34062 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
34065 /* Expand a vector interleave operation. */
34067 void
34068 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
34070 machine_mode vmode = GET_MODE (target);
34071 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
34072 rtx perm[16];
34074 high = (highp ? 0 : nelt / 2);
34075 for (i = 0; i < nelt / 2; i++)
34077 perm[i * 2] = GEN_INT (i + high);
34078 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
34081 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
34084 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
34085 void
34086 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
34088 HOST_WIDE_INT hwi_scale (scale);
34089 REAL_VALUE_TYPE r_pow;
34090 rtvec v = rtvec_alloc (2);
34091 rtx elt;
34092 rtx scale_vec = gen_reg_rtx (V2DFmode);
34093 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
34094 elt = const_double_from_real_value (r_pow, DFmode);
34095 RTVEC_ELT (v, 0) = elt;
34096 RTVEC_ELT (v, 1) = elt;
34097 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
34098 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
34101 /* Return an RTX representing where to find the function value of a
34102 function returning MODE. */
34103 static rtx
34104 rs6000_complex_function_value (machine_mode mode)
34106 unsigned int regno;
34107 rtx r1, r2;
34108 machine_mode inner = GET_MODE_INNER (mode);
34109 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
34111 if (TARGET_FLOAT128
34112 && (mode == KCmode
34113 || (mode == TCmode && TARGET_IEEEQUAD)))
34114 regno = ALTIVEC_ARG_RETURN;
34116 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34117 regno = FP_ARG_RETURN;
34119 else
34121 regno = GP_ARG_RETURN;
34123 /* 32-bit is OK since it'll go in r3/r4. */
34124 if (TARGET_32BIT && inner_bytes >= 4)
34125 return gen_rtx_REG (mode, regno);
34128 if (inner_bytes >= 8)
34129 return gen_rtx_REG (mode, regno);
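/* Otherwise describe the value as a PARALLEL: the real part in REGNO
   at byte offset 0 and the imaginary part in REGNO+1 at offset
   INNER_BYTES. */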
34131 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
34132 const0_rtx);
34133 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
34134 GEN_INT (inner_bytes));
34135 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
34138 /* Return an rtx describing a return value of MODE as a PARALLEL
34139 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
34140 stride REG_STRIDE. */
34142 static rtx
34143 rs6000_parallel_return (machine_mode mode,
34144 int n_elts, machine_mode elt_mode,
34145 unsigned int regno, unsigned int reg_stride)
34147 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34149 int i;
34150 for (i = 0; i < n_elts; i++)
34152 rtx r = gen_rtx_REG (elt_mode, regno);
34153 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
34154 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
34155 regno += reg_stride;
34158 return par;
34161 /* Target hook for TARGET_FUNCTION_VALUE.
34163 On the SPE, both FPs and vectors are returned in r3.
34165 On RS/6000 an integer value is in r3 and a floating-point value is in
34166 fp1, unless -msoft-float. */
34168 static rtx
34169 rs6000_function_value (const_tree valtype,
34170 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
34171 bool outgoing ATTRIBUTE_UNUSED)
34173 machine_mode mode;
34174 unsigned int regno;
34175 machine_mode elt_mode;
34176 int n_elts;
34178 /* Special handling for structs in darwin64. */
34179 if (TARGET_MACHO
34180 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
34182 CUMULATIVE_ARGS valcum;
34183 rtx valret;
34185 valcum.words = 0;
34186 valcum.fregno = FP_ARG_MIN_REG;
34187 valcum.vregno = ALTIVEC_ARG_MIN_REG;
34188 /* Do a trial code generation as if this were going to be passed as
34189 an argument; if any part goes in memory, we return NULL. */
34190 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
34191 if (valret)
34192 return valret;
34193 /* Otherwise fall through to standard ABI rules. */
34196 mode = TYPE_MODE (valtype);
34198 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
34199 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
34201 int first_reg, n_regs;
34203 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
34205 /* _Decimal128 must use even/odd register pairs. */
34206 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34207 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
34209 else
34211 first_reg = ALTIVEC_ARG_RETURN;
34212 n_regs = 1;
34215 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
34218 /* Some return value types need to be split for the -mpowerpc64, 32-bit ABI. */
34219 if (TARGET_32BIT && TARGET_POWERPC64)
34220 switch (mode)
34222 default:
34223 break;
34224 case DImode:
34225 case SCmode:
34226 case DCmode:
34227 case TCmode:
34228 int count = GET_MODE_SIZE (mode) / 4;
34229 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
34232 if ((INTEGRAL_TYPE_P (valtype)
34233 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
34234 || POINTER_TYPE_P (valtype))
34235 mode = TARGET_32BIT ? SImode : DImode;
34237 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34238 /* _Decimal128 must use an even/odd register pair. */
34239 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34240 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
34241 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
34242 regno = FP_ARG_RETURN;
34243 else if (TREE_CODE (valtype) == COMPLEX_TYPE
34244 && targetm.calls.split_complex_arg)
34245 return rs6000_complex_function_value (mode);
34246 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
34247 return register is used in both cases, and we won't see V2DImode/V2DFmode
34248 for pure altivec, combine the two cases. */
34249 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
34250 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
34251 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
34252 regno = ALTIVEC_ARG_RETURN;
34253 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
34254 && (mode == DFmode || mode == DCmode
34255 || FLOAT128_IBM_P (mode) || mode == TCmode))
34256 return spe_build_register_parallel (mode, GP_ARG_RETURN);
34257 else
34258 regno = GP_ARG_RETURN;
34260 return gen_rtx_REG (mode, regno);
34263 /* Define how to find the value returned by a library function
34264 assuming the value has mode MODE. */
34265 static rtx
34266 rs6000_libcall_value (machine_mode mode)
34268 unsigned int regno;
34270 /* A long long return value needs to be split for the -mpowerpc64, 32-bit ABI. */
34271 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
34272 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
34274 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
34275 /* _Decimal128 must use an even/odd register pair. */
34276 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
34277 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
34278 && TARGET_HARD_FLOAT && TARGET_FPRS
34279 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
34280 regno = FP_ARG_RETURN;
34281 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
34282 return register is used in both cases, and we won't see V2DImode/V2DFmode
34283 for pure altivec, combine the two cases. */
34284 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
34285 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
34286 regno = ALTIVEC_ARG_RETURN;
34287 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
34288 return rs6000_complex_function_value (mode);
34289 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
34290 && (mode == DFmode || mode == DCmode
34291 || FLOAT128_IBM_P (mode) || mode == TCmode))
34292 return spe_build_register_parallel (mode, GP_ARG_RETURN);
34293 else
34294 regno = GP_ARG_RETURN;
34296 return gen_rtx_REG (mode, regno);
34300 /* Return true if we use LRA instead of reload pass. */
34301 static bool
34302 rs6000_lra_p (void)
34304 return rs6000_lra_flag;
34307 /* Given FROM and TO register numbers, say whether this elimination is allowed.
34308 Frame pointer elimination is automatically handled.
34310 For the RS/6000, if frame pointer elimination is being done, we would like
34311 to convert ap into fp, not sp.
34313 We need r30 if -mminimal-toc was specified, and there are constant pool
34314 references. */
34316 static bool
34317 rs6000_can_eliminate (const int from, const int to)
34319 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
34320 ? ! frame_pointer_needed
34321 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
34322 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
34323 : true);
34326 /* Define the offset between two registers, FROM to be eliminated and its
34327 replacement TO, at the start of a routine. */
34328 HOST_WIDE_INT
34329 rs6000_initial_elimination_offset (int from, int to)
34331 rs6000_stack_t *info = rs6000_stack_info ();
34332 HOST_WIDE_INT offset;
34334 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34335 offset = info->push_p ? 0 : -info->total_size;
34336 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34338 offset = info->push_p ? 0 : -info->total_size;
34339 if (FRAME_GROWS_DOWNWARD)
34340 offset += info->fixed_size + info->vars_size + info->parm_size;
34342 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
34343 offset = FRAME_GROWS_DOWNWARD
34344 ? info->fixed_size + info->vars_size + info->parm_size
34345 : 0;
34346 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
34347 offset = info->total_size;
34348 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
34349 offset = info->push_p ? info->total_size : 0;
34350 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
34351 offset = 0;
34352 else
34353 gcc_unreachable ();
34355 return offset;
34358 static rtx
34359 rs6000_dwarf_register_span (rtx reg)
34361 rtx parts[8];
34362 int i, words;
34363 unsigned regno = REGNO (reg);
34364 machine_mode mode = GET_MODE (reg);
34366 if (TARGET_SPE
34367 && regno < 32
34368 && (SPE_VECTOR_MODE (GET_MODE (reg))
34369 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
34370 && mode != SFmode && mode != SDmode && mode != SCmode)))
34371 ;
34372 else
34373 return NULL_RTX;
34375 regno = REGNO (reg);
34377 /* The duality of the SPE register size wreaks all kinds of havoc.
34378 This is a way of distinguishing r0 in 32-bits from r0 in
34379 64-bits. */
34380 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
34381 gcc_assert (words <= 4);
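/* Each 64-bit value spans a 32-bit GPR plus the corresponding SPE high
   register; list the two halves in memory order for the current
   endianness. */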
34382 for (i = 0; i < words; i++, regno++)
34384 if (BYTES_BIG_ENDIAN)
34386 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
34387 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
34389 else
34391 parts[2 * i] = gen_rtx_REG (SImode, regno);
34392 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
34396 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
34399 /* Fill in sizes for SPE register high parts in table used by unwinder. */
34401 static void
34402 rs6000_init_dwarf_reg_sizes_extra (tree address)
34404 if (TARGET_SPE)
34406 int i;
34407 machine_mode mode = TYPE_MODE (char_type_node);
34408 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
34409 rtx mem = gen_rtx_MEM (BLKmode, addr);
34410 rtx value = gen_int_mode (4, mode);
34412 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
34414 int column = DWARF_REG_TO_UNWIND_COLUMN
34415 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
34416 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
34418 emit_move_insn (adjust_address (mem, mode, offset), value);
34422 if (TARGET_MACHO && ! TARGET_ALTIVEC)
34424 int i;
34425 machine_mode mode = TYPE_MODE (char_type_node);
34426 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
34427 rtx mem = gen_rtx_MEM (BLKmode, addr);
34428 rtx value = gen_int_mode (16, mode);
34430 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
34431 The unwinder still needs to know the size of Altivec registers. */
34433 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
34435 int column = DWARF_REG_TO_UNWIND_COLUMN
34436 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
34437 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
34439 emit_move_insn (adjust_address (mem, mode, offset), value);
34444 /* Map internal gcc register numbers to debug format register numbers.
34445 FORMAT specifies the type of debug register number to use:
34446 0 -- debug information, except for frame-related sections
34447 1 -- DWARF .debug_frame section
34448 2 -- DWARF .eh_frame section */
34450 unsigned int
34451 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
34453 /* We never use the GCC internal number for SPE high registers.
34454 Those are mapped to the 1200..1231 range for all debug formats. */
34455 if (SPE_HIGH_REGNO_P (regno))
34456 return regno - FIRST_SPE_HIGH_REGNO + 1200;
34458 /* Except for the above, we use the internal number for non-DWARF
34459 debug information, and also for .eh_frame. */
34460 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
34461 return regno;
34463 /* On some platforms, we use the standard DWARF register
34464 numbering for .debug_info and .debug_frame. */
34465 #ifdef RS6000_USE_DWARF_NUMBERING
34466 if (regno <= 63)
34467 return regno;
34468 if (regno == LR_REGNO)
34469 return 108;
34470 if (regno == CTR_REGNO)
34471 return 109;
34472 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
34473 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
34474 The actual code emitted saves the whole of CR, so we map CR2_REGNO
34475 to the DWARF reg for CR. */
34476 if (format == 1 && regno == CR2_REGNO)
34477 return 64;
34478 if (CR_REGNO_P (regno))
34479 return regno - CR0_REGNO + 86;
34480 if (regno == CA_REGNO)
34481 return 101; /* XER */
34482 if (ALTIVEC_REGNO_P (regno))
34483 return regno - FIRST_ALTIVEC_REGNO + 1124;
34484 if (regno == VRSAVE_REGNO)
34485 return 356;
34486 if (regno == VSCR_REGNO)
34487 return 67;
34488 if (regno == SPE_ACC_REGNO)
34489 return 99;
34490 if (regno == SPEFSCR_REGNO)
34491 return 612;
34492 #endif
34493 return regno;
34496 /* target hook eh_return_filter_mode */
34497 static machine_mode
34498 rs6000_eh_return_filter_mode (void)
34500 return TARGET_32BIT ? SImode : word_mode;
34503 /* Target hook for scalar_mode_supported_p. */
34504 static bool
34505 rs6000_scalar_mode_supported_p (machine_mode mode)
34507 /* -m32 does not support TImode. This is the default, from
34508 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
34509 same ABI as for -m32. But default_scalar_mode_supported_p allows
34510 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
34511 for -mpowerpc64. */
34512 if (TARGET_32BIT && mode == TImode)
34513 return false;
34515 if (DECIMAL_FLOAT_MODE_P (mode))
34516 return default_decimal_float_supported_p ();
34517 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
34518 return true;
34519 else
34520 return default_scalar_mode_supported_p (mode);
34523 /* Target hook for vector_mode_supported_p. */
34524 static bool
34525 rs6000_vector_mode_supported_p (machine_mode mode)
34528 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
34529 return true;
34531 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
34532 return true;
34534 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
34535 128-bit, the compiler might try to widen IEEE 128-bit to IBM
34536 double-double. */
34537 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
34538 return true;
34540 else
34541 return false;
34544 /* Target hook for c_mode_for_suffix. */
34545 static machine_mode
34546 rs6000_c_mode_for_suffix (char suffix)
34548 if (TARGET_FLOAT128)
34550 if (suffix == 'q' || suffix == 'Q')
34551 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
34553 /* At the moment, we are not defining a suffix for IBM extended double.
34554 If/when the default for -mabi=ieeelongdouble is changed, and we want
34555 to support __ibm128 constants in legacy library code, we may need to
34556 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
34557 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
34558 __float80 constants. */
34561 return VOIDmode;
34564 /* Target hook for invalid_arg_for_unprototyped_fn. */
34565 static const char *
34566 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
34568 return (!rs6000_darwin64_abi
34569 && typelist == 0
34570 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
34571 && (funcdecl == NULL_TREE
34572 || (TREE_CODE (funcdecl) == FUNCTION_DECL
34573 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
34574 ? N_("AltiVec argument passed to unprototyped function")
34575 : NULL;
34578 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
34579 setup by using the __stack_chk_fail_local hidden function instead of
34580 calling __stack_chk_fail directly. Otherwise it is better to call
34581 __stack_chk_fail directly. */
34583 static tree ATTRIBUTE_UNUSED
34584 rs6000_stack_protect_fail (void)
34586 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
34587 ? default_hidden_stack_protect_fail ()
34588 : default_external_stack_protect_fail ();
34591 void
34592 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
34593 int num_operands ATTRIBUTE_UNUSED)
34595 if (rs6000_warn_cell_microcode)
34597 const char *temp;
34598 int insn_code_number = recog_memoized (insn);
34599 location_t location = INSN_LOCATION (insn);
34601 /* Punt on insns we cannot recognize. */
34602 if (insn_code_number < 0)
34603 return;
34605 temp = get_insn_template (insn_code_number, insn);
34607 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
34608 warning_at (location, OPT_mwarn_cell_microcode,
34609 "emitting microcode insn %s\t[%s] #%d",
34610 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34611 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
34612 warning_at (location, OPT_mwarn_cell_microcode,
34613 "emitting conditional microcode insn %s\t[%s] #%d",
34614 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34618 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
34620 #if TARGET_ELF
34621 static unsigned HOST_WIDE_INT
34622 rs6000_asan_shadow_offset (void)
34624 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
34626 #endif
34628 /* Mask options that we want to support inside of attribute((target)) and
34629 #pragma GCC target operations. Note, we do not include things like
34630 64/32-bit, endianness, hard/soft floating point, etc. that would have
34631 different calling sequences. */
34633 struct rs6000_opt_mask {
34634 const char *name; /* option name */
34635 HOST_WIDE_INT mask; /* mask to set */
34636 bool invert; /* invert sense of mask */
34637 bool valid_target; /* option is a target option */
34640 static struct rs6000_opt_mask const rs6000_opt_masks[] =
34642 { "altivec", OPTION_MASK_ALTIVEC, false, true },
34643 { "cmpb", OPTION_MASK_CMPB, false, true },
34644 { "crypto", OPTION_MASK_CRYPTO, false, true },
34645 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
34646 { "dlmzb", OPTION_MASK_DLMZB, false, true },
34647 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
34648 false, true },
34649 { "float128", OPTION_MASK_FLOAT128, false, false },
34650 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
34651 { "fprnd", OPTION_MASK_FPRND, false, true },
34652 { "hard-dfp", OPTION_MASK_DFP, false, true },
34653 { "htm", OPTION_MASK_HTM, false, true },
34654 { "isel", OPTION_MASK_ISEL, false, true },
34655 { "mfcrf", OPTION_MASK_MFCRF, false, true },
34656 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
34657 { "modulo", OPTION_MASK_MODULO, false, true },
34658 { "mulhw", OPTION_MASK_MULHW, false, true },
34659 { "multiple", OPTION_MASK_MULTIPLE, false, true },
34660 { "popcntb", OPTION_MASK_POPCNTB, false, true },
34661 { "popcntd", OPTION_MASK_POPCNTD, false, true },
34662 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
34663 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
34664 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
34665 { "power9-dform", OPTION_MASK_P9_DFORM, false, true },
34666 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
34667 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
34668 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
34669 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
34670 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
34671 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
34672 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
34673 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
34674 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
34675 { "string", OPTION_MASK_STRING, false, true },
34676 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
34677 { "update", OPTION_MASK_NO_UPDATE, true , true },
34678 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
34679 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
34680 { "vsx", OPTION_MASK_VSX, false, true },
34681 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
34682 #ifdef OPTION_MASK_64BIT
34683 #if TARGET_AIX_OS
34684 { "aix64", OPTION_MASK_64BIT, false, false },
34685 { "aix32", OPTION_MASK_64BIT, true, false },
34686 #else
34687 { "64", OPTION_MASK_64BIT, false, false },
34688 { "32", OPTION_MASK_64BIT, true, false },
34689 #endif
34690 #endif
34691 #ifdef OPTION_MASK_EABI
34692 { "eabi", OPTION_MASK_EABI, false, false },
34693 #endif
34694 #ifdef OPTION_MASK_LITTLE_ENDIAN
34695 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
34696 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
34697 #endif
34698 #ifdef OPTION_MASK_RELOCATABLE
34699 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
34700 #endif
34701 #ifdef OPTION_MASK_STRICT_ALIGN
34702 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
34703 #endif
34704 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
34705 { "string", OPTION_MASK_STRING, false, false },
34708 /* Builtin mask mapping for printing the flags. */
34709 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
34711 { "altivec", RS6000_BTM_ALTIVEC, false, false },
34712 { "vsx", RS6000_BTM_VSX, false, false },
34713 { "spe", RS6000_BTM_SPE, false, false },
34714 { "paired", RS6000_BTM_PAIRED, false, false },
34715 { "fre", RS6000_BTM_FRE, false, false },
34716 { "fres", RS6000_BTM_FRES, false, false },
34717 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
34718 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
34719 { "popcntd", RS6000_BTM_POPCNTD, false, false },
34720 { "cell", RS6000_BTM_CELL, false, false },
34721 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
34722 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
34723 { "crypto", RS6000_BTM_CRYPTO, false, false },
34724 { "htm", RS6000_BTM_HTM, false, false },
34725 { "hard-dfp", RS6000_BTM_DFP, false, false },
34726 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
34727 { "long-double-128", RS6000_BTM_LDBL128, false, false },
34730 /* Option variables that we want to support inside attribute((target)) and
34731 #pragma GCC target operations. */
34733 struct rs6000_opt_var {
34734 const char *name; /* option name */
34735 size_t global_offset; /* offset of the option in global_options. */
34736 size_t target_offset; /* offset of the option in target options. */
34739 static struct rs6000_opt_var const rs6000_opt_vars[] =
34741 { "friz",
34742 offsetof (struct gcc_options, x_TARGET_FRIZ),
34743 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
34744 { "avoid-indexed-addresses",
34745 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
34746 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
34747 { "paired",
34748 offsetof (struct gcc_options, x_rs6000_paired_float),
34749 offsetof (struct cl_target_option, x_rs6000_paired_float), },
34750 { "longcall",
34751 offsetof (struct gcc_options, x_rs6000_default_long_calls),
34752 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
34753 { "optimize-swaps",
34754 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
34755 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
34756 { "allow-movmisalign",
34757 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
34758 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
34759 { "allow-df-permute",
34760 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
34761 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
34762 { "sched-groups",
34763 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
34764 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
34765 { "always-hint",
34766 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
34767 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
34768 { "align-branch-targets",
34769 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
34770 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
34771 { "vectorize-builtins",
34772 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
34773 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
34774 { "tls-markers",
34775 offsetof (struct gcc_options, x_tls_markers),
34776 offsetof (struct cl_target_option, x_tls_markers), },
34777 { "sched-prolog",
34778 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34779 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34780 { "sched-epilog",
34781 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34782 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34783 { "gen-cell-microcode",
34784 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
34785 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
34786 { "warn-cell-microcode",
34787 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
34788 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
34791 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
34792 parsing. Return true if there were no errors. */
34794 static bool
34795 rs6000_inner_target_options (tree args, bool attr_p)
34797 bool ret = true;
34799 if (args == NULL_TREE)
34802 else if (TREE_CODE (args) == STRING_CST)
34804 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34805 char *q;
34807 while ((q = strtok (p, ",")) != NULL)
34809 bool error_p = false;
34810 bool not_valid_p = false;
34811 const char *cpu_opt = NULL;
34813 p = NULL;
34814 if (strncmp (q, "cpu=", 4) == 0)
34816 int cpu_index = rs6000_cpu_name_lookup (q+4);
34817 if (cpu_index >= 0)
34818 rs6000_cpu_index = cpu_index;
34819 else
34821 error_p = true;
34822 cpu_opt = q+4;
34825 else if (strncmp (q, "tune=", 5) == 0)
34827 int tune_index = rs6000_cpu_name_lookup (q+5);
34828 if (tune_index >= 0)
34829 rs6000_tune_index = tune_index;
34830 else
34832 error_p = true;
34833 cpu_opt = q+5;
34836 else
34838 size_t i;
34839 bool invert = false;
34840 char *r = q;
34842 error_p = true;
34843 if (strncmp (r, "no-", 3) == 0)
34845 invert = true;
34846 r += 3;
34849 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
34850 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
34852 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
34854 if (!rs6000_opt_masks[i].valid_target)
34855 not_valid_p = true;
34856 else
34858 error_p = false;
34859 rs6000_isa_flags_explicit |= mask;
34861 /* VSX needs altivec, so -mvsx automagically sets
34862 altivec and disables -mavoid-indexed-addresses. */
34863 if (!invert)
34865 if (mask == OPTION_MASK_VSX)
34867 mask |= OPTION_MASK_ALTIVEC;
34868 TARGET_AVOID_XFORM = 0;
34872 if (rs6000_opt_masks[i].invert)
34873 invert = !invert;
34875 if (invert)
34876 rs6000_isa_flags &= ~mask;
34877 else
34878 rs6000_isa_flags |= mask;
34880 break;
34883 if (error_p && !not_valid_p)
34885 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
34886 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
34888 size_t j = rs6000_opt_vars[i].global_offset;
34889 *((int *) ((char *)&global_options + j)) = !invert;
34890 error_p = false;
34891 not_valid_p = false;
34892 break;
34897 if (error_p)
34899 const char *eprefix, *esuffix;
34901 ret = false;
34902 if (attr_p)
34904 eprefix = "__attribute__((__target__(";
34905 esuffix = ")))";
34907 else
34909 eprefix = "#pragma GCC target ";
34910 esuffix = "";
34913 if (cpu_opt)
34914 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
34915 q, esuffix);
34916 else if (not_valid_p)
34917 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
34918 else
34919 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
34924 else if (TREE_CODE (args) == TREE_LIST)
34928 tree value = TREE_VALUE (args);
34929 if (value)
34931 bool ret2 = rs6000_inner_target_options (value, attr_p);
34932 if (!ret2)
34933 ret = false;
34935 args = TREE_CHAIN (args);
34937 while (args != NULL_TREE);
34940 else
34941 gcc_unreachable ();
34943 return ret;
34946 /* Print out the target options as a list for -mdebug=target. */
34948 static void
34949 rs6000_debug_target_options (tree args, const char *prefix)
34951 if (args == NULL_TREE)
34952 fprintf (stderr, "%s<NULL>", prefix);
34954 else if (TREE_CODE (args) == STRING_CST)
34956 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34957 char *q;
34959 while ((q = strtok (p, ",")) != NULL)
34961 p = NULL;
34962 fprintf (stderr, "%s\"%s\"", prefix, q);
34963 prefix = ", ";
34967 else if (TREE_CODE (args) == TREE_LIST)
34971 tree value = TREE_VALUE (args);
34972 if (value)
34974 rs6000_debug_target_options (value, prefix);
34975 prefix = ", ";
34977 args = TREE_CHAIN (args);
34979 while (args != NULL_TREE);
34982 else
34983 gcc_unreachable ();
34985 return;
34989 /* Hook to validate attribute((target("..."))). */
34991 static bool
34992 rs6000_valid_attribute_p (tree fndecl,
34993 tree ARG_UNUSED (name),
34994 tree args,
34995 int flags)
34997 struct cl_target_option cur_target;
34998 bool ret;
34999 tree old_optimize = build_optimization_node (&global_options);
35000 tree new_target, new_optimize;
35001 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
35003 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
35005 if (TARGET_DEBUG_TARGET)
35007 tree tname = DECL_NAME (fndecl);
35008 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
35009 if (tname)
35010 fprintf (stderr, "function: %.*s\n",
35011 (int) IDENTIFIER_LENGTH (tname),
35012 IDENTIFIER_POINTER (tname));
35013 else
35014 fprintf (stderr, "function: unknown\n");
35016 fprintf (stderr, "args:");
35017 rs6000_debug_target_options (args, " ");
35018 fprintf (stderr, "\n");
35020 if (flags)
35021 fprintf (stderr, "flags: 0x%x\n", flags);
35023 fprintf (stderr, "--------------------\n");
35026 old_optimize = build_optimization_node (&global_options);
35027 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
35029 /* If the function changed the optimization levels as well as setting target
35030 options, start with the optimizations specified. */
35031 if (func_optimize && func_optimize != old_optimize)
35032 cl_optimization_restore (&global_options,
35033 TREE_OPTIMIZATION (func_optimize));
35035 /* The target attributes may also change some optimization flags, so update
35036 the optimization options if necessary. */
35037 cl_target_option_save (&cur_target, &global_options);
35038 rs6000_cpu_index = rs6000_tune_index = -1;
35039 ret = rs6000_inner_target_options (args, true);
35041 /* Set up any additional state. */
35042 if (ret)
35044 ret = rs6000_option_override_internal (false);
35045 new_target = build_target_option_node (&global_options);
35047 else
35048 new_target = NULL;
35050 new_optimize = build_optimization_node (&global_options);
35052 if (!new_target)
35053 ret = false;
35055 else if (fndecl)
35057 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
35059 if (old_optimize != new_optimize)
35060 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
35063 cl_target_option_restore (&global_options, &cur_target);
35065 if (old_optimize != new_optimize)
35066 cl_optimization_restore (&global_options,
35067 TREE_OPTIMIZATION (old_optimize));
35069 return ret;
35073 /* Hook to validate the current #pragma GCC target and set the state, and
35074 update the macros based on what was changed. If ARGS is NULL, then
35075 POP_TARGET is used to reset the options. */
35077 bool
35078 rs6000_pragma_target_parse (tree args, tree pop_target)
35080 tree prev_tree = build_target_option_node (&global_options);
35081 tree cur_tree;
35082 struct cl_target_option *prev_opt, *cur_opt;
35083 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
35084 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
35086 if (TARGET_DEBUG_TARGET)
35088 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
35089 fprintf (stderr, "args:");
35090 rs6000_debug_target_options (args, " ");
35091 fprintf (stderr, "\n");
35093 if (pop_target)
35095 fprintf (stderr, "pop_target:\n");
35096 debug_tree (pop_target);
35098 else
35099 fprintf (stderr, "pop_target: <NULL>\n");
35101 fprintf (stderr, "--------------------\n");
35104 if (! args)
35106 cur_tree = ((pop_target)
35107 ? pop_target
35108 : target_option_default_node);
35109 cl_target_option_restore (&global_options,
35110 TREE_TARGET_OPTION (cur_tree));
35112 else
35114 rs6000_cpu_index = rs6000_tune_index = -1;
35115 if (!rs6000_inner_target_options (args, false)
35116 || !rs6000_option_override_internal (false)
35117 || (cur_tree = build_target_option_node (&global_options))
35118 == NULL_TREE)
35120 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
35121 fprintf (stderr, "invalid pragma\n");
35123 return false;
35127 target_option_current_node = cur_tree;
35129 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
35130 change the macros that are defined. */
35131 if (rs6000_target_modify_macros_ptr)
35133 prev_opt = TREE_TARGET_OPTION (prev_tree);
35134 prev_bumask = prev_opt->x_rs6000_builtin_mask;
35135 prev_flags = prev_opt->x_rs6000_isa_flags;
35137 cur_opt = TREE_TARGET_OPTION (cur_tree);
35138 cur_flags = cur_opt->x_rs6000_isa_flags;
35139 cur_bumask = cur_opt->x_rs6000_builtin_mask;
35141 diff_bumask = (prev_bumask ^ cur_bumask);
35142 diff_flags = (prev_flags ^ cur_flags);
35144 if ((diff_flags != 0) || (diff_bumask != 0))
35146 /* Delete old macros. */
35147 rs6000_target_modify_macros_ptr (false,
35148 prev_flags & diff_flags,
35149 prev_bumask & diff_bumask);
35151 /* Define new macros. */
35152 rs6000_target_modify_macros_ptr (true,
35153 cur_flags & diff_flags,
35154 cur_bumask & diff_bumask);
35158 return true;
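/* A small worked example of the macro update above (illustrative flag
   names): if prev_flags enables {ALTIVEC, VSX} and cur_flags enables
   {ALTIVEC, HTM}, then diff_flags = prev_flags ^ cur_flags = {VSX, HTM}.
   The first call undefines the macros for prev_flags & diff_flags =
   {VSX}; the second defines the macros for cur_flags & diff_flags =
   {HTM}.  Flags common to both settings (ALTIVEC) are left alone.  */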
35162 /* Remember the last target of rs6000_set_current_function. */
35163 static GTY(()) tree rs6000_previous_fndecl;
35165 /* Establish appropriate back-end context for processing the function
35166 FNDECL. The argument might be NULL to indicate processing at top
35167 level, outside of any function scope. */
35168 static void
35169 rs6000_set_current_function (tree fndecl)
35171 tree old_tree = (rs6000_previous_fndecl
35172 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
35173 : NULL_TREE);
35175 tree new_tree = (fndecl
35176 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
35177 : NULL_TREE);
35179 if (TARGET_DEBUG_TARGET)
35181 bool print_final = false;
35182 fprintf (stderr, "\n==================== rs6000_set_current_function");
35184 if (fndecl)
35185 fprintf (stderr, ", fndecl %s (%p)",
35186 (DECL_NAME (fndecl)
35187 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
35188 : "<unknown>"), (void *)fndecl);
35190 if (rs6000_previous_fndecl)
35191 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
35193 fprintf (stderr, "\n");
35194 if (new_tree)
35196 fprintf (stderr, "\nnew fndecl target specific options:\n");
35197 debug_tree (new_tree);
35198 print_final = true;
35201 if (old_tree)
35203 fprintf (stderr, "\nold fndecl target specific options:\n");
35204 debug_tree (old_tree);
35205 print_final = true;
35208 if (print_final)
35209 fprintf (stderr, "--------------------\n");
35212 /* Only change the context if the function changes. This hook is called
35213 several times in the course of compiling a function, and we don't want to
35214 slow things down too much or call target_reinit when it isn't safe. */
35215 if (fndecl && fndecl != rs6000_previous_fndecl)
35217 rs6000_previous_fndecl = fndecl;
35218 if (old_tree == new_tree)
35221 else if (new_tree && new_tree != target_option_default_node)
35223 cl_target_option_restore (&global_options,
35224 TREE_TARGET_OPTION (new_tree));
35225 if (TREE_TARGET_GLOBALS (new_tree))
35226 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
35227 else
35228 TREE_TARGET_GLOBALS (new_tree)
35229 = save_target_globals_default_opts ();
35232 else if (old_tree && old_tree != target_option_default_node)
35234 new_tree = target_option_current_node;
35235 cl_target_option_restore (&global_options,
35236 TREE_TARGET_OPTION (new_tree));
35237 if (TREE_TARGET_GLOBALS (new_tree))
35238 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
35239 else if (new_tree == target_option_default_node)
35240 restore_target_globals (&default_target_globals);
35241 else
35242 TREE_TARGET_GLOBALS (new_tree)
35243 = save_target_globals_default_opts ();
35249 /* Save the current options.  */
35251 static void
35252 rs6000_function_specific_save (struct cl_target_option *ptr,
35253 struct gcc_options *opts)
35255 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
35256 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
35259 /* Restore the current options.  */
35261 static void
35262 rs6000_function_specific_restore (struct gcc_options *opts,
35263 struct cl_target_option *ptr)
35266 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
35267 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
35268 (void) rs6000_option_override_internal (false);
35271 /* Print the current options.  */
35273 static void
35274 rs6000_function_specific_print (FILE *file, int indent,
35275 struct cl_target_option *ptr)
35277 rs6000_print_isa_options (file, indent, "Isa options set",
35278 ptr->x_rs6000_isa_flags);
35280 rs6000_print_isa_options (file, indent, "Isa options explicit",
35281 ptr->x_rs6000_isa_flags_explicit);
35284 /* Helper function to print the current isa or misc options on a line. */
35286 static void
35287 rs6000_print_options_internal (FILE *file,
35288 int indent,
35289 const char *string,
35290 HOST_WIDE_INT flags,
35291 const char *prefix,
35292 const struct rs6000_opt_mask *opts,
35293 size_t num_elements)
35295 size_t i;
35296 size_t start_column = 0;
35297 size_t cur_column;
35298 size_t max_column = 76;
35299 const char *comma = "";
35301 if (indent)
35302 start_column += fprintf (file, "%*s", indent, "");
35304 if (!flags)
35306 fprintf (file, DEBUG_FMT_S, string, "<none>");
35307 return;
35310 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
35312 /* Print the various mask options. */
35313 cur_column = start_column;
35314 for (i = 0; i < num_elements; i++)
35316 if ((flags & opts[i].mask) != 0)
35318 const char *no_str = opts[i].invert ? "no-" : "";
35319 size_t len = (strlen (comma)
35320 + strlen (prefix)
35321 + strlen (no_str)
35322 + strlen (opts[i].name));
35324 cur_column += len;
35325 if (cur_column > max_column)
35327 fprintf (file, ", \\\n%*s", (int)start_column, "");
35328 cur_column = start_column + len;
35329 comma = "";
35332 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
35333 opts[i].name);
35334 flags &= ~ opts[i].mask;
35335 comma = ", ";
35339 fputs ("\n", file);
35342 /* Helper function to print the current isa options on a line. */
35344 static void
35345 rs6000_print_isa_options (FILE *file, int indent, const char *string,
35346 HOST_WIDE_INT flags)
35348 rs6000_print_options_internal (file, indent, string, flags, "-m",
35349 &rs6000_opt_masks[0],
35350 ARRAY_SIZE (rs6000_opt_masks));
35353 static void
35354 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
35355 HOST_WIDE_INT flags)
35357 rs6000_print_options_internal (file, indent, string, flags, "",
35358 &rs6000_builtin_mask_names[0],
35359 ARRAY_SIZE (rs6000_builtin_mask_names));
35363 /* Hook to determine if one function can safely inline another. */
35365 static bool
35366 rs6000_can_inline_p (tree caller, tree callee)
35368 bool ret = false;
35369 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
35370 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
35372 /* If callee has no option attributes, then it is ok to inline. */
35373 if (!callee_tree)
35374 ret = true;
35376 /* If caller has no option attributes but callee does, then it is not ok to
35377 inline. */
35378 else if (!caller_tree)
35379 ret = false;
35381 else
35383 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
35384 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
35386 /* Callee's options should be a subset of the caller's, i.e. a vsx function
35387 can inline an altivec function but a non-vsx function can't inline a
35388 vsx function. */
35389 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
35390 == callee_opts->x_rs6000_isa_flags)
35391 ret = true;
35394 if (TARGET_DEBUG_TARGET)
35395 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
35396 (DECL_NAME (caller)
35397 ? IDENTIFIER_POINTER (DECL_NAME (caller))
35398 : "<unknown>"),
35399 (DECL_NAME (callee)
35400 ? IDENTIFIER_POINTER (DECL_NAME (callee))
35401 : "<unknown>"),
35402 (ret ? "can" : "cannot"));
35404 return ret;
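/* For example (illustrative declarations, not from the sources): under
   the subset rule above,

     __attribute__((target("no-vsx"))) static int callee (int x);
     __attribute__((target("vsx")))    static int caller (int x);

   caller may inline callee, because the callee's ISA flags are a subset
   of the caller's; a "no-vsx" caller could not inline a "vsx" callee.  */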
35407 /* Allocate a stack temp and fix up the address so it meets the particular
35408 memory requirements (either offsettable or REG+REG addressing). */
35411 rs6000_allocate_stack_temp (machine_mode mode,
35412 bool offsettable_p,
35413 bool reg_reg_p)
35415 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
35416 rtx addr = XEXP (stack, 0);
35417 int strict_p = (reload_in_progress || reload_completed);
35419 if (!legitimate_indirect_address_p (addr, strict_p))
35421 if (offsettable_p
35422 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
35423 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
35425 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
35426 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
35429 return stack;
35432 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
35433 to such a form to deal with memory reference instructions like STFIWX that
35434 only take reg+reg addressing. */
35437 rs6000_address_for_fpconvert (rtx x)
35439 int strict_p = (reload_in_progress || reload_completed);
35440 rtx addr;
35442 gcc_assert (MEM_P (x));
35443 addr = XEXP (x, 0);
35444 if (! legitimate_indirect_address_p (addr, strict_p)
35445 && ! legitimate_indexed_address_p (addr, strict_p))
35447 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
35449 rtx reg = XEXP (addr, 0);
35450 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
35451 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
35452 gcc_assert (REG_P (reg));
35453 emit_insn (gen_add3_insn (reg, reg, size_rtx));
35454 addr = reg;
35456 else if (GET_CODE (addr) == PRE_MODIFY)
35458 rtx reg = XEXP (addr, 0);
35459 rtx expr = XEXP (addr, 1);
35460 gcc_assert (REG_P (reg));
35461 gcc_assert (GET_CODE (expr) == PLUS);
35462 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
35463 addr = reg;
35466 x = replace_equiv_address (x, copy_addr_to_reg (addr));
35469 return x;
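/* E.g. (illustrative): for a pre-increment address (pre_inc (reg r9))
   and a 4-byte mode, the code above first emits r9 = r9 + 4 and then
   uses plain (reg r9) as the address, so that instructions such as
   STFIWX, which accept only reg or reg+reg addressing, can still be
   used.  */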
35472 /* Given a memory reference, if it is not in the form for altivec memory
35473 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
35474 convert to the altivec format. */
35477 rs6000_address_for_altivec (rtx x)
35479 gcc_assert (MEM_P (x));
35480 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
35482 rtx addr = XEXP (x, 0);
35483 int strict_p = (reload_in_progress || reload_completed);
35485 if (!legitimate_indexed_address_p (addr, strict_p)
35486 && !legitimate_indirect_address_p (addr, strict_p))
35487 addr = copy_to_mode_reg (Pmode, addr);
35489 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
35490 x = change_address (x, GET_MODE (x), addr);
35493 return x;
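/* For example (illustrative address): a MEM at r9+20 is rewritten above
   by copying r9+20 into a fresh register rN and wrapping it as
   (and rN -16).  The lvx/stvx instructions ignore the low four address
   bits, so this makes the RTL match what the hardware actually
   accesses.  */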
35496 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
35498 On the RS/6000, all integer constants are acceptable, most won't be valid
35499 for particular insns, though. Only easy FP constants are acceptable. */
35501 static bool
35502 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
35504 if (TARGET_ELF && tls_referenced_p (x))
35505 return false;
35507 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
35508 || GET_MODE (x) == VOIDmode
35509 || (TARGET_POWERPC64 && mode == DImode)
35510 || easy_fp_constant (x, mode)
35511 || easy_vector_constant (x, mode));
35515 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
35517 static bool
35518 chain_already_loaded (rtx_insn *last)
35520 for (; last != NULL; last = PREV_INSN (last))
35522 if (NONJUMP_INSN_P (last))
35524 rtx patt = PATTERN (last);
35526 if (GET_CODE (patt) == SET)
35528 rtx lhs = XEXP (patt, 0);
35530 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
35531 return true;
35535 return false;
35538 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
35540 void
35541 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35543 const bool direct_call_p
35544 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
35545 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
35546 rtx toc_load = NULL_RTX;
35547 rtx toc_restore = NULL_RTX;
35548 rtx func_addr;
35549 rtx abi_reg = NULL_RTX;
35550 rtx call[4];
35551 int n_call;
35552 rtx insn;
35554 /* Handle longcall attributes. */
35555 if (INTVAL (cookie) & CALL_LONG)
35556 func_desc = rs6000_longcall_ref (func_desc);
35558 /* Handle indirect calls. */
35559 if (GET_CODE (func_desc) != SYMBOL_REF
35560 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
35562 /* Save the TOC into its reserved slot before the call,
35563 and prepare to restore it after the call. */
35564 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
35565 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
35566 rtx stack_toc_mem = gen_frame_mem (Pmode,
35567 gen_rtx_PLUS (Pmode, stack_ptr,
35568 stack_toc_offset));
35569 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
35570 gen_rtvec (1, stack_toc_offset),
35571 UNSPEC_TOCSLOT);
35572 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
35574 /* Can we optimize saving the TOC in the prologue or
35575 do we need to do it at every call? */
35576 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
35577 cfun->machine->save_toc_in_prologue = true;
35578 else
35580 MEM_VOLATILE_P (stack_toc_mem) = 1;
35581 emit_move_insn (stack_toc_mem, toc_reg);
35584 if (DEFAULT_ABI == ABI_ELFv2)
35586 /* A function pointer in the ELFv2 ABI is just a plain address, but
35587 the ABI requires it to be loaded into r12 before the call. */
35588 func_addr = gen_rtx_REG (Pmode, 12);
35589 emit_move_insn (func_addr, func_desc);
35590 abi_reg = func_addr;
35592 else
35594 /* A function pointer under AIX is a pointer to a data area whose
35595 first word contains the actual address of the function, whose
35596 second word contains a pointer to its TOC, and whose third word
35597 contains a value to place in the static chain register (r11).
35598 Note that if we load the static chain, our "trampoline" need
35599 not have any executable code. */
35601 /* Load up address of the actual function. */
35602 func_desc = force_reg (Pmode, func_desc);
35603 func_addr = gen_reg_rtx (Pmode);
35604 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
35606 /* Prepare to load the TOC of the called function. Note that the
35607 TOC load must happen immediately before the actual call so
35608 that unwinding the TOC registers works correctly. See the
35609 comment in frob_update_context. */
35610 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
35611 rtx func_toc_mem = gen_rtx_MEM (Pmode,
35612 gen_rtx_PLUS (Pmode, func_desc,
35613 func_toc_offset));
35614 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
35616 /* If we have a static chain, load it up. But, if the call was
35617 originally direct, the 3rd word has not been written since no
35618 trampoline has been built, so we ought not to load it, lest we
35619 overwrite a static chain value. */
35620 if (!direct_call_p
35621 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
35622 && !chain_already_loaded (get_current_sequence ()->next->last))
35624 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
35625 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
35626 rtx func_sc_mem = gen_rtx_MEM (Pmode,
35627 gen_rtx_PLUS (Pmode, func_desc,
35628 func_sc_offset));
35629 emit_move_insn (sc_reg, func_sc_mem);
35630 abi_reg = sc_reg;
35634 else
35636 /* Direct calls use the TOC: for local calls, the callee will
35637 assume the TOC register is set; for non-local calls, the
35638 PLT stub needs the TOC register. */
35639 abi_reg = toc_reg;
35640 func_addr = func_desc;
35643 /* Create the call. */
35644 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
35645 if (value != NULL_RTX)
35646 call[0] = gen_rtx_SET (value, call[0]);
35647 n_call = 1;
35649 if (toc_load)
35650 call[n_call++] = toc_load;
35651 if (toc_restore)
35652 call[n_call++] = toc_restore;
35654 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
35656 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
35657 insn = emit_call_insn (insn);
35659 /* Mention all registers defined by the ABI to hold information
35660 as uses in CALL_INSN_FUNCTION_USAGE. */
35661 if (abi_reg)
35662 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
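/* For reference, the AIX function descriptor dereferenced above can be
   pictured as the following sketch (the ABI defines three Pmode words,
   not a C type; the struct and its field names are illustrative only):

     struct aix_func_desc
     {
       void *entry;        // word 0: address of the function's code
       void *toc;          // word 1: the function's TOC pointer
       void *static_chain; // word 2: environment/static chain value
     };

   This matches the offsets 0, GET_MODE_SIZE (Pmode), and
   2 * GET_MODE_SIZE (Pmode) used in the indirect-call path above.  */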
35665 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
35667 void
35668 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35670 rtx call[2];
35671 rtx insn;
35673 gcc_assert (INTVAL (cookie) == 0);
35675 /* Create the call. */
35676 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
35677 if (value != NULL_RTX)
35678 call[0] = gen_rtx_SET (value, call[0]);
35680 call[1] = simple_return_rtx;
35682 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
35683 insn = emit_call_insn (insn);
35685 /* Note use of the TOC register. */
35686 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
35687 /* We need to also mark a use of the link register since the function we
35688 sibling-call to will use it to return to our caller. */
35689 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
35692 /* Return whether we need to always update the saved TOC pointer when we update
35693 the stack pointer. */
35695 static bool
35696 rs6000_save_toc_in_prologue_p (void)
35698 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
35701 #ifdef HAVE_GAS_HIDDEN
35702 # define USE_HIDDEN_LINKONCE 1
35703 #else
35704 # define USE_HIDDEN_LINKONCE 0
35705 #endif
35707 /* Fills in the label name that should be used for a 476 link stack thunk. */
35709 void
35710 get_ppc476_thunk_name (char name[32])
35712 gcc_assert (TARGET_LINK_STACK);
35714 if (USE_HIDDEN_LINKONCE)
35715 sprintf (name, "__ppc476.get_thunk");
35716 else
35717 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
35720 /* This function emits the simple thunk routine that is used to preserve
35721 the link stack on the 476 cpu. */
35723 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
35724 static void
35725 rs6000_code_end (void)
35727 char name[32];
35728 tree decl;
35730 if (!TARGET_LINK_STACK)
35731 return;
35733 get_ppc476_thunk_name (name);
35735 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
35736 build_function_type_list (void_type_node, NULL_TREE));
35737 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
35738 NULL_TREE, void_type_node);
35739 TREE_PUBLIC (decl) = 1;
35740 TREE_STATIC (decl) = 1;
35742 #if RS6000_WEAK
35743 if (USE_HIDDEN_LINKONCE)
35745 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
35746 targetm.asm_out.unique_section (decl, 0);
35747 switch_to_section (get_named_section (decl, NULL, 0));
35748 DECL_WEAK (decl) = 1;
35749 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
35750 targetm.asm_out.globalize_label (asm_out_file, name);
35751 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
35752 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
35754 else
35755 #endif
35757 switch_to_section (text_section);
35758 ASM_OUTPUT_LABEL (asm_out_file, name);
35761 DECL_INITIAL (decl) = make_node (BLOCK);
35762 current_function_decl = decl;
35763 allocate_struct_function (decl, false);
35764 init_function_start (decl);
35765 first_function_block_is_cold = false;
35766 /* Make sure unwind info is emitted for the thunk if needed. */
35767 final_start_function (emit_barrier (), asm_out_file, 1);
35769 fputs ("\tblr\n", asm_out_file);
35771 final_end_function ();
35772 init_insn_lengths ();
35773 free_after_compilation (cfun);
35774 set_cfun (NULL);
35775 current_function_decl = NULL;
35778 /* Add r30 to hard reg set if the prologue sets it up and it is not
35779 pic_offset_table_rtx. */
35781 static void
35782 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
35784 if (!TARGET_SINGLE_PIC_BASE
35785 && TARGET_TOC
35786 && TARGET_MINIMAL_TOC
35787 && get_pool_size () != 0)
35788 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
35789 if (cfun->machine->split_stack_argp_used)
35790 add_to_hard_reg_set (&set->set, Pmode, 12);
35794 /* Helper function for rs6000_split_logical to emit a logical instruction after
35795 splitting the operation into single GPR registers.
35797 DEST is the destination register.
35798 OP1 and OP2 are the input source registers.
35799 CODE is the base operation (AND, IOR, XOR, NOT).
35800 MODE is the machine mode.
35801 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35802 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35803 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35805 static void
35806 rs6000_split_logical_inner (rtx dest,
35807 rtx op1,
35808 rtx op2,
35809 enum rtx_code code,
35810 machine_mode mode,
35811 bool complement_final_p,
35812 bool complement_op1_p,
35813 bool complement_op2_p)
35815 rtx bool_rtx;
35817 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
35818 if (op2 && GET_CODE (op2) == CONST_INT
35819 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
35820 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35822 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
35823 HOST_WIDE_INT value = INTVAL (op2) & mask;
35825 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
35826 if (code == AND)
35828 if (value == 0)
35830 emit_insn (gen_rtx_SET (dest, const0_rtx));
35831 return;
35834 else if (value == mask)
35836 if (!rtx_equal_p (dest, op1))
35837 emit_insn (gen_rtx_SET (dest, op1));
35838 return;
35842 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
35843 into separate ORI/ORIS or XORI/XORIS instructions. */
35844 else if (code == IOR || code == XOR)
35846 if (value == 0)
35848 if (!rtx_equal_p (dest, op1))
35849 emit_insn (gen_rtx_SET (dest, op1));
35850 return;
35855 if (code == AND && mode == SImode
35856 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35858 emit_insn (gen_andsi3 (dest, op1, op2));
35859 return;
35862 if (complement_op1_p)
35863 op1 = gen_rtx_NOT (mode, op1);
35865 if (complement_op2_p)
35866 op2 = gen_rtx_NOT (mode, op2);
35868 /* For canonical RTL, if only one arm is inverted it is the first. */
35869 if (!complement_op1_p && complement_op2_p)
35870 std::swap (op1, op2);
35872 bool_rtx = ((code == NOT)
35873 ? gen_rtx_NOT (mode, op1)
35874 : gen_rtx_fmt_ee (code, mode, op1, op2));
35876 if (complement_final_p)
35877 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
35879 emit_insn (gen_rtx_SET (dest, bool_rtx));
35882 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
35883 operations are split immediately during RTL generation to allow for more
35884 optimizations of the AND/IOR/XOR.
35886 OPERANDS is an array containing the destination and two input operands.
35887 CODE is the base operation (AND, IOR, XOR, NOT).
35888 MODE is the machine mode.
35889 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35890 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35891 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
35892 CLOBBER_REG is either NULL or a scratch register of type CC to allow
35893 formation of the AND instructions. */
35895 static void
35896 rs6000_split_logical_di (rtx operands[3],
35897 enum rtx_code code,
35898 bool complement_final_p,
35899 bool complement_op1_p,
35900 bool complement_op2_p)
35902 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
35903 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
35904 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
35905 enum hi_lo { hi = 0, lo = 1 };
35906 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
35907 size_t i;
35909 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
35910 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
35911 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
35912 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
35914 if (code == NOT)
35915 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
35916 else
35918 if (GET_CODE (operands[2]) != CONST_INT)
35920 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
35921 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
35923 else
35925 HOST_WIDE_INT value = INTVAL (operands[2]);
35926 HOST_WIDE_INT value_hi_lo[2];
35928 gcc_assert (!complement_final_p);
35929 gcc_assert (!complement_op1_p);
35930 gcc_assert (!complement_op2_p);
35932 value_hi_lo[hi] = value >> 32;
35933 value_hi_lo[lo] = value & lower_32bits;
35935 for (i = 0; i < 2; i++)
35937 HOST_WIDE_INT sub_value = value_hi_lo[i];
35939 if (sub_value & sign_bit)
35940 sub_value |= upper_32bits;
35942 op2_hi_lo[i] = GEN_INT (sub_value);
35944 /* If this is an AND instruction, check to see if we need to load
35945 the value in a register. */
35946 if (code == AND && sub_value != -1 && sub_value != 0
35947 && !and_operand (op2_hi_lo[i], SImode))
35948 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
35953 for (i = 0; i < 2; i++)
35955 /* Split large IOR/XOR operations. */
35956 if ((code == IOR || code == XOR)
35957 && GET_CODE (op2_hi_lo[i]) == CONST_INT
35958 && !complement_final_p
35959 && !complement_op1_p
35960 && !complement_op2_p
35961 && !logical_const_operand (op2_hi_lo[i], SImode))
35963 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
35964 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
35965 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
35966 rtx tmp = gen_reg_rtx (SImode);
35968 /* Make sure the constant is sign extended. */
35969 if ((hi_16bits & sign_bit) != 0)
35970 hi_16bits |= upper_32bits;
35972 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
35973 code, SImode, false, false, false);
35975 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
35976 code, SImode, false, false, false);
35978 else
35979 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
35980 code, SImode, complement_final_p,
35981 complement_op1_p, complement_op2_p);
35984 return;
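/* As a concrete example of the IOR/XOR splitting above (illustrative
   value): IORing one 32-bit half with 0xdeadbeef, which does not
   satisfy logical_const_operand, is emitted as two instructions using
   the 16-bit halves:

     oris tmp,src,0xdead    # value & 0xffff0000
     ori  dest,tmp,0xbeef   # value & 0x0000ffff

   A non-trivial AND constant that does not satisfy and_operand is
   instead forced into a register.  */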
35987 /* Split the insns that make up boolean operations operating on multiple GPR
35988 registers. The boolean MD patterns ensure that the inputs either are
35989 exactly the same as the output registers, or there is no overlap.
35991 OPERANDS is an array containing the destination and two input operands.
35992 CODE is the base operation (AND, IOR, XOR, NOT).
35993 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35994 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35995 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35997 void
35998 rs6000_split_logical (rtx operands[3],
35999 enum rtx_code code,
36000 bool complement_final_p,
36001 bool complement_op1_p,
36002 bool complement_op2_p)
36004 machine_mode mode = GET_MODE (operands[0]);
36005 machine_mode sub_mode;
36006 rtx op0, op1, op2;
36007 int sub_size, regno0, regno1, nregs, i;
36009 /* If this is DImode, use the specialized version that can run before
36010 register allocation. */
36011 if (mode == DImode && !TARGET_POWERPC64)
36013 rs6000_split_logical_di (operands, code, complement_final_p,
36014 complement_op1_p, complement_op2_p);
36015 return;
36018 op0 = operands[0];
36019 op1 = operands[1];
36020 op2 = (code == NOT) ? NULL_RTX : operands[2];
36021 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
36022 sub_size = GET_MODE_SIZE (sub_mode);
36023 regno0 = REGNO (op0);
36024 regno1 = REGNO (op1);
36026 gcc_assert (reload_completed);
36027 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
36028 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
36030 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
36031 gcc_assert (nregs > 1);
36033 if (op2 && REG_P (op2))
36034 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
36036 for (i = 0; i < nregs; i++)
36038 int offset = i * sub_size;
36039 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
36040 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
36041 rtx sub_op2 = ((code == NOT)
36042 ? NULL_RTX
36043 : simplify_subreg (sub_mode, op2, mode, offset));
36045 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
36046 complement_final_p, complement_op1_p,
36047 complement_op2_p);
36050 return;
36054 /* Return true if the peephole2 can combine an addis instruction with a load
36055 using an offset from the register the addis set, so the two insns can be
36056 fused together on a power8. */
36058 bool
36059 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
36060 rtx addis_value, /* addis value. */
36061 rtx target, /* target register that is loaded. */
36062 rtx mem) /* bottom part of the memory addr. */
36064 rtx addr;
36065 rtx base_reg;
36067 /* Validate arguments. */
36068 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
36069 return false;
36071 if (!base_reg_operand (target, GET_MODE (target)))
36072 return false;
36074 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
36075 return false;
36077 /* Allow sign/zero extension. */
36078 if (GET_CODE (mem) == ZERO_EXTEND
36079 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
36080 mem = XEXP (mem, 0);
36082 if (!MEM_P (mem))
36083 return false;
36085 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
36086 return false;
36088 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
36089 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
36090 return false;
36092 /* Validate that the register used to load the high value is either the
36093 register being loaded, or we can safely replace its use.
36095 This function is only called from the peephole2 pass and we assume that
36096 there are 2 instructions in the peephole (addis and load), so we want to
36097 check that the target register is not used in the memory address and that
36098 the register holding the addis result is dead after the peephole. */
36099 if (REGNO (addis_reg) != REGNO (target))
36101 if (reg_mentioned_p (target, mem))
36102 return false;
36104 if (!peep2_reg_dead_p (2, addis_reg))
36105 return false;
36107 /* If the target register being loaded is the stack pointer, we must
36108 avoid loading any other value into it, even temporarily. */
36109 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
36110 return false;
36113 base_reg = XEXP (addr, 0);
36114 return REGNO (addis_reg) == REGNO (base_reg);
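/* For example (illustrative registers and symbol), the two-insn
   peephole window this predicate validates looks like:

     addis 9,2,.LC0@toc@ha    # addis_reg/addis_value
     lwz   9,.LC0@toc@l(9)    # target loaded via the addis register

   which power8 can fuse because the base register of the load is the
   register the addis has just set.  */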
36117 /* During the peephole2 pass, adjust and expand the insns for a load fusion
36118 sequence. We adjust the addis register to use the target register. If the
36119 load sign extends, we adjust the code to do the zero extending load, and an
36120 explicit sign extension later since the fusion only covers zero extending
36121 loads.
36123 The operands are:
36124 operands[0] register set with addis (to be replaced with target)
36125 operands[1] value set via addis
36126 operands[2] target register being loaded
36127 operands[3] D-form memory reference using operands[0]. */
36129 void
36130 expand_fusion_gpr_load (rtx *operands)
36132 rtx addis_value = operands[1];
36133 rtx target = operands[2];
36134 rtx orig_mem = operands[3];
36135 rtx new_addr, new_mem, orig_addr, offset;
36136 enum rtx_code plus_or_lo_sum;
36137 machine_mode target_mode = GET_MODE (target);
36138 machine_mode extend_mode = target_mode;
36139 machine_mode ptr_mode = Pmode;
36140 enum rtx_code extend = UNKNOWN;
36142 if (GET_CODE (orig_mem) == ZERO_EXTEND
36143 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
36145 extend = GET_CODE (orig_mem);
36146 orig_mem = XEXP (orig_mem, 0);
36147 target_mode = GET_MODE (orig_mem);
36150 gcc_assert (MEM_P (orig_mem));
36152 orig_addr = XEXP (orig_mem, 0);
36153 plus_or_lo_sum = GET_CODE (orig_addr);
36154 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36156 offset = XEXP (orig_addr, 1);
36157 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36158 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36160 if (extend != UNKNOWN)
36161 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
36163 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
36164 UNSPEC_FUSION_GPR);
36165 emit_insn (gen_rtx_SET (target, new_mem));
36167 if (extend == SIGN_EXTEND)
36169 int sub_off = ((BYTES_BIG_ENDIAN)
36170 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
36171 : 0);
36172 rtx sign_reg
36173 = simplify_subreg (target_mode, target, extend_mode, sub_off);
36175 emit_insn (gen_rtx_SET (target,
36176 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
36179 return;
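/* For instance (a sketch): a fused sign-extending halfword load
   (sign_extend:DI (mem:HI ...)) is rewritten above as a fused
   zero-extending load plus an explicit sign extension, e.g.

     lhz   r10,lo16(addr)(r10)   # fused zero-extending load
     extsh r10,r10               # separate sign extension

   because power8 load fusion only covers the zero-extending forms.  */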
36182 /* Emit the addis instruction that will be part of a fused instruction
36183 sequence. */
36185 void
36186 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
36187 const char *mode_name)
36189 rtx fuse_ops[10];
36190 char insn_template[80];
36191 const char *addis_str = NULL;
36192 const char *comment_str = ASM_COMMENT_START;
36194 if (*comment_str == ' ')
36195 comment_str++;
36197 /* Emit the addis instruction. */
36198 fuse_ops[0] = target;
36199 if (satisfies_constraint_L (addis_value))
36201 fuse_ops[1] = addis_value;
36202 addis_str = "lis %0,%v1";
36205 else if (GET_CODE (addis_value) == PLUS)
36207 rtx op0 = XEXP (addis_value, 0);
36208 rtx op1 = XEXP (addis_value, 1);
36210 if (REG_P (op0) && CONST_INT_P (op1)
36211 && satisfies_constraint_L (op1))
36213 fuse_ops[1] = op0;
36214 fuse_ops[2] = op1;
36215 addis_str = "addis %0,%1,%v2";
36219 else if (GET_CODE (addis_value) == HIGH)
36221 rtx value = XEXP (addis_value, 0);
36222 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
36224 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
36225 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
36226 if (TARGET_ELF)
36227 addis_str = "addis %0,%2,%1@toc@ha";
36229 else if (TARGET_XCOFF)
36230 addis_str = "addis %0,%1@u(%2)";
36232 else
36233 gcc_unreachable ();
36236 else if (GET_CODE (value) == PLUS)
36238 rtx op0 = XEXP (value, 0);
36239 rtx op1 = XEXP (value, 1);
36241 if (GET_CODE (op0) == UNSPEC
36242 && XINT (op0, 1) == UNSPEC_TOCREL
36243 && CONST_INT_P (op1))
36245 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
36246 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
36247 fuse_ops[3] = op1;
36248 if (TARGET_ELF)
36249 addis_str = "addis %0,%2,%1+%3@toc@ha";
36251 else if (TARGET_XCOFF)
36252 addis_str = "addis %0,%1+%3@u(%2)";
36254 else
36255 gcc_unreachable ();
36259 else if (satisfies_constraint_L (value))
36261 fuse_ops[1] = value;
36262 addis_str = "lis %0,%v1";
36265 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
36267 fuse_ops[1] = value;
36268 addis_str = "lis %0,%1@ha";
36272 if (!addis_str)
36273 fatal_insn ("Could not generate addis value for fusion", addis_value);
36275 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
36276 comment, mode_name);
36277 output_asm_insn (insn_template, fuse_ops);
36280 /* Emit a D-form load or store instruction that is the second instruction
36281 of a fusion sequence. */
36283 void
36284 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
36285 const char *insn_str)
36287 rtx fuse_ops[10];
36288 char insn_template[80];
36290 fuse_ops[0] = load_store_reg;
36291 fuse_ops[1] = addis_reg;
36293 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
36295 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
36296 fuse_ops[2] = offset;
36297 output_asm_insn (insn_template, fuse_ops);
36300 else if (GET_CODE (offset) == UNSPEC
36301 && XINT (offset, 1) == UNSPEC_TOCREL)
36303 if (TARGET_ELF)
36304 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
36306 else if (TARGET_XCOFF)
36307 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
36309 else
36310 gcc_unreachable ();
36312 fuse_ops[2] = XVECEXP (offset, 0, 0);
36313 output_asm_insn (insn_template, fuse_ops);
36316 else if (GET_CODE (offset) == PLUS
36317 && GET_CODE (XEXP (offset, 0)) == UNSPEC
36318 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
36319 && CONST_INT_P (XEXP (offset, 1)))
36321 rtx tocrel_unspec = XEXP (offset, 0);
36322 if (TARGET_ELF)
36323 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
36325 else if (TARGET_XCOFF)
36326 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
36328 else
36329 gcc_unreachable ();
36331 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
36332 fuse_ops[3] = XEXP (offset, 1);
36333 output_asm_insn (insn_template, fuse_ops);
36336 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
36338 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
36340 fuse_ops[2] = offset;
36341 output_asm_insn (insn_template, fuse_ops);
36344 else
36345 fatal_insn ("Unable to generate load/store offset for fusion", offset);
36347 return;
36350 /* Wrap a TOC address that can be fused to indicate that special fusion
36351 processing is needed. */
36354 fusion_wrap_memory_address (rtx old_mem)
36356 rtx old_addr = XEXP (old_mem, 0);
36357 rtvec v = gen_rtvec (1, old_addr);
36358 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
36359 return replace_equiv_address_nv (old_mem, new_addr, false);
36362 /* Given an address, convert it into the addis and load offset parts. Addresses
36363 created during the peephole2 process look like:
36364 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
36365 (unspec [(...)] UNSPEC_TOCREL))
36367 Addresses created via toc fusion look like:
36368 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
36370 static void
36371 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
36373 rtx hi, lo;
36375 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
36377 lo = XVECEXP (addr, 0, 0);
36378 hi = gen_rtx_HIGH (Pmode, lo);
36380 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
36382 hi = XEXP (addr, 0);
36383 lo = XEXP (addr, 1);
36385 else
36386 gcc_unreachable ();
36388 *p_hi = hi;
36389 *p_lo = lo;
36392 /* Return a string to fuse an addis instruction with a gpr load to the same
36393 register that the addis instruction set. The address that is used
36394 is the logical address that was formed during peephole2:
36395 (lo_sum (high) (low-part))
36397 Or the address is the TOC address that is wrapped before register allocation:
36398 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
36400 The code is complicated, so we call output_asm_insn directly, and just
36401 return "". */
36403 const char *
36404 emit_fusion_gpr_load (rtx target, rtx mem)
36406 rtx addis_value;
36407 rtx addr;
36408 rtx load_offset;
36409 const char *load_str = NULL;
36410 const char *mode_name = NULL;
36411 machine_mode mode;
36413 if (GET_CODE (mem) == ZERO_EXTEND)
36414 mem = XEXP (mem, 0);
36416 gcc_assert (REG_P (target) && MEM_P (mem));
36418 addr = XEXP (mem, 0);
36419 fusion_split_address (addr, &addis_value, &load_offset);
36421 /* Now emit the load instruction to the same register. */
36422 mode = GET_MODE (mem);
36423 switch (mode)
36425 case QImode:
36426 mode_name = "char";
36427 load_str = "lbz";
36428 break;
36430 case HImode:
36431 mode_name = "short";
36432 load_str = "lhz";
36433 break;
36435 case SImode:
36436 case SFmode:
36437 mode_name = (mode == SFmode) ? "float" : "int";
36438 load_str = "lwz";
36439 break;
36441 case DImode:
36442 case DFmode:
36443 gcc_assert (TARGET_POWERPC64);
36444 mode_name = (mode == DFmode) ? "double" : "long";
36445 load_str = "ld";
36446 break;
36448 default:
36449 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
36452 /* Emit the addis instruction. */
36453 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
36455 /* Emit the D-form load instruction. */
36456 emit_fusion_load_store (target, target, load_offset, load_str);
36458 return "";
36462 /* Return true if the peephole2 can combine an addis instruction with a
36463 load or store using the memory operation. This form of fusion was added
36464 with the ISA 3.0 (power9) hardware. */
36466 bool
36467 fusion_p9_p (rtx addis_reg, /* register set via addis. */
36468 rtx addis_value, /* addis value. */
36469 rtx dest, /* destination (memory or register). */
36470 rtx src) /* source (register or memory). */
36472 rtx addr, mem, offset;
36473 machine_mode mode = GET_MODE (src);
36475 /* Validate arguments. */
36476 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
36477 return false;
36479 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
36480 return false;
36482 /* Ignore extend operations that are part of the load. */
36483 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
36484 src = XEXP (src, 0);
36486 /* Test for memory<-register or register<-memory. */
36487 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
36489 if (!MEM_P (dest))
36490 return false;
36492 mem = dest;
36495 else if (MEM_P (src))
36497 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
36498 return false;
36500 mem = src;
36503 else
36504 return false;
36506 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
36507 if (GET_CODE (addr) == PLUS)
36509 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36510 return false;
36512 return satisfies_constraint_I (XEXP (addr, 1));
36515 else if (GET_CODE (addr) == LO_SUM)
36517 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36518 return false;
36520 offset = XEXP (addr, 1);
36521 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
36522 return small_toc_ref (offset, GET_MODE (offset));
36524 else if (TARGET_ELF && !TARGET_POWERPC64)
36525 return CONSTANT_P (offset);
36528 return false;
36531 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36532 load sequence.
36534 The operands are:
36535 operands[0] register set with addis
36536 operands[1] value set via addis
36537 operands[2] target register being loaded
36538 operands[3] D-form memory reference using operands[0].
36540 This is similar to the fusion introduced with power8, except it applies to
36541 both loads and stores and does not require the result register to be the
36542 same as the base register. At the moment, we only do this if the register
36543 set by the addis is dead. */
36545 void
36546 expand_fusion_p9_load (rtx *operands)
36548 rtx tmp_reg = operands[0];
36549 rtx addis_value = operands[1];
36550 rtx target = operands[2];
36551 rtx orig_mem = operands[3];
36552 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
36553 enum rtx_code plus_or_lo_sum;
36554 machine_mode target_mode = GET_MODE (target);
36555 machine_mode extend_mode = target_mode;
36556 machine_mode ptr_mode = Pmode;
36557 enum rtx_code extend = UNKNOWN;
36559 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
36561 extend = GET_CODE (orig_mem);
36562 orig_mem = XEXP (orig_mem, 0);
36563 target_mode = GET_MODE (orig_mem);
36566 gcc_assert (MEM_P (orig_mem));
36568 orig_addr = XEXP (orig_mem, 0);
36569 plus_or_lo_sum = GET_CODE (orig_addr);
36570 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36572 offset = XEXP (orig_addr, 1);
36573 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36574 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36576 if (extend != UNKNOWN)
36577 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
36579 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
36580 UNSPEC_FUSION_P9);
36582 set = gen_rtx_SET (target, new_mem);
36583 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36584 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36585 emit_insn (insn);
36587 return;
36590 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36591 store sequence.
36593 The operands are:
36594 operands[0] register set with addis
36595 operands[1] value set via addis
36596 operands[2] target D-form memory being stored to
36597 operands[3] register being stored
36599 This is similar to the fusion introduced with power8, except it applies to
36600 both loads and stores and does not require the result register to be the
36601 same as the base register. At the moment, we only do this if the register
36602 set by the addis is dead. */
36604 void
36605 expand_fusion_p9_store (rtx *operands)
36607 rtx tmp_reg = operands[0];
36608 rtx addis_value = operands[1];
36609 rtx orig_mem = operands[2];
36610 rtx src = operands[3];
36611 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
36612 enum rtx_code plus_or_lo_sum;
36613 machine_mode target_mode = GET_MODE (orig_mem);
36614 machine_mode ptr_mode = Pmode;
36616 gcc_assert (MEM_P (orig_mem));
36618 orig_addr = XEXP (orig_mem, 0);
36619 plus_or_lo_sum = GET_CODE (orig_addr);
36620 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36622 offset = XEXP (orig_addr, 1);
36623 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36624 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36626 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
36627 UNSPEC_FUSION_P9);
36629 set = gen_rtx_SET (new_mem, new_src);
36630 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36631 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36632 emit_insn (insn);
36634 return;
36637 /* Return a string to fuse an addis instruction with a load using extended
36638 fusion. The address that is used is the logical address that was formed
36639 during peephole2: (lo_sum (high) (low-part))
36641 The code is complicated, so we call output_asm_insn directly, and just
36642 return "". */
36644 const char *
36645 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
36647 machine_mode mode = GET_MODE (reg);
36648 rtx hi;
36649 rtx lo;
36650 rtx addr;
36651 const char *load_string;
36652 int r;
36654 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
36656 mem = XEXP (mem, 0);
36657 mode = GET_MODE (mem);
36660 if (GET_CODE (reg) == SUBREG)
36662 gcc_assert (SUBREG_BYTE (reg) == 0);
36663 reg = SUBREG_REG (reg);
36666 if (!REG_P (reg))
36667 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
36669 r = REGNO (reg);
36670 if (FP_REGNO_P (r))
36672 if (mode == SFmode)
36673 load_string = "lfs";
36674 else if (mode == DFmode || mode == DImode)
36675 load_string = "lfd";
36676 else
36677 gcc_unreachable ();
36679 else if (INT_REGNO_P (r))
36681 switch (mode)
36683 case QImode:
36684 load_string = "lbz";
36685 break;
36686 case HImode:
36687 load_string = "lhz";
36688 break;
36689 case SImode:
36690 case SFmode:
36691 load_string = "lwz";
36692 break;
36693 case DImode:
36694 case DFmode:
36695 if (!TARGET_POWERPC64)
36696 gcc_unreachable ();
36697 load_string = "ld";
36698 break;
36699 default:
36700 gcc_unreachable ();
36703 else
36704 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
36706 if (!MEM_P (mem))
36707 fatal_insn ("emit_fusion_p9_load not MEM", mem);
36709 addr = XEXP (mem, 0);
36710 fusion_split_address (addr, &hi, &lo);
36712 /* Emit the addis instruction. */
36713 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
36715 /* Emit the D-form load instruction. */
36716 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
36718 return "";
36721 /* Return a string to fuse an addis instruction with a store using extended
36722 fusion. The address that is used is the logical address that was formed
36723 during peephole2: (lo_sum (high) (low-part))
36725 The code is complicated, so we call output_asm_insn directly, and just
36726 return "". */
36728 const char *
36729 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
36731 machine_mode mode = GET_MODE (reg);
36732 rtx hi;
36733 rtx lo;
36734 rtx addr;
36735 const char *store_string;
36736 int r;
36738 if (GET_CODE (reg) == SUBREG)
36740 gcc_assert (SUBREG_BYTE (reg) == 0);
36741 reg = SUBREG_REG (reg);
36744 if (!REG_P (reg))
36745 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
36747 r = REGNO (reg);
36748 if (FP_REGNO_P (r))
36750 if (mode == SFmode)
36751 store_string = "stfs";
36752 else if (mode == DFmode)
36753 store_string = "stfd";
36754 else
36755 gcc_unreachable ();
36757 else if (INT_REGNO_P (r))
36759 switch (mode)
36761 case QImode:
36762 store_string = "stb";
36763 break;
36764 case HImode:
36765 store_string = "sth";
36766 break;
36767 case SImode:
36768 case SFmode:
36769 store_string = "stw";
36770 break;
36771 case DImode:
36772 case DFmode:
36773 if (!TARGET_POWERPC64)
36774 gcc_unreachable ();
36775 store_string = "std";
36776 break;
36777 default:
36778 gcc_unreachable ();
36781 else
36782 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
36784 if (!MEM_P (mem))
36785 fatal_insn ("emit_fusion_p9_store not MEM", mem);
36787 addr = XEXP (mem, 0);
36788 fusion_split_address (addr, &hi, &lo);
36790 /* Emit the addis instruction. */
36791 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
36793 /* Emit the D-form store instruction. */
36794 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
36796 return "";
36800 /* Analyze vector computations and remove unnecessary doubleword
36801 swaps (xxswapdi instructions). This pass is performed only
36802 for little-endian VSX code generation.
36804 For this specific case, loads and stores of 4x32 and 2x64 vectors
36805 are inefficient. These are implemented using the lxvd2x and
36806 stxvd2x instructions, which invert the order of doublewords in
36807 a vector register. Thus the code generation inserts an xxswapdi
36808 after each such load, and prior to each such store. (For spill
36809 code after register assignment, an additional xxswapdi is inserted
36810 following each store in order to return a hard register to its
36811 unpermuted value.)
36813 The extra xxswapdi instructions reduce performance. This can be
36814 particularly bad for vectorized code. The purpose of this pass
36815 is to reduce the number of xxswapdi instructions required for
36816 correctness.
36818 The primary insight is that much code that operates on vectors
36819 does not care about the relative order of elements in a register,
36820 so long as the correct memory order is preserved. If we have
36821 a computation where all input values are provided by lxvd2x/xxswapdi
36822 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
36823 and all intermediate computations are pure SIMD (independent of
36824 element order), then all the xxswapdi's associated with the loads
36825 and stores may be removed.
36827 This pass uses some of the infrastructure and logical ideas from
36828 the "web" pass in web.c. We create maximal webs of computations
36829 fitting the description above using union-find. Each such web is
36830 then optimized by removing its unnecessary xxswapdi instructions.
36832 The pass is placed prior to global optimization so that we can
36833 perform the optimization in the safest and simplest way possible;
36834 that is, by replacing each xxswapdi insn with a register copy insn.
36835 Subsequent forward propagation will remove copies where possible.
36837 There are some operations sensitive to element order that we can
36838 still allow, provided we modify those operations.
36839 These include CONST_VECTORs, for which we must swap the first and
36840 second halves of the constant vector; and SUBREGs, for which we
36841 must adjust the byte offset to account for the swapped doublewords.
36842 A remaining opportunity would be non-immediate-form splats, for
36843 which we should adjust the selected lane of the input. We should
36844 also make code generation adjustments for sum-across operations,
36845 since this is a common vectorizer reduction.
36847 Because we run prior to the first split, we can see loads and stores
36848 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
36849 vector loads and stores that have not yet been split into a permuting
36850 load/store and a swap. (One way this can happen is with a builtin
36851 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
36852 than deleting a swap, we convert the load/store into a permuting
36853 load/store (which effectively removes the swap). */
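/* An illustrative sketch (schematic little-endian VSX assembly, assumed
   rather than taken from a compiler dump): for a simple copy

       vector int v = *x;
       *y = v;

   code generation without this pass emits, in effect,

       lxvd2x   vs0,0,rX     # load; doublewords reversed
       xxswapdi vs0,vs0      # restore element order
       xxswapdi vs0,vs0      # reverse again for the store
       stxvd2x  vs0,0,rY     # store; doublewords reversed

   The intermediate computation is empty, hence trivially pure SIMD,
   so both swaps fall in one optimizable web: each is replaced with a
   register copy that forward propagation later removes.  */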
36855 /* Notes on Permutes
36857 We do not currently handle computations that contain permutes. There
36858 is a general transformation that can be performed correctly, but it
36859 may introduce more expensive code than it replaces. To handle these
36860 would require a cost model to determine when to perform the optimization.
36861 This commentary records how this could be done if desired.
36863 The most general permute is something like this (example for V16QI):
36865 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
36866 (parallel [(const_int a0) (const_int a1)
36868 (const_int a14) (const_int a15)]))
36870 where a0,...,a15 are in [0,31] and select which elements of op1
36871 and op2 appear in the result.
36873 Regardless of mode, we can convert the PARALLEL to a mask of 16
36874 byte-element selectors. Let's call this M, with M[i] representing
36875 the ith byte-element selector value. Then if we swap doublewords
36876 throughout the computation, we can get correct behavior by replacing
36877 M with M' as follows:
36879 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
36880 { ((M[i]+8)%16)+16 : M[i] in [16,31]
36882 This seems promising at first, since we are just replacing one mask
36883 with another. But certain masks are preferable to others. If M
36884 is a mask that matches a vmrghh pattern, for example, M' certainly
36885 will not. Instead of a single vmrghh, we would generate a load of
36886 M' and a vperm. So we would need to know how many xxswapdi's we can
36887 remove as a result of this transformation to determine if it's
36888 profitable; and preferably the logic would need to be aware of all
36889 the special preferable masks.
36891 Another form of permute is an UNSPEC_VPERM, in which the mask is
36892 already in a register. In some cases, this mask may be a constant
36893 that we can discover with ud-chains, in which case the above
36894 transformation is ok. However, the common usage here is for the
36895 mask to be produced by an UNSPEC_LVSL, in which case the mask
36896 cannot be known at compile time. In such a case we would have to
36897 generate several instructions to compute M' as above at run time,
36898 and a cost model is needed again.
36900 However, when the mask M for an UNSPEC_VPERM is loaded from the
36901 constant pool, we can replace M with M' as above at no cost
36902 beyond adding a constant pool entry. */
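/* A minimal sketch (hypothetical helper, not used by the pass itself)
   of the M -> M' rewrite described above, for one byte-element
   selector of a permute mask.  For example, the identity mask 0..15
   maps to 8..15,0..7.  */
static inline unsigned int
swapped_mask_element (unsigned int m)
{
  /* Selectors 0..15 pick from op1, 16..31 from op2; swapping the
     doublewords of each 16-byte source rotates its half by 8.  */
  return m < 16 ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}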
36904 /* This is based on the union-find logic in web.c. web_entry_base is
36905 defined in df.h. */
36906 class swap_web_entry : public web_entry_base
36908 public:
36909 /* Pointer to the insn. */
36910 rtx_insn *insn;
36911 /* Set if insn contains a mention of a vector register. All other
36912 fields are undefined if this field is unset. */
36913 unsigned int is_relevant : 1;
36914 /* Set if insn is a load. */
36915 unsigned int is_load : 1;
36916 /* Set if insn is a store. */
36917 unsigned int is_store : 1;
36918 /* Set if insn is a doubleword swap. This can either be a register swap
36919 or a permuting load or store (test is_load and is_store for this). */
36920 unsigned int is_swap : 1;
36921 /* Set if the insn has a live-in use of a parameter register. */
36922 unsigned int is_live_in : 1;
36923 /* Set if the insn has a live-out def of a return register. */
36924 unsigned int is_live_out : 1;
36925 /* Set if the insn contains a subreg reference of a vector register. */
36926 unsigned int contains_subreg : 1;
36927 /* Set if the insn contains a 128-bit integer operand. */
36928 unsigned int is_128_int : 1;
36929 /* Set if this is a call-insn. */
36930 unsigned int is_call : 1;
36931 /* Set if this insn does not perform a vector operation for which
36932 element order matters, or if we know how to fix it up if it does.
36933 Undefined if is_swap is set. */
36934 unsigned int is_swappable : 1;
36935 /* A nonzero value indicates what kind of special handling for this
36936 insn is required if doublewords are swapped. Undefined if
36937 is_swappable is not set. */
36938 unsigned int special_handling : 4;
36939 /* Set if the web represented by this entry cannot be optimized. */
36940 unsigned int web_not_optimizable : 1;
36941 /* Set if this insn should be deleted. */
36942 unsigned int will_delete : 1;
36945 enum special_handling_values {
36946 SH_NONE = 0,
36947 SH_CONST_VECTOR,
36948 SH_SUBREG,
36949 SH_NOSWAP_LD,
36950 SH_NOSWAP_ST,
36951 SH_EXTRACT,
36952 SH_SPLAT,
36953 SH_XXPERMDI,
36954 SH_CONCAT,
36955 SH_VPERM
36958 /* Union INSN with all insns containing definitions that reach USE.
36959 Detect whether USE is live-in to the current function. */
36960 static void
36961 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
36963 struct df_link *link = DF_REF_CHAIN (use);
36965 if (!link)
36966 insn_entry[INSN_UID (insn)].is_live_in = 1;
36968 while (link)
36970 if (DF_REF_IS_ARTIFICIAL (link->ref))
36971 insn_entry[INSN_UID (insn)].is_live_in = 1;
36973 if (DF_REF_INSN_INFO (link->ref))
36975 rtx def_insn = DF_REF_INSN (link->ref);
36976 (void)unionfind_union (insn_entry + INSN_UID (insn),
36977 insn_entry + INSN_UID (def_insn));
36980 link = link->next;
36984 /* Union INSN with all insns containing uses reached from DEF.
36985 Detect whether DEF is live-out from the current function. */
36986 static void
36987 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
36989 struct df_link *link = DF_REF_CHAIN (def);
36991 if (!link)
36992 insn_entry[INSN_UID (insn)].is_live_out = 1;
36994 while (link)
36996 /* This could be an eh use or some other artificial use;
36997 we treat these all the same (killing the optimization). */
36998 if (DF_REF_IS_ARTIFICIAL (link->ref))
36999 insn_entry[INSN_UID (insn)].is_live_out = 1;
37001 if (DF_REF_INSN_INFO (link->ref))
37003 rtx use_insn = DF_REF_INSN (link->ref);
37004 (void)unionfind_union (insn_entry + INSN_UID (insn),
37005 insn_entry + INSN_UID (use_insn));
37008 link = link->next;
37012 /* Return 1 iff INSN is a load insn, including permuting loads that
37013 represent an lxvd2x instruction; else return 0. */
37014 static unsigned int
37015 insn_is_load_p (rtx insn)
37017 rtx body = PATTERN (insn);
37019 if (GET_CODE (body) == SET)
37021 if (GET_CODE (SET_SRC (body)) == MEM)
37022 return 1;
37024 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
37025 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
37026 return 1;
37028 return 0;
37031 if (GET_CODE (body) != PARALLEL)
37032 return 0;
37034 rtx set = XVECEXP (body, 0, 0);
37036 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
37037 return 1;
37039 return 0;
37042 /* Return 1 iff INSN is a store insn, including permuting stores that
37043 represent an stxvd2x instruction; else return 0. */
37044 static unsigned int
37045 insn_is_store_p (rtx insn)
37047 rtx body = PATTERN (insn);
37048 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
37049 return 1;
37050 if (GET_CODE (body) != PARALLEL)
37051 return 0;
37052 rtx set = XVECEXP (body, 0, 0);
37053 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
37054 return 1;
37055 return 0;
37058 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
37059 a permuting load, or a permuting store. */
37060 static unsigned int
37061 insn_is_swap_p (rtx insn)
37063 rtx body = PATTERN (insn);
37064 if (GET_CODE (body) != SET)
37065 return 0;
37066 rtx rhs = SET_SRC (body);
37067 if (GET_CODE (rhs) != VEC_SELECT)
37068 return 0;
37069 rtx parallel = XEXP (rhs, 1);
37070 if (GET_CODE (parallel) != PARALLEL)
37071 return 0;
37072 unsigned int len = XVECLEN (parallel, 0);
37073 if (len != 2 && len != 4 && len != 8 && len != 16)
37074 return 0;
37075 for (unsigned int i = 0; i < len / 2; ++i)
37077 rtx op = XVECEXP (parallel, 0, i);
37078 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
37079 return 0;
37081 for (unsigned int i = len / 2; i < len; ++i)
37083 rtx op = XVECEXP (parallel, 0, i);
37084 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
37085 return 0;
37087 return 1;
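/* For example, a V4SI doubleword swap accepted above has the form

     (set (reg:V4SI y)
	  (vec_select:V4SI (reg:V4SI x)
			   (parallel [(const_int 2) (const_int 3)
				      (const_int 0) (const_int 1)])))

   i.e. the selector is [len/2 .. len-1, 0 .. len/2-1] with len == 4.  */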
37090 /* Return TRUE iff INSN is a swap fed by a load from the constant pool. */
37091 static bool
37092 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
37094 unsigned uid = INSN_UID (insn);
37095 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
37096 return false;
37098 /* Find the unique use in the swap and locate its def. If the def
37099 isn't unique, punt. */
37100 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37101 df_ref use;
37102 FOR_EACH_INSN_INFO_USE (use, insn_info)
37104 struct df_link *def_link = DF_REF_CHAIN (use);
37105 if (!def_link || def_link->next)
37106 return false;
37108 rtx def_insn = DF_REF_INSN (def_link->ref);
37109 unsigned uid2 = INSN_UID (def_insn);
37110 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
37111 return false;
37113 rtx body = PATTERN (def_insn);
37114 if (GET_CODE (body) != SET
37115 || GET_CODE (SET_SRC (body)) != VEC_SELECT
37116 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
37117 return false;
37119 rtx mem = XEXP (SET_SRC (body), 0);
37120 rtx base_reg = XEXP (mem, 0);
37122 df_ref base_use;
37123 insn_info = DF_INSN_INFO_GET (def_insn);
37124 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
37126 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
37127 continue;
37129 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
37130 if (!base_def_link || base_def_link->next)
37131 return false;
37133 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
37134 rtx tocrel_body = PATTERN (tocrel_insn);
37135 rtx base, offset;
37136 if (GET_CODE (tocrel_body) != SET)
37137 return false;
37138 /* There is an extra level of indirection for small/large
37139 code models. */
37140 rtx tocrel_expr = SET_SRC (tocrel_body);
37141 if (GET_CODE (tocrel_expr) == MEM)
37142 tocrel_expr = XEXP (tocrel_expr, 0);
37143 if (!toc_relative_expr_p (tocrel_expr, false))
37144 return false;
37145 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37146 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
37147 return false;
37150 return true;
37153 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
37154 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
37155 static bool
37156 v2df_reduction_p (rtx op)
37158 if (GET_MODE (op) != V2DFmode)
37159 return false;
37161 enum rtx_code code = GET_CODE (op);
37162 if (code != PLUS && code != SMIN && code != SMAX)
37163 return false;
37165 rtx concat = XEXP (op, 0);
37166 if (GET_CODE (concat) != VEC_CONCAT)
37167 return false;
37169 rtx select0 = XEXP (concat, 0);
37170 rtx select1 = XEXP (concat, 1);
37171 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
37172 return false;
37174 rtx reg0 = XEXP (select0, 0);
37175 rtx reg1 = XEXP (select1, 0);
37176 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
37177 return false;
37179 rtx parallel0 = XEXP (select0, 1);
37180 rtx parallel1 = XEXP (select1, 1);
37181 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
37182 return false;
37184 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
37185 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
37186 return false;
37188 return true;
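/* Schematically, the shape accepted above is

     (plus:V2DF (vec_concat:V2DF
		  (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
		  (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
		...)

   with SMIN/SMAX also allowed in place of PLUS; both lanes of X feed
   the operation symmetrically, so a doubleword swap of X is harmless.  */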
37191 /* Return 1 iff OP is an operand that will not be affected by having
37192 vector doublewords swapped in memory. */
37193 static unsigned int
37194 rtx_is_swappable_p (rtx op, unsigned int *special)
37196 enum rtx_code code = GET_CODE (op);
37197 int i, j;
37198 rtx parallel;
37200 switch (code)
37202 case LABEL_REF:
37203 case SYMBOL_REF:
37204 case CLOBBER:
37205 case REG:
37206 return 1;
37208 case VEC_CONCAT:
37209 case ASM_INPUT:
37210 case ASM_OPERANDS:
37211 return 0;
37213 case CONST_VECTOR:
37215 *special = SH_CONST_VECTOR;
37216 return 1;
37219 case VEC_DUPLICATE:
37220 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
37221 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
37222 it represents a vector splat for which we can do special
37223 handling. */
37224 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
37225 return 1;
37226 else if (GET_CODE (XEXP (op, 0)) == REG
37227 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
37228 /* This catches V2DF and V2DI splat, at a minimum. */
37229 return 1;
37230 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
37231 /* If the duplicated item is from a select, defer to the select
37232 processing to see if we can change the lane for the splat. */
37233 return rtx_is_swappable_p (XEXP (op, 0), special);
37234 else
37235 return 0;
37237 case VEC_SELECT:
37238 /* A vec_extract operation is ok if we change the lane. */
37239 if (GET_CODE (XEXP (op, 0)) == REG
37240 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
37241 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
37242 && XVECLEN (parallel, 0) == 1
37243 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
37245 *special = SH_EXTRACT;
37246 return 1;
37248 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
37249 XXPERMDI is a swap operation, it will be identified by
37250 insn_is_swap_p and therefore we won't get here. */
37251 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
37252 && (GET_MODE (XEXP (op, 0)) == V4DFmode
37253 || GET_MODE (XEXP (op, 0)) == V4DImode)
37254 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
37255 && XVECLEN (parallel, 0) == 2
37256 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
37257 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
37259 *special = SH_XXPERMDI;
37260 return 1;
37262 else if (v2df_reduction_p (op))
37263 return 1;
37264 else
37265 return 0;
37267 case UNSPEC:
37269 /* Various operations are unsafe for this optimization, at least
37270 without significant additional work. Permutes are obviously
37271 problematic, as both the permute control vector and the ordering
37272 of the target values are invalidated by doubleword swapping.
37273 Vector pack and unpack modify the number of vector lanes.
37274 Merge-high/low will not operate correctly on swapped operands.
37275 Vector shifts across element boundaries are clearly uncool,
37276 as are vector select and concatenate operations. Vector
37277 sum-across instructions define one operand with a specific
37278 order-dependent element, so additional fixup code would be
37279 needed to make those work. Vector set and non-immediate-form
37280 vector splat are element-order sensitive. A few of these
37281 cases might be workable with special handling if required.
37282 Adding cost modeling would be appropriate in some cases. */
37283 int val = XINT (op, 1);
37284 switch (val)
37286 default:
37287 break;
37288 case UNSPEC_VMRGH_DIRECT:
37289 case UNSPEC_VMRGL_DIRECT:
37290 case UNSPEC_VPACK_SIGN_SIGN_SAT:
37291 case UNSPEC_VPACK_SIGN_UNS_SAT:
37292 case UNSPEC_VPACK_UNS_UNS_MOD:
37293 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
37294 case UNSPEC_VPACK_UNS_UNS_SAT:
37295 case UNSPEC_VPERM:
37296 case UNSPEC_VPERM_UNS:
37297 case UNSPEC_VPERMHI:
37298 case UNSPEC_VPERMSI:
37299 case UNSPEC_VPKPX:
37300 case UNSPEC_VSLDOI:
37301 case UNSPEC_VSLO:
37302 case UNSPEC_VSRO:
37303 case UNSPEC_VSUM2SWS:
37304 case UNSPEC_VSUM4S:
37305 case UNSPEC_VSUM4UBS:
37306 case UNSPEC_VSUMSWS:
37307 case UNSPEC_VSUMSWS_DIRECT:
37308 case UNSPEC_VSX_CONCAT:
37309 case UNSPEC_VSX_SET:
37310 case UNSPEC_VSX_SLDWI:
37311 case UNSPEC_VUNPACK_HI_SIGN:
37312 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
37313 case UNSPEC_VUNPACK_LO_SIGN:
37314 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
37315 case UNSPEC_VUPKHPX:
37316 case UNSPEC_VUPKHS_V4SF:
37317 case UNSPEC_VUPKHU_V4SF:
37318 case UNSPEC_VUPKLPX:
37319 case UNSPEC_VUPKLS_V4SF:
37320 case UNSPEC_VUPKLU_V4SF:
37321 case UNSPEC_VSX_CVDPSPN:
37322 case UNSPEC_VSX_CVSPDP:
37323 case UNSPEC_VSX_CVSPDPN:
37324 return 0;
37325 case UNSPEC_VSPLT_DIRECT:
37326 *special = SH_SPLAT;
37327 return 1;
37328 case UNSPEC_REDUC_PLUS:
37329 case UNSPEC_REDUC:
37330 return 1;
37334 default:
37335 break;
37338 const char *fmt = GET_RTX_FORMAT (code);
37339 int ok = 1;
37341 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37342 if (fmt[i] == 'e' || fmt[i] == 'u')
37344 unsigned int special_op = SH_NONE;
37345 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
37346 if (special_op == SH_NONE)
37347 continue;
37348 /* Ensure we never have two kinds of special handling
37349 for the same insn. */
37350 if (*special != SH_NONE && *special != special_op)
37351 return 0;
37352 *special = special_op;
37354 else if (fmt[i] == 'E')
37355 for (j = 0; j < XVECLEN (op, i); ++j)
37357 unsigned int special_op = SH_NONE;
37358 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
37359 if (special_op == SH_NONE)
37360 continue;
37361 /* Ensure we never have two kinds of special handling
37362 for the same insn. */
37363 if (*special != SH_NONE && *special != special_op)
37364 return 0;
37365 *special = special_op;
37368 return ok;
37371 /* Return 1 iff INSN is an insn that will not be affected by
37372 having vector doublewords swapped in memory (in which case
37373 *SPECIAL is unchanged), or that can be modified to be correct
37374 if vector doublewords are swapped in memory (in which case
37375 *SPECIAL is changed to a value indicating how). */
37376 static unsigned int
37377 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
37378 unsigned int *special)
37380 /* Calls are always bad. */
37381 if (GET_CODE (insn) == CALL_INSN)
37382 return 0;
37384 /* Loads and stores seen here are not permuting, but we can still
37385 fix them up by converting them to permuting ones. Exceptions:
37386 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
37387 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
37388 for the SET source. Also we must now make an exception for lvx
37389 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
37390 explicit "& -16") since this leads to unrecognizable insns. */
37391 rtx body = PATTERN (insn);
37392 int i = INSN_UID (insn);
37394 if (insn_entry[i].is_load)
37396 if (GET_CODE (body) == SET)
37398 rtx rhs = SET_SRC (body);
37399 gcc_assert (GET_CODE (rhs) == MEM);
37400 if (GET_CODE (XEXP (rhs, 0)) == AND)
37401 return 0;
37403 *special = SH_NOSWAP_LD;
37404 return 1;
37406 else
37407 return 0;
37410 if (insn_entry[i].is_store)
37412 if (GET_CODE (body) == SET
37413 && GET_CODE (SET_SRC (body)) != UNSPEC)
37415 rtx lhs = SET_DEST (body);
37416 gcc_assert (GET_CODE (lhs) == MEM);
37417 if (GET_CODE (XEXP (lhs, 0)) == AND)
37418 return 0;
37420 *special = SH_NOSWAP_ST;
37421 return 1;
37423 else
37424 return 0;
37427 /* A convert to single precision can be left as is provided that
37428 all of its uses are in xxspltw instructions that splat BE element
37429 zero. */
37430 if (GET_CODE (body) == SET
37431 && GET_CODE (SET_SRC (body)) == UNSPEC
37432 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
37434 df_ref def;
37435 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37437 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37439 struct df_link *link = DF_REF_CHAIN (def);
37440 if (!link)
37441 return 0;
37443 for (; link; link = link->next) {
37444 rtx use_insn = DF_REF_INSN (link->ref);
37445 rtx use_body = PATTERN (use_insn);
37446 if (GET_CODE (use_body) != SET
37447 || GET_CODE (SET_SRC (use_body)) != UNSPEC
37448 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
37449 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
37450 return 0;
37454 return 1;
37457 /* A concatenation of two doublewords is ok if we reverse the
37458 order of the inputs. */
37459 if (GET_CODE (body) == SET
37460 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
37461 && (GET_MODE (SET_SRC (body)) == V2DFmode
37462 || GET_MODE (SET_SRC (body)) == V2DImode))
37464 *special = SH_CONCAT;
37465 return 1;
37468 /* V2DF reductions are always swappable. */
37469 if (GET_CODE (body) == PARALLEL)
37471 rtx expr = XVECEXP (body, 0, 0);
37472 if (GET_CODE (expr) == SET
37473 && v2df_reduction_p (SET_SRC (expr)))
37474 return 1;
37477 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
37478 constant pool. */
37479 if (GET_CODE (body) == SET
37480 && GET_CODE (SET_SRC (body)) == UNSPEC
37481 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
37482 && XVECLEN (SET_SRC (body), 0) == 3
37483 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
37485 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
37486 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37487 df_ref use;
37488 FOR_EACH_INSN_INFO_USE (use, insn_info)
37489 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37491 struct df_link *def_link = DF_REF_CHAIN (use);
37492 /* Punt if multiple definitions for this reg. */
37493 if (def_link && !def_link->next
37494 && const_load_sequence_p (insn_entry,
37495 DF_REF_INSN (def_link->ref)))
37497 *special = SH_VPERM;
37498 return 1;
37503 /* Otherwise check the operands for vector lane violations. */
37504 return rtx_is_swappable_p (body, special);
37507 enum chain_purpose { FOR_LOADS, FOR_STORES };
37509 /* Return true if the UD or DU chain headed by LINK is non-empty,
37510 and every entry on the chain references an insn that is a
37511 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
37512 register swap must have only permuting loads as reaching defs.
37513 If PURPOSE is FOR_STORES, each such register swap must have only
37514 register swaps or permuting stores as reached uses. */
37515 static bool
37516 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
37517 enum chain_purpose purpose)
37519 if (!link)
37520 return false;
37522 for (; link; link = link->next)
37524 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
37525 continue;
37527 if (DF_REF_IS_ARTIFICIAL (link->ref))
37528 return false;
37530 rtx reached_insn = DF_REF_INSN (link->ref);
37531 unsigned uid = INSN_UID (reached_insn);
37532 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
37534 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
37535 || insn_entry[uid].is_store)
37536 return false;
37538 if (purpose == FOR_LOADS)
37540 df_ref use;
37541 FOR_EACH_INSN_INFO_USE (use, insn_info)
37543 struct df_link *swap_link = DF_REF_CHAIN (use);
37545 while (swap_link)
37547 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
37548 return false;
37550 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
37551 unsigned uid2 = INSN_UID (swap_def_insn);
37553 /* Only permuting loads are allowed. */
37554 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
37555 return false;
37557 swap_link = swap_link->next;
37561 else if (purpose == FOR_STORES)
37563 df_ref def;
37564 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37566 struct df_link *swap_link = DF_REF_CHAIN (def);
37568 while (swap_link)
37570 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
37571 return false;
37573 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
37574 unsigned uid2 = INSN_UID (swap_use_insn);
37576 /* Permuting stores or register swaps are allowed. */
37577 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
37578 return false;
37580 swap_link = swap_link->next;
37586 return true;
37589 /* Mark the xxswapdi instructions associated with permuting loads and
37590 stores for removal. Note that we only flag them for deletion here,
37591 as there is a possibility of a swap being reached from multiple
37592 loads, etc. */
37593 static void
37594 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
37596 rtx insn = insn_entry[i].insn;
37597 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37599 if (insn_entry[i].is_load)
37601 df_ref def;
37602 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37604 struct df_link *link = DF_REF_CHAIN (def);
37606 /* We know by now that these are swaps, so we can delete
37607 them confidently. */
37608 while (link)
37610 rtx use_insn = DF_REF_INSN (link->ref);
37611 insn_entry[INSN_UID (use_insn)].will_delete = 1;
37612 link = link->next;
37616 else if (insn_entry[i].is_store)
37618 df_ref use;
37619 FOR_EACH_INSN_INFO_USE (use, insn_info)
37621 /* Ignore uses for addressability. */
37622 machine_mode mode = GET_MODE (DF_REF_REG (use));
37623 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37624 continue;
37626 struct df_link *link = DF_REF_CHAIN (use);
37628 /* We know by now that these are swaps, so we can delete
37629 them confidently. */
37630 while (link)
37632 rtx def_insn = DF_REF_INSN (link->ref);
37633 insn_entry[INSN_UID (def_insn)].will_delete = 1;
37634 link = link->next;
37640 /* OP is either a CONST_VECTOR or an expression containing one.
37641 Swap the first half of the vector with the second in the first
37642 case. Recurse to find it in the second. */
37643 static void
37644 swap_const_vector_halves (rtx op)
37646 int i;
37647 enum rtx_code code = GET_CODE (op);
37648 if (GET_CODE (op) == CONST_VECTOR)
37650 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
37651 for (i = 0; i < half_units; ++i)
37653 rtx temp = CONST_VECTOR_ELT (op, i);
37654 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
37655 CONST_VECTOR_ELT (op, i + half_units) = temp;
37658 else
37660 int j;
37661 const char *fmt = GET_RTX_FORMAT (code);
37662 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37663 if (fmt[i] == 'e' || fmt[i] == 'u')
37664 swap_const_vector_halves (XEXP (op, i));
37665 else if (fmt[i] == 'E')
37666 for (j = 0; j < XVECLEN (op, i); ++j)
37667 swap_const_vector_halves (XVECEXP (op, i, j));
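/* For example, a V4SI constant vector {0, 1, 2, 3} becomes {2, 3, 0, 1}:
   elements within each doubleword keep their relative order.  */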
37671 /* Find all subregs of a vector expression that perform a narrowing,
37672 and adjust the subreg index to account for doubleword swapping. */
37673 static void
37674 adjust_subreg_index (rtx op)
37676 enum rtx_code code = GET_CODE (op);
37677 if (code == SUBREG
37678 && (GET_MODE_SIZE (GET_MODE (op))
37679 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
37681 unsigned int index = SUBREG_BYTE (op);
37682 if (index < 8)
37683 index += 8;
37684 else
37685 index -= 8;
37686 SUBREG_BYTE (op) = index;
37689 const char *fmt = GET_RTX_FORMAT (code);
37690 int i,j;
37691 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37692 if (fmt[i] == 'e' || fmt[i] == 'u')
37693 adjust_subreg_index (XEXP (op, i));
37694 else if (fmt[i] == 'E')
37695 for (j = 0; j < XVECLEN (op, i); ++j)
37696 adjust_subreg_index (XVECEXP (op, i, j));
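/* For example, (subreg:DI (reg:V2DI x) 0) names the other doubleword
   once X is held in swapped form, so its SUBREG_BYTE becomes 8, and a
   SUBREG_BYTE of 8 likewise becomes 0.  */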
37699 /* Convert the non-permuting load INSN to a permuting one. */
37700 static void
37701 permute_load (rtx_insn *insn)
37703 rtx body = PATTERN (insn);
37704 rtx mem_op = SET_SRC (body);
37705 rtx tgt_reg = SET_DEST (body);
37706 machine_mode mode = GET_MODE (tgt_reg);
37707 int n_elts = GET_MODE_NUNITS (mode);
37708 int half_elts = n_elts / 2;
37709 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37710 int i, j;
37711 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37712 XVECEXP (par, 0, i) = GEN_INT (j);
37713 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37714 XVECEXP (par, 0, i) = GEN_INT (j);
37715 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
37716 SET_SRC (body) = sel;
37717 INSN_CODE (insn) = -1; /* Force re-recognition. */
37718 df_insn_rescan (insn);
37720 if (dump_file)
37721 fprintf (dump_file, "Replacing load %d with permuted load\n",
37722 INSN_UID (insn));
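/* For example, for a V2DF load the rewritten SET_SRC above is

     (vec_select:V2DF (mem:V2DF ...)
		      (parallel [(const_int 1) (const_int 0)]))

   which matches the permuting (lxvd2x-style) load patterns.  */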
37725 /* Convert the non-permuting store INSN to a permuting one. */
37726 static void
37727 permute_store (rtx_insn *insn)
37729 rtx body = PATTERN (insn);
37730 rtx src_reg = SET_SRC (body);
37731 machine_mode mode = GET_MODE (src_reg);
37732 int n_elts = GET_MODE_NUNITS (mode);
37733 int half_elts = n_elts / 2;
37734 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37735 int i, j;
37736 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37737 XVECEXP (par, 0, i) = GEN_INT (j);
37738 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37739 XVECEXP (par, 0, i) = GEN_INT (j);
37740 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
37741 SET_SRC (body) = sel;
37742 INSN_CODE (insn) = -1; /* Force re-recognition. */
37743 df_insn_rescan (insn);
37745 if (dump_file)
37746 fprintf (dump_file, "Replacing store %d with permuted store\n",
37747 INSN_UID (insn));
37750 /* Given INSN containing a vector extract operation, adjust the index
37751 of the extracted lane to account for the doubleword swap. */
37752 static void
37753 adjust_extract (rtx_insn *insn)
37755 rtx pattern = PATTERN (insn);
37756 if (GET_CODE (pattern) == PARALLEL)
37757 pattern = XVECEXP (pattern, 0, 0);
37758 rtx src = SET_SRC (pattern);
37759 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
37760 account for that. */
37761 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
37762 rtx par = XEXP (sel, 1);
37763 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
37764 int lane = INTVAL (XVECEXP (par, 0, 0));
37765 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37766 XVECEXP (par, 0, 0) = GEN_INT (lane);
37767 INSN_CODE (insn) = -1; /* Force re-recognition. */
37768 df_insn_rescan (insn);
37770 if (dump_file)
37771 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
37774 /* Given INSN containing a vector direct-splat operation, adjust the
37775 index of the source lane to account for the doubleword swap. */
37776 static void
37777 adjust_splat (rtx_insn *insn)
37779 rtx body = PATTERN (insn);
37780 rtx unspec = XEXP (body, 1);
37781 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
37782 int lane = INTVAL (XVECEXP (unspec, 0, 1));
37783 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37784 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
37785 INSN_CODE (insn) = -1; /* Force re-recognition. */
37786 df_insn_rescan (insn);
37788 if (dump_file)
37789 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
37792 /* Given INSN containing an XXPERMDI operation (that is not a doubleword
37793 swap), reverse the order of the source operands and adjust the indices
37794 of the source lanes to account for doubleword reversal. */
37795 static void
37796 adjust_xxpermdi (rtx_insn *insn)
37798 rtx set = PATTERN (insn);
37799 rtx select = XEXP (set, 1);
37800 rtx concat = XEXP (select, 0);
37801 rtx src0 = XEXP (concat, 0);
37802 XEXP (concat, 0) = XEXP (concat, 1);
37803 XEXP (concat, 1) = src0;
37804 rtx parallel = XEXP (select, 1);
37805 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
37806 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
37807 int new_lane0 = 3 - lane1;
37808 int new_lane1 = 3 - lane0;
37809 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
37810 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
37811 INSN_CODE (insn) = -1; /* Force re-recognition. */
37812 df_insn_rescan (insn);
37814 if (dump_file)
37815 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
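/* For example, lanes (0, 2), which select {op0[0], op1[0]}, become
   lanes (3 - 2, 3 - 0) = (1, 3) over the reversed concatenation,
   picking the same doublewords out of the swapped source registers.  */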
37818 /* Given INSN containing a VEC_CONCAT operation of two doublewords,
37819 reverse the order of those inputs. */
37820 static void
37821 adjust_concat (rtx_insn *insn)
37823 rtx set = PATTERN (insn);
37824 rtx concat = XEXP (set, 1);
37825 rtx src0 = XEXP (concat, 0);
37826 XEXP (concat, 0) = XEXP (concat, 1);
37827 XEXP (concat, 1) = src0;
37828 INSN_CODE (insn) = -1; /* Force re-recognition. */
37829 df_insn_rescan (insn);
37831 if (dump_file)
37832 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
37835 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
37836 constant pool to reflect swapped doublewords. */
37837 static void
37838 adjust_vperm (rtx_insn *insn)
37840 /* We previously determined that the UNSPEC_VPERM was fed by a
37841 swap of a swapping load of a TOC-relative constant pool symbol.
37842 Find the MEM in the swapping load and replace it with a MEM for
37843 the adjusted mask constant. */
37844 rtx set = PATTERN (insn);
37845 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
37847 /* Find the swap. */
37848 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37849 df_ref use;
37850 rtx_insn *swap_insn = 0;
37851 FOR_EACH_INSN_INFO_USE (use, insn_info)
37852 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37854 struct df_link *def_link = DF_REF_CHAIN (use);
37855 gcc_assert (def_link && !def_link->next);
37856 swap_insn = DF_REF_INSN (def_link->ref);
37857 break;
37859 gcc_assert (swap_insn);
37861 /* Find the load. */
37862 insn_info = DF_INSN_INFO_GET (swap_insn);
37863 rtx_insn *load_insn = 0;
37864 FOR_EACH_INSN_INFO_USE (use, insn_info)
37866 struct df_link *def_link = DF_REF_CHAIN (use);
37867 gcc_assert (def_link && !def_link->next);
37868 load_insn = DF_REF_INSN (def_link->ref);
37869 break;
37871 gcc_assert (load_insn);
37873 /* Find the TOC-relative symbol access. */
37874 insn_info = DF_INSN_INFO_GET (load_insn);
37875 rtx_insn *tocrel_insn = 0;
37876 FOR_EACH_INSN_INFO_USE (use, insn_info)
37878 struct df_link *def_link = DF_REF_CHAIN (use);
37879 gcc_assert (def_link && !def_link->next);
37880 tocrel_insn = DF_REF_INSN (def_link->ref);
37881 break;
37883 gcc_assert (tocrel_insn);
37885 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
37886 to set tocrel_base; otherwise it would be unnecessary as we've
37887 already established it will return true. */
37888 rtx base, offset;
37889 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
37890 /* There is an extra level of indirection for small/large code models. */
37891 if (GET_CODE (tocrel_expr) == MEM)
37892 tocrel_expr = XEXP (tocrel_expr, 0);
37893 if (!toc_relative_expr_p (tocrel_expr, false))
37894 gcc_unreachable ();
37895 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37896 rtx const_vector = get_pool_constant (base);
37897 /* With the extra indirection, get_pool_constant will produce the
37898 real constant from the reg_equal expression, so get the real
37899 constant. */
37900 if (GET_CODE (const_vector) == SYMBOL_REF)
37901 const_vector = get_pool_constant (const_vector);
37902 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
37904 /* Create an adjusted mask from the initial mask. */
37905 unsigned int new_mask[16], i, val;
37906 for (i = 0; i < 16; ++i) {
37907 val = INTVAL (XVECEXP (const_vector, 0, i));
37908 if (val < 16)
37909 new_mask[i] = (val + 8) % 16;
37910 else
37911 new_mask[i] = ((val + 8) % 16) + 16;
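/* For example, selector 0 becomes 8, 8 becomes 0, 16 becomes 24, and
   31 becomes 23 -- the same M -> M' rewrite described in the "Notes
   on Permutes" commentary above.  */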
37914 /* Create a new CONST_VECTOR and a MEM that references it. */
37915 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
37916 for (i = 0; i < 16; ++i)
37917 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
37918 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
37919 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
37920 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
37921 can't recognize. Force the SYMBOL_REF into a register. */
37922 if (!REG_P (XEXP (new_mem, 0))) {
37923 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
37924 XEXP (new_mem, 0) = base_reg;
37925 /* Move the newly created insn ahead of the load insn. */
37926 rtx_insn *force_insn = get_last_insn ();
37927 remove_insn (force_insn);
37928 rtx_insn *before_load_insn = PREV_INSN (load_insn);
37929 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
37930 df_insn_rescan (before_load_insn);
37931 df_insn_rescan (force_insn);
37934 /* Replace the MEM in the load instruction and rescan it. */
37935 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
37936 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
37937 df_insn_rescan (load_insn);
37939 if (dump_file)
37940 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
37943 /* The insn described by INSN_ENTRY[I] can be swapped, but only
37944 with special handling. Take care of that here. */
37945 static void
37946 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
37948 rtx_insn *insn = insn_entry[i].insn;
37949 rtx body = PATTERN (insn);
37951 switch (insn_entry[i].special_handling)
37953 default:
37954 gcc_unreachable ();
37955 case SH_CONST_VECTOR:
37957 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
37958 gcc_assert (GET_CODE (body) == SET);
37959 rtx rhs = SET_SRC (body);
37960 swap_const_vector_halves (rhs);
37961 if (dump_file)
37962 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
37963 break;
37965 case SH_SUBREG:
37966 /* A subreg of the same size is already safe. For subregs that
37967 select a smaller portion of a reg, adjust the index for
37968 swapped doublewords. */
37969 adjust_subreg_index (body);
37970 if (dump_file)
37971 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
37972 break;
37973 case SH_NOSWAP_LD:
37974 /* Convert a non-permuting load to a permuting one. */
37975 permute_load (insn);
37976 break;
37977 case SH_NOSWAP_ST:
37978 /* Convert a non-permuting store to a permuting one. */
37979 permute_store (insn);
37980 break;
37981 case SH_EXTRACT:
37982 /* Change the lane on an extract operation. */
37983 adjust_extract (insn);
37984 break;
37985 case SH_SPLAT:
37986 /* Change the lane on a direct-splat operation. */
37987 adjust_splat (insn);
37988 break;
37989 case SH_XXPERMDI:
37990 /* Change the lanes on an XXPERMDI operation. */
37991 adjust_xxpermdi (insn);
37992 break;
37993 case SH_CONCAT:
37994 /* Reverse the order of a concatenation operation. */
37995 adjust_concat (insn);
37996 break;
37997 case SH_VPERM:
37998 /* Change the mask loaded from the constant pool for a VPERM. */
37999 adjust_vperm (insn);
38000 break;
38004 /* Find the insn from the Ith table entry, which is known to be a
38005 register swap Y = SWAP(X). Replace it with a copy Y = X. */
38006 static void
38007 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
38009 rtx_insn *insn = insn_entry[i].insn;
38010 rtx body = PATTERN (insn);
38011 rtx src_reg = XEXP (SET_SRC (body), 0);
38012 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
38013 rtx_insn *new_insn = emit_insn_before (copy, insn);
38014 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
38015 df_insn_rescan (new_insn);
38017 if (dump_file)
38019 unsigned int new_uid = INSN_UID (new_insn);
38020 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
38023 df_insn_delete (insn);
38024 remove_insn (insn);
38025 insn->set_deleted ();
38028 /* Dump the swap table to DUMP_FILE. */
38029 static void
38030 dump_swap_insn_table (swap_web_entry *insn_entry)
38032 int e = get_max_uid ();
38033 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
38035 for (int i = 0; i < e; ++i)
38036 if (insn_entry[i].is_relevant)
38038 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
38039 fprintf (dump_file, "%6d %6d ", i,
38040 pred_entry && pred_entry->insn
38041 ? INSN_UID (pred_entry->insn) : 0);
38042 if (insn_entry[i].is_load)
38043 fputs ("load ", dump_file);
38044 if (insn_entry[i].is_store)
38045 fputs ("store ", dump_file);
38046 if (insn_entry[i].is_swap)
38047 fputs ("swap ", dump_file);
38048 if (insn_entry[i].is_live_in)
38049 fputs ("live-in ", dump_file);
38050 if (insn_entry[i].is_live_out)
38051 fputs ("live-out ", dump_file);
38052 if (insn_entry[i].contains_subreg)
38053 fputs ("subreg ", dump_file);
38054 if (insn_entry[i].is_128_int)
38055 fputs ("int128 ", dump_file);
38056 if (insn_entry[i].is_call)
38057 fputs ("call ", dump_file);
38058 if (insn_entry[i].is_swappable)
38060 fputs ("swappable ", dump_file);
38061 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
38062 fputs ("special:constvec ", dump_file);
38063 else if (insn_entry[i].special_handling == SH_SUBREG)
38064 fputs ("special:subreg ", dump_file);
38065 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
38066 fputs ("special:load ", dump_file);
38067 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
38068 fputs ("special:store ", dump_file);
38069 else if (insn_entry[i].special_handling == SH_EXTRACT)
38070 fputs ("special:extract ", dump_file);
38071 else if (insn_entry[i].special_handling == SH_SPLAT)
38072 fputs ("special:splat ", dump_file);
38073 else if (insn_entry[i].special_handling == SH_XXPERMDI)
38074 fputs ("special:xxpermdi ", dump_file);
38075 else if (insn_entry[i].special_handling == SH_CONCAT)
38076 fputs ("special:concat ", dump_file);
38077 else if (insn_entry[i].special_handling == SH_VPERM)
38078 fputs ("special:vperm ", dump_file);
38080 if (insn_entry[i].web_not_optimizable)
38081 fputs ("unoptimizable ", dump_file);
38082 if (insn_entry[i].will_delete)
38083 fputs ("delete ", dump_file);
38084 fputs ("\n", dump_file);
38086 fputs ("\n", dump_file);
38089 /* Given ALIGN, an rtx of the form (and addr (const_int -16)), return
38090 a copy with its address canonicalized to (reg) or (plus reg reg).
38091 Always return a new copy to avoid problems with combine. */
38092 static rtx
38093 alignment_with_canonical_addr (rtx align)
38095 rtx canon;
38096 rtx addr = XEXP (align, 0);
38098 if (REG_P (addr))
38099 canon = addr;
38101 else if (GET_CODE (addr) == PLUS)
38103 rtx addrop0 = XEXP (addr, 0);
38104 rtx addrop1 = XEXP (addr, 1);
38106 if (!REG_P (addrop0))
38107 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
38109 if (!REG_P (addrop1))
38110 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
38112 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
38115 else
38116 canon = force_reg (GET_MODE (addr), addr);
38118 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
38121 /* Check whether an rtx is an alignment mask, and if so, return
38122 a fully-expanded rtx for the masking operation. */
38123 static rtx
38124 alignment_mask (rtx_insn *insn)
38126 rtx body = PATTERN (insn);
38128 if (GET_CODE (body) != SET
38129 || GET_CODE (SET_SRC (body)) != AND
38130 || !REG_P (XEXP (SET_SRC (body), 0)))
38131 return 0;
38133 rtx mask = XEXP (SET_SRC (body), 1);
38135 if (GET_CODE (mask) == CONST_INT)
38137 if (INTVAL (mask) == -16)
38138 return alignment_with_canonical_addr (SET_SRC (body));
38139 else
38140 return 0;
38143 if (!REG_P (mask))
38144 return 0;
38146 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38147 df_ref use;
38148 rtx real_mask = 0;
38150 FOR_EACH_INSN_INFO_USE (use, insn_info)
38152 if (!rtx_equal_p (DF_REF_REG (use), mask))
38153 continue;
38155 struct df_link *def_link = DF_REF_CHAIN (use);
38156 if (!def_link || def_link->next)
38157 return 0;
38159 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
38160 rtx const_body = PATTERN (const_insn);
38161 if (GET_CODE (const_body) != SET)
38162 return 0;
38164 real_mask = SET_SRC (const_body);
38166 if (GET_CODE (real_mask) != CONST_INT
38167 || INTVAL (real_mask) != -16)
38168 return 0;
38171 if (real_mask == 0)
38172 return 0;
38174 return alignment_with_canonical_addr (SET_SRC (body));
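/* Note that both the single-insn form

     (set (reg y) (and (reg x) (const_int -16)))

   and the two-insn form, where the -16 mask is first loaded into a
   register, are accepted above; in either case the result is a fresh
   (and ... (const_int -16)) with a canonicalized address.  */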
38177 /* Given INSN that's a load or store based at BASE_REG, look for a
38178 feeding computation that aligns its address on a 16-byte boundary. */
38179 static rtx
38180 find_alignment_op (rtx_insn *insn, rtx base_reg)
38182 df_ref base_use;
38183 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38184 rtx and_operation = 0;
38186 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
38188 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
38189 continue;
38191 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
38192 if (!base_def_link || base_def_link->next)
38193 break;
38195 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
38196 and_operation = alignment_mask (and_insn);
38197 if (and_operation != 0)
38198 break;
38201 return and_operation;
38204 struct del_info { bool replace; rtx_insn *replace_insn; };
38206 /* If INSN is the load for an lvx pattern, put it in canonical form. */
38207 static void
38208 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
38210 rtx body = PATTERN (insn);
38211 gcc_assert (GET_CODE (body) == SET
38212 && GET_CODE (SET_SRC (body)) == VEC_SELECT
38213 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
38215 rtx mem = XEXP (SET_SRC (body), 0);
38216 rtx base_reg = XEXP (mem, 0);
38218 rtx and_operation = find_alignment_op (insn, base_reg);
38220 if (and_operation != 0)
38222 df_ref def;
38223 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38224 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38226 struct df_link *link = DF_REF_CHAIN (def);
38227 if (!link || link->next)
38228 break;
38230 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
38231 if (!insn_is_swap_p (swap_insn)
38232 || insn_is_load_p (swap_insn)
38233 || insn_is_store_p (swap_insn))
38234 break;
38236 /* Expected lvx pattern found. Change the swap to
38237 a copy, and propagate the AND operation into the
38238 load. */
38239 to_delete[INSN_UID (swap_insn)].replace = true;
38240 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
38242 XEXP (mem, 0) = and_operation;
38243 SET_SRC (body) = mem;
38244 INSN_CODE (insn) = -1; /* Force re-recognition. */
38245 df_insn_rescan (insn);
38247 if (dump_file)
38248 fprintf (dump_file, "lvx opportunity found at %d\n",
38249 INSN_UID (insn));
38254 /* If INSN is the store for an stvx pattern, put it in canonical form. */
38255 static void
38256 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
38258 rtx body = PATTERN (insn);
38259 gcc_assert (GET_CODE (body) == SET
38260 && GET_CODE (SET_DEST (body)) == MEM
38261 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
38262 rtx mem = SET_DEST (body);
38263 rtx base_reg = XEXP (mem, 0);
38265 rtx and_operation = find_alignment_op (insn, base_reg);
38267 if (and_operation != 0)
38269 rtx src_reg = XEXP (SET_SRC (body), 0);
38270 df_ref src_use;
38271 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38272 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
38274 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
38275 continue;
38277 struct df_link *link = DF_REF_CHAIN (src_use);
38278 if (!link || link->next)
38279 break;
38281 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
38282 if (!insn_is_swap_p (swap_insn)
38283 || insn_is_load_p (swap_insn)
38284 || insn_is_store_p (swap_insn))
38285 break;
38287 /* Expected stvx pattern found. Change the swap to
38288 a copy, and propagate the AND operation into the
38289 store. */
38290 to_delete[INSN_UID (swap_insn)].replace = true;
38291 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
38293 XEXP (mem, 0) = and_operation;
38294 SET_SRC (body) = src_reg;
38295 INSN_CODE (insn) = -1; /* Force re-recognition. */
38296 df_insn_rescan (insn);
38298 if (dump_file)
38299 fprintf (dump_file, "stvx opportunity found at %d\n",
38300 INSN_UID (insn));
38305 /* Look for patterns created from builtin lvx and stvx calls, and
38306 canonicalize them to be properly recognized as such. */
38307 static void
38308 recombine_lvx_stvx_patterns (function *fun)
38310 int i;
38311 basic_block bb;
38312 rtx_insn *insn;
38314 int num_insns = get_max_uid ();
38315 del_info *to_delete = XCNEWVEC (del_info, num_insns);
38317 FOR_ALL_BB_FN (bb, fun)
38318 FOR_BB_INSNS (bb, insn)
38320 if (!NONDEBUG_INSN_P (insn))
38321 continue;
38323 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
38324 recombine_lvx_pattern (insn, to_delete);
38325 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
38326 recombine_stvx_pattern (insn, to_delete);
38329 /* Turning swaps into copies is delayed until now, to avoid problems
38330 with deleting instructions during the insn walk. */
38331 for (i = 0; i < num_insns; i++)
38332 if (to_delete[i].replace)
38334 rtx swap_body = PATTERN (to_delete[i].replace_insn);
38335 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
38336 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
38337 rtx_insn *new_insn = emit_insn_before (copy,
38338 to_delete[i].replace_insn);
38339 set_block_for_insn (new_insn,
38340 BLOCK_FOR_INSN (to_delete[i].replace_insn));
38341 df_insn_rescan (new_insn);
38342 df_insn_delete (to_delete[i].replace_insn);
38343 remove_insn (to_delete[i].replace_insn);
38344 to_delete[i].replace_insn->set_deleted ();
38347 free (to_delete);
38350 /* Main entry point for this pass. */
38351 unsigned int
38352 rs6000_analyze_swaps (function *fun)
38354 swap_web_entry *insn_entry;
38355 basic_block bb;
38356 rtx_insn *insn, *curr_insn = 0;
38358 /* Dataflow analysis for use-def chains. */
38359 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
38360 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
38361 df_analyze ();
38362 df_set_flags (DF_DEFER_INSN_RESCAN);
38364 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
38365 recombine_lvx_stvx_patterns (fun);
38367 /* Allocate structure to represent webs of insns. */
38368 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
38370 /* Walk the insns to gather basic data. */
38371 FOR_ALL_BB_FN (bb, fun)
38372 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
38374 unsigned int uid = INSN_UID (insn);
38375 if (NONDEBUG_INSN_P (insn))
38377 insn_entry[uid].insn = insn;
38379 if (GET_CODE (insn) == CALL_INSN)
38380 insn_entry[uid].is_call = 1;
38382 /* Walk the uses and defs to see if we mention vector regs.
38383 Record any constraints on optimization of such mentions. */
38384 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38385 df_ref mention;
38386 FOR_EACH_INSN_INFO_USE (mention, insn_info)
38388 /* We use DF_REF_REAL_REG here to get inside any subregs. */
38389 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
38391 /* If a use gets its value from a call insn, it will be
38392 a hard register and will look like (reg:V4SI 3 3).
38393 The df analysis creates two mentions for GPR3 and GPR4,
38394 both DImode. We must recognize this and treat it as a
38395 vector mention to ensure the call is unioned with this
38396 use. */
38397 if (mode == DImode && DF_REF_INSN_INFO (mention))
38399 rtx feeder = DF_REF_INSN (mention);
38400 /* FIXME: It is pretty hard to get from the df mention
38401 to the mode of the use in the insn. We arbitrarily
38402 pick a vector mode here, even though the use might
38403 be a real DImode. We can be too conservative
38404 (create a web larger than necessary) because of
38405 this, so consider eventually fixing this. */
38406 if (GET_CODE (feeder) == CALL_INSN)
38407 mode = V4SImode;
38410 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
38412 insn_entry[uid].is_relevant = 1;
38413 if (mode == TImode || mode == V1TImode
38414 || FLOAT128_VECTOR_P (mode))
38415 insn_entry[uid].is_128_int = 1;
38416 if (DF_REF_INSN_INFO (mention))
38417 insn_entry[uid].contains_subreg
38418 = !rtx_equal_p (DF_REF_REG (mention),
38419 DF_REF_REAL_REG (mention));
38420 union_defs (insn_entry, insn, mention);
38423 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
38425 /* We use DF_REF_REAL_REG here to get inside any subregs. */
38426 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
38428 /* If we're loading up a hard vector register for a call,
38429 it looks like (set (reg:V4SI 9 9) (...)). The df
38430 analysis creates two mentions for GPR9 and GPR10, both
38431 DImode. So relying on the mode from the mentions
38432 isn't sufficient to ensure we union the call into the
38433 web with the parameter setup code. */
38434 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
38435 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
38436 mode = GET_MODE (SET_DEST (PATTERN (insn)));
38438 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
38440 insn_entry[uid].is_relevant = 1;
38441 if (mode == TImode || mode == V1TImode
38442 || FLOAT128_VECTOR_P (mode))
38443 insn_entry[uid].is_128_int = 1;
38444 if (DF_REF_INSN_INFO (mention))
38445 insn_entry[uid].contains_subreg
38446 = !rtx_equal_p (DF_REF_REG (mention),
38447 DF_REF_REAL_REG (mention));
38448 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
38449 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
38450 insn_entry[uid].is_live_out = 1;
38451 union_uses (insn_entry, insn, mention);
38455 if (insn_entry[uid].is_relevant)
38457 /* Determine if this is a load or store. */
38458 insn_entry[uid].is_load = insn_is_load_p (insn);
38459 insn_entry[uid].is_store = insn_is_store_p (insn);
38461 /* Determine if this is a doubleword swap. If not,
38462 determine whether it can legally be swapped. */
38463 if (insn_is_swap_p (insn))
38464 insn_entry[uid].is_swap = 1;
38465 else
38467 unsigned int special = SH_NONE;
38468 insn_entry[uid].is_swappable
38469 = insn_is_swappable_p (insn_entry, insn, &special);
38470 if (special != SH_NONE && insn_entry[uid].contains_subreg)
38471 insn_entry[uid].is_swappable = 0;
38472 else if (special != SH_NONE)
38473 insn_entry[uid].special_handling = special;
38474 else if (insn_entry[uid].contains_subreg)
38475 insn_entry[uid].special_handling = SH_SUBREG;
  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting, then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }
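
  /* For reference (a sketch of the little-endian situation): lxvd2x
     and stxvd2x are "permuting" accesses -- they transfer the two
     doublewords in big-endian element order -- so a correct LE
     sequence pairs each of them with an xxswapd.  The webs rejected
     above are those where that pairing cannot be established for
     every load and store in the web.  */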
  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);
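
  /* Schematically (a sketch), replace_swap_with_copy rewrites
       (set (reg:V2DI X) (vec_select:V2DI (reg:V2DI Y)
					  (parallel [(const_int 1)
						     (const_int 0)])))
     as the plain move
       (set (reg:V2DI X) (reg:V2DI Y))
     which is safe once every producer and consumer in its web has
     been accounted for.  */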
  /* Clean up.  */
  free (insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
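
/* The pass must still be wired into the pass manager from the
   target's start-up code; a hypothetical sketch using the generic
   register_pass interface (the insertion point shown is illustrative
   only):

     opt_pass *p = make_pass_analyze_swaps (g);
     struct register_pass_info info
       = { p, "cse1", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&info);  */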
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
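
  /* So when there is no hardware FPSCR (!TARGET_HARD_FLOAT or
     !TARGET_FPRS), the three sequences reduce to the following calls
     (a sketch):

       double fenv_var;
       __atomic_feholdexcept (&fenv_var);    -- *hold
       __atomic_feclearexcept ();            -- *clear
       __atomic_feupdateenv (&fenv_var);     -- *update  */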
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
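
  /* In the double returned by __builtin_mffs, the FPSCR occupies the
     low 32 bits of the bit image; the bottom three bits are the NI
     (non-IEEE mode) flag and the two RN rounding-mode bits, which is
     what the 0x7 above preserves.  */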
  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower word of the FPSCR image, i.e. the
     exception flags and everything else in it; the rounding modes and
     enables saved in fenv_var are reinstated by the update step.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);
  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
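
/* An illustrative note (not from the sources): with this hook in
   place, a 1.0 / sqrt (x) computed under -ffast-math is routed
   through the rsqrt optab only when optimizing for speed and the
   reciprocal-estimate machinery (see -mrecip and
   RS6000_RECIP_AUTO_RSQRTE_P) covers the mode; otherwise the generic
   square-root-then-divide expansion is kept.  */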
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"