[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob d2a62bdd72b6a8282591781feab1b6f88912b986
1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "cfgloop.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "expmed.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ira.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "print-tree.h"
47 #include "varasm.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "output.h"
51 #include "dbxout.h"
52 #include "common/common-target.h"
53 #include "langhooks.h"
54 #include "reload.h"
55 #include "sched-int.h"
56 #include "gimplify.h"
57 #include "gimple-iterator.h"
58 #include "gimple-walk.h"
59 #include "intl.h"
60 #include "params.h"
61 #include "tm-constrs.h"
62 #include "tree-vectorizer.h"
63 #include "target-globals.h"
64 #include "builtins.h"
65 #include "context.h"
66 #include "tree-pass.h"
67 #if TARGET_XCOFF
68 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
69 #endif
70 #if TARGET_MACHO
71 #include "gstab.h" /* for N_SLINE */
72 #endif
73 #include "case-cfn-macros.h"
74 #include "ppc-auxv.h"
76 /* This file should be included last. */
77 #include "target-def.h"
79 #ifndef TARGET_NO_PROTOTYPE
80 #define TARGET_NO_PROTOTYPE 0
81 #endif
83 #define min(A,B) ((A) < (B) ? (A) : (B))
84 #define max(A,B) ((A) > (B) ? (A) : (B))
86 /* Structure used to define the rs6000 stack */
87 typedef struct rs6000_stack {
88 int reload_completed; /* stack info won't change from here on */
89 int first_gp_reg_save; /* first callee saved GP register used */
90 int first_fp_reg_save; /* first callee saved FP register used */
91 int first_altivec_reg_save; /* first callee saved AltiVec register used */
92 int lr_save_p; /* true if the link reg needs to be saved */
93 int cr_save_p; /* true if the CR reg needs to be saved */
94 unsigned int vrsave_mask; /* mask of vec registers to save */
95 int push_p; /* true if we need to allocate stack space */
96 int calls_p; /* true if the function makes any calls */
97 int world_save_p; /* true if we're saving *everything*:
98 r13-r31, cr, f14-f31, vrsave, v20-v31 */
99 enum rs6000_abi abi; /* which ABI to use */
100 int gp_save_offset; /* offset to save GP regs from initial SP */
101 int fp_save_offset; /* offset to save FP regs from initial SP */
102 int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
103 int lr_save_offset; /* offset to save LR from initial SP */
104 int cr_save_offset; /* offset to save CR from initial SP */
105 int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
106 int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
107 int varargs_save_offset; /* offset to save the varargs registers */
108 int ehrd_offset; /* offset to EH return data */
109 int ehcr_offset; /* offset to EH CR field data */
110 int reg_size; /* register size (4 or 8) */
111 HOST_WIDE_INT vars_size; /* variable save area size */
112 int parm_size; /* outgoing parameter size */
113 int save_size; /* save area size */
114 int fixed_size; /* fixed size of stack frame */
115 int gp_size; /* size of saved GP registers */
116 int fp_size; /* size of saved FP registers */
117 int altivec_size; /* size of saved AltiVec registers */
118 int cr_size; /* size to hold CR if not in fixed area */
119 int vrsave_size; /* size to hold VRSAVE */
120 int altivec_padding_size; /* size of altivec alignment padding */
121 int spe_gp_size; /* size of 64-bit GPR save size for SPE */
122 int spe_padding_size;
123 HOST_WIDE_INT total_size; /* total bytes allocated for stack */
124 int spe_64bit_regs_used;
125 int savres_strategy;
126 } rs6000_stack_t;
128 /* A C structure for machine-specific, per-function data.
129 This is added to the cfun structure. */
130 typedef struct GTY(()) machine_function
131 {
132 /* Whether the instruction chain has been scanned already. */
133 int spe_insn_chain_scanned_p;
134 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
135 int ra_needs_full_frame;
136 /* Flags if __builtin_return_address (0) was used. */
137 int ra_need_lr;
138 /* Cache lr_save_p after expansion of builtin_eh_return. */
139 int lr_save_state;
140 /* Whether we need to save the TOC to the reserved stack location in the
141 function prologue. */
142 bool save_toc_in_prologue;
143 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
144 varargs save area. */
145 HOST_WIDE_INT varargs_save_offset;
146 /* Temporary stack slot to use for SDmode copies. This slot is
147 64-bits wide and is allocated early enough so that the offset
148 does not overflow the 16-bit load/store offset field. */
149 rtx sdmode_stack_slot;
150 /* Alternative internal arg pointer for -fsplit-stack. */
151 rtx split_stack_arg_pointer;
152 bool split_stack_argp_used;
153 /* Flag if r2 setup is needed with ELFv2 ABI. */
154 bool r2_setup_needed;
155 } machine_function;
157 /* Support targetm.vectorize.builtin_mask_for_load. */
158 static GTY(()) tree altivec_builtin_mask_for_load;
160 /* Set to nonzero once AIX common-mode calls have been defined. */
161 static GTY(()) int common_mode_defined;
163 /* Label number of the label created for -mrelocatable, which we call to
164 so we can get the address of the GOT section. */
165 static int rs6000_pic_labelno;
167 #ifdef USING_ELFOS_H
168 /* Counter for labels which are to be placed in .fixup. */
169 int fixuplabelno = 0;
170 #endif
172 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
173 int dot_symbols;
175 /* Specify the machine mode that pointers have. After generation of rtl, the
176 compiler makes no further distinction between pointers and any other objects
177 of this machine mode. The type is unsigned since not all things that
178 include rs6000.h also include machmode.h. */
179 unsigned rs6000_pmode;
181 /* Width in bits of a pointer. */
182 unsigned rs6000_pointer_size;
184 #ifdef HAVE_AS_GNU_ATTRIBUTE
185 /* Flag whether floating point values have been passed/returned. */
186 static bool rs6000_passes_float;
187 /* Flag whether vector values have been passed/returned. */
188 static bool rs6000_passes_vector;
189 /* Flag whether small (<= 8 byte) structures have been returned. */
190 static bool rs6000_returns_struct;
191 #endif
193 /* Value is TRUE if register/mode pair is acceptable. */
194 bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
196 /* Maximum number of registers needed for a given register class and mode. */
197 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
199 /* How many registers are needed for a given register and mode. */
200 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
202 /* Map register number to register class. */
203 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
205 static int dbg_cost_ctrl;
207 /* Built in types. */
208 tree rs6000_builtin_types[RS6000_BTI_MAX];
209 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
211 /* Flag to say the TOC is initialized */
212 int toc_initialized, need_toc_init;
213 char toc_label_name[10];
215 /* Cached value of rs6000_variable_issue. This is cached in
216 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
217 static short cached_can_issue_more;
219 static GTY(()) section *read_only_data_section;
220 static GTY(()) section *private_data_section;
221 static GTY(()) section *tls_data_section;
222 static GTY(()) section *tls_private_data_section;
223 static GTY(()) section *read_only_private_data_section;
224 static GTY(()) section *sdata2_section;
225 static GTY(()) section *toc_section;
227 struct builtin_description
228 {
229 const HOST_WIDE_INT mask;
230 const enum insn_code icode;
231 const char *const name;
232 const enum rs6000_builtins code;
233 };
235 /* Describe the vector unit used for modes. */
236 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
237 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
239 /* Register classes for various constraints that are based on the target
240 switches. */
241 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
243 /* Describe the alignment of a vector. */
244 int rs6000_vector_align[NUM_MACHINE_MODES];
246 /* Map selected modes to types for builtins. */
247 static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
249 /* What modes to automatically generate reciprocal divide estimate (fre) and
250 reciprocal sqrt (frsqrte) for. */
251 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
253 /* Masks to determine which reciprocal estimate instructions to generate
254 automatically. */
255 enum rs6000_recip_mask {
256 RECIP_SF_DIV = 0x001, /* Use divide estimate */
257 RECIP_DF_DIV = 0x002,
258 RECIP_V4SF_DIV = 0x004,
259 RECIP_V2DF_DIV = 0x008,
261 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
262 RECIP_DF_RSQRT = 0x020,
263 RECIP_V4SF_RSQRT = 0x040,
264 RECIP_V2DF_RSQRT = 0x080,
266 /* Various combinations of flags for -mrecip=xxx. */
267 RECIP_NONE = 0,
268 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
269 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
270 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
272 RECIP_HIGH_PRECISION = RECIP_ALL,
274 /* On low precision machines like the power5, don't enable double precision
275 reciprocal square root estimate, since it isn't accurate enough. */
276 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
277 };
279 /* -mrecip options. */
280 static struct
281 {
282 const char *string; /* option name */
283 unsigned int mask; /* mask bits to set */
284 } recip_options[] = {
285 { "all", RECIP_ALL },
286 { "none", RECIP_NONE },
287 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
288 | RECIP_V2DF_DIV) },
289 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
290 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
291 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
292 | RECIP_V2DF_RSQRT) },
293 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
294 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
297 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
298 static const struct
299 {
300 const char *cpu;
301 unsigned int cpuid;
302 } cpu_is_info[] = {
303 { "power9", PPC_PLATFORM_POWER9 },
304 { "power8", PPC_PLATFORM_POWER8 },
305 { "power7", PPC_PLATFORM_POWER7 },
306 { "power6x", PPC_PLATFORM_POWER6X },
307 { "power6", PPC_PLATFORM_POWER6 },
308 { "power5+", PPC_PLATFORM_POWER5_PLUS },
309 { "power5", PPC_PLATFORM_POWER5 },
310 { "ppc970", PPC_PLATFORM_PPC970 },
311 { "power4", PPC_PLATFORM_POWER4 },
312 { "ppca2", PPC_PLATFORM_PPCA2 },
313 { "ppc476", PPC_PLATFORM_PPC476 },
314 { "ppc464", PPC_PLATFORM_PPC464 },
315 { "ppc440", PPC_PLATFORM_PPC440 },
316 { "ppc405", PPC_PLATFORM_PPC405 },
317 { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
320 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
321 static const struct
322 {
323 const char *hwcap;
324 int mask;
325 unsigned int id;
326 } cpu_supports_info[] = {
327 /* AT_HWCAP masks. */
328 { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
329 { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
330 { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
331 { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
332 { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
333 { "booke", PPC_FEATURE_BOOKE, 0 },
334 { "cellbe", PPC_FEATURE_CELL_BE, 0 },
335 { "dfp", PPC_FEATURE_HAS_DFP, 0 },
336 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
337 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
338 { "fpu", PPC_FEATURE_HAS_FPU, 0 },
339 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
340 { "mmu", PPC_FEATURE_HAS_MMU, 0 },
341 { "notb", PPC_FEATURE_NO_TB, 0 },
342 { "pa6t", PPC_FEATURE_PA6T, 0 },
343 { "power4", PPC_FEATURE_POWER4, 0 },
344 { "power5", PPC_FEATURE_POWER5, 0 },
345 { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
346 { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
347 { "ppc32", PPC_FEATURE_32, 0 },
348 { "ppc601", PPC_FEATURE_601_INSTR, 0 },
349 { "ppc64", PPC_FEATURE_64, 0 },
350 { "ppcle", PPC_FEATURE_PPC_LE, 0 },
351 { "smt", PPC_FEATURE_SMT, 0 },
352 { "spe", PPC_FEATURE_HAS_SPE, 0 },
353 { "true_le", PPC_FEATURE_TRUE_LE, 0 },
354 { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
355 { "vsx", PPC_FEATURE_HAS_VSX, 0 },
357 /* AT_HWCAP2 masks. */
358 { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
359 { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
360 { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
361 { "htm", PPC_FEATURE2_HAS_HTM, 1 },
362 { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
363 { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
364 { "tar", PPC_FEATURE2_HAS_TAR, 1 },
365 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
366 { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
367 { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }
370 /* Newer LIBCs explicitly export this symbol to declare that they provide
371 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
372 reference to this symbol whenever we expand a CPU builtin, so that
373 we never link against an old LIBC. */
374 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
376 /* True if we have expanded a CPU builtin. */
377 bool cpu_builtin_p;
379 /* Pointer to function (in rs6000-c.c) that can define or undefine target
380 macros that have changed. Languages that don't support the preprocessor
381 don't link in rs6000-c.c, so we can't call it directly. */
382 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
384 /* Simplify register classes into simpler classifications. We assume
385 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
386 check for standard register classes (gpr/floating/altivec/vsx) and
387 floating/vector classes (float/altivec/vsx). */
389 enum rs6000_reg_type {
390 NO_REG_TYPE,
391 PSEUDO_REG_TYPE,
392 GPR_REG_TYPE,
393 VSX_REG_TYPE,
394 ALTIVEC_REG_TYPE,
395 FPR_REG_TYPE,
396 SPR_REG_TYPE,
397 CR_REG_TYPE,
398 SPE_ACC_TYPE,
399 SPEFSCR_REG_TYPE
400 };
402 /* Map register class to register type. */
403 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
405 /* First/last register type for the 'normal' register types (i.e. general
406 purpose, floating point, altivec, and VSX registers). */
407 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
409 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
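/* Illustrative sketch, not from the original source: the two range checks
   above rely on the enum ordering GPR < VSX < ALTIVEC < FPR, e.g.:  */
#if 0
gcc_assert (IS_STD_REG_TYPE (ALTIVEC_REG_TYPE)   /* inside the range */
	    && !IS_STD_REG_TYPE (SPR_REG_TYPE)); /* past FPR_REG_TYPE */
#endif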
412 /* Register classes we care about in secondary reload or in legitimate
413 address checking. We only need to worry about GPR, FPR, and Altivec
414 registers here, along with an ANY field that is the OR of the 3 classes. */
416 enum rs6000_reload_reg_type {
417 RELOAD_REG_GPR, /* General purpose registers. */
418 RELOAD_REG_FPR, /* Traditional floating point regs. */
419 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
420 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
421 N_RELOAD_REG
422 };
424 /* For setting up register classes, loop through the 3 register classes mapping
425 into real registers, and skip the ANY class, which is just an OR of the
426 bits. */
427 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
428 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
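/* Illustrative sketch, not from the original source, of the loop shape the
   comment above describes; setup_reload_class is hypothetical:  */
#if 0
for (int rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
  setup_reload_class ((enum rs6000_reload_reg_type) rc); /* skips RELOAD_REG_ANY */
#endif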
430 /* Map reload register type to a register in the register class. */
431 struct reload_reg_map_type {
432 const char *name; /* Register class name. */
433 int reg; /* Register in the register class. */
434 };
436 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
437 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
438 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
439 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
440 { "Any", -1 }, /* RELOAD_REG_ANY. */
443 /* Mask bits for each register class, indexed per mode. Historically the
444 compiler has been more restrictive which types can do PRE_MODIFY instead of
445 PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */
446 typedef unsigned char addr_mask_type;
448 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
449 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
450 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
451 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
452 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
453 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
454 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
455 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
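/* Illustrative sketch, not from the original source: a mode valid in a
   register file with reg+reg, reg+offset, and pre-increment addressing
   would carry a mask like this:  */
#if 0
addr_mask_type example_mask = (RELOAD_REG_VALID | RELOAD_REG_INDEXED
			       | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC);
#endif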
457 /* Masks of valid addressing modes, indexed by register type. */
458 struct rs6000_reg_addr {
459 enum insn_code reload_load; /* INSN to reload for loading. */
460 enum insn_code reload_store; /* INSN to reload for storing. */
461 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
462 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
463 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
464 enum insn_code fusion_gpr_ld; /* INSN for fusing gpr ADDIS/loads. */
465 /* INSNs for fusing addi with loads
466 or stores for each reg. class. */
467 enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
468 enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
469 /* INSNs for fusing addis with loads
470 or stores for each reg. class. */
471 enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
472 enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
473 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
474 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
475 bool fused_toc; /* Mode supports TOC fusion. */
476 };
478 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
480 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
481 static inline bool
482 mode_supports_pre_incdec_p (machine_mode mode)
483 {
484 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
485 != 0);
486 }
488 /* Helper function to say whether a mode supports PRE_MODIFY. */
489 static inline bool
490 mode_supports_pre_modify_p (machine_mode mode)
491 {
492 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
493 != 0);
494 }
496 /* Return true if we have D-form addressing in altivec registers. */
497 static inline bool
498 mode_supports_vmx_dform (machine_mode mode)
499 {
500 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
501 }
503 /* Return true if we have D-form addressing in VSX registers. This addressing
504 is more limited than normal d-form addressing in that the offset must be
505 aligned on a 16-byte boundary. */
506 static inline bool
507 mode_supports_vsx_dform_quad (machine_mode mode)
508 {
509 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
510 != 0);
511 }
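/* Illustrative sketch, not from the original source: callers such as the
   address legitimizers use the predicates above to gate auto-update
   address forms, roughly:  */
#if 0
if ((GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
    && !mode_supports_pre_incdec_p (mode))
  return false;
if (GET_CODE (addr) == PRE_MODIFY && !mode_supports_pre_modify_p (mode))
  return false;
#endif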
514 /* Target cpu costs. */
516 struct processor_costs {
517 const int mulsi; /* cost of SImode multiplication. */
518 const int mulsi_const; /* cost of SImode multiplication by constant. */
519 const int mulsi_const9; /* cost of SImode mult by short constant. */
520 const int muldi; /* cost of DImode multiplication. */
521 const int divsi; /* cost of SImode division. */
522 const int divdi; /* cost of DImode division. */
523 const int fp; /* cost of simple SFmode and DFmode insns. */
524 const int dmul; /* cost of DFmode multiplication (and fmadd). */
525 const int sdiv; /* cost of SFmode division (fdivs). */
526 const int ddiv; /* cost of DFmode division (fdiv). */
527 const int cache_line_size; /* cache line size in bytes. */
528 const int l1_cache_size; /* size of l1 cache, in kilobytes. */
529 const int l2_cache_size; /* size of l2 cache, in kilobytes. */
530 const int simultaneous_prefetches; /* number of parallel prefetch
531 operations. */
532 const int sfdf_convert; /* cost of SF->DF conversion. */
533 };
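/* Editorial note, an assumption rather than part of this file:
   COSTS_N_INSNS comes from rtl.h and scales an instruction count into
   GCC's internal cost units, so every entry in the tables below is
   relative to a one-insn add.  The definition in this era is believed
   to be:  */
#if 0
#define COSTS_N_INSNS(N) ((N) * 4)
#endif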
535 const struct processor_costs *rs6000_cost;
537 /* Processor costs (relative to an add) */
539 /* Instruction size costs on 32-bit processors. */
540 static const
541 struct processor_costs size32_cost = {
542 COSTS_N_INSNS (1), /* mulsi */
543 COSTS_N_INSNS (1), /* mulsi_const */
544 COSTS_N_INSNS (1), /* mulsi_const9 */
545 COSTS_N_INSNS (1), /* muldi */
546 COSTS_N_INSNS (1), /* divsi */
547 COSTS_N_INSNS (1), /* divdi */
548 COSTS_N_INSNS (1), /* fp */
549 COSTS_N_INSNS (1), /* dmul */
550 COSTS_N_INSNS (1), /* sdiv */
551 COSTS_N_INSNS (1), /* ddiv */
552 32, /* cache line size */
553 0, /* l1 cache */
554 0, /* l2 cache */
555 0, /* streams */
556 0, /* SF->DF convert */
557 };
559 /* Instruction size costs on 64-bit processors. */
560 static const
561 struct processor_costs size64_cost = {
562 COSTS_N_INSNS (1), /* mulsi */
563 COSTS_N_INSNS (1), /* mulsi_const */
564 COSTS_N_INSNS (1), /* mulsi_const9 */
565 COSTS_N_INSNS (1), /* muldi */
566 COSTS_N_INSNS (1), /* divsi */
567 COSTS_N_INSNS (1), /* divdi */
568 COSTS_N_INSNS (1), /* fp */
569 COSTS_N_INSNS (1), /* dmul */
570 COSTS_N_INSNS (1), /* sdiv */
571 COSTS_N_INSNS (1), /* ddiv */
572 128, /* cache line size */
573 0, /* l1 cache */
574 0, /* l2 cache */
575 0, /* streams */
576 0, /* SF->DF convert */
577 };
579 /* Instruction costs on RS64A processors. */
580 static const
581 struct processor_costs rs64a_cost = {
582 COSTS_N_INSNS (20), /* mulsi */
583 COSTS_N_INSNS (12), /* mulsi_const */
584 COSTS_N_INSNS (8), /* mulsi_const9 */
585 COSTS_N_INSNS (34), /* muldi */
586 COSTS_N_INSNS (65), /* divsi */
587 COSTS_N_INSNS (67), /* divdi */
588 COSTS_N_INSNS (4), /* fp */
589 COSTS_N_INSNS (4), /* dmul */
590 COSTS_N_INSNS (31), /* sdiv */
591 COSTS_N_INSNS (31), /* ddiv */
592 128, /* cache line size */
593 128, /* l1 cache */
594 2048, /* l2 cache */
595 1, /* streams */
596 0, /* SF->DF convert */
597 };
599 /* Instruction costs on MPCCORE processors. */
600 static const
601 struct processor_costs mpccore_cost = {
602 COSTS_N_INSNS (2), /* mulsi */
603 COSTS_N_INSNS (2), /* mulsi_const */
604 COSTS_N_INSNS (2), /* mulsi_const9 */
605 COSTS_N_INSNS (2), /* muldi */
606 COSTS_N_INSNS (6), /* divsi */
607 COSTS_N_INSNS (6), /* divdi */
608 COSTS_N_INSNS (4), /* fp */
609 COSTS_N_INSNS (5), /* dmul */
610 COSTS_N_INSNS (10), /* sdiv */
611 COSTS_N_INSNS (17), /* ddiv */
612 32, /* cache line size */
613 4, /* l1 cache */
614 16, /* l2 cache */
615 1, /* streams */
616 0, /* SF->DF convert */
617 };
619 /* Instruction costs on PPC403 processors. */
620 static const
621 struct processor_costs ppc403_cost = {
622 COSTS_N_INSNS (4), /* mulsi */
623 COSTS_N_INSNS (4), /* mulsi_const */
624 COSTS_N_INSNS (4), /* mulsi_const9 */
625 COSTS_N_INSNS (4), /* muldi */
626 COSTS_N_INSNS (33), /* divsi */
627 COSTS_N_INSNS (33), /* divdi */
628 COSTS_N_INSNS (11), /* fp */
629 COSTS_N_INSNS (11), /* dmul */
630 COSTS_N_INSNS (11), /* sdiv */
631 COSTS_N_INSNS (11), /* ddiv */
632 32, /* cache line size */
633 4, /* l1 cache */
634 16, /* l2 cache */
635 1, /* streams */
636 0, /* SF->DF convert */
637 };
639 /* Instruction costs on PPC405 processors. */
640 static const
641 struct processor_costs ppc405_cost = {
642 COSTS_N_INSNS (5), /* mulsi */
643 COSTS_N_INSNS (4), /* mulsi_const */
644 COSTS_N_INSNS (3), /* mulsi_const9 */
645 COSTS_N_INSNS (5), /* muldi */
646 COSTS_N_INSNS (35), /* divsi */
647 COSTS_N_INSNS (35), /* divdi */
648 COSTS_N_INSNS (11), /* fp */
649 COSTS_N_INSNS (11), /* dmul */
650 COSTS_N_INSNS (11), /* sdiv */
651 COSTS_N_INSNS (11), /* ddiv */
652 32, /* cache line size */
653 16, /* l1 cache */
654 128, /* l2 cache */
655 1, /* streams */
656 0, /* SF->DF convert */
657 };
659 /* Instruction costs on PPC440 processors. */
660 static const
661 struct processor_costs ppc440_cost = {
662 COSTS_N_INSNS (3), /* mulsi */
663 COSTS_N_INSNS (2), /* mulsi_const */
664 COSTS_N_INSNS (2), /* mulsi_const9 */
665 COSTS_N_INSNS (3), /* muldi */
666 COSTS_N_INSNS (34), /* divsi */
667 COSTS_N_INSNS (34), /* divdi */
668 COSTS_N_INSNS (5), /* fp */
669 COSTS_N_INSNS (5), /* dmul */
670 COSTS_N_INSNS (19), /* sdiv */
671 COSTS_N_INSNS (33), /* ddiv */
672 32, /* cache line size */
673 32, /* l1 cache */
674 256, /* l2 cache */
675 1, /* streams */
676 0, /* SF->DF convert */
677 };
679 /* Instruction costs on PPC476 processors. */
680 static const
681 struct processor_costs ppc476_cost = {
682 COSTS_N_INSNS (4), /* mulsi */
683 COSTS_N_INSNS (4), /* mulsi_const */
684 COSTS_N_INSNS (4), /* mulsi_const9 */
685 COSTS_N_INSNS (4), /* muldi */
686 COSTS_N_INSNS (11), /* divsi */
687 COSTS_N_INSNS (11), /* divdi */
688 COSTS_N_INSNS (6), /* fp */
689 COSTS_N_INSNS (6), /* dmul */
690 COSTS_N_INSNS (19), /* sdiv */
691 COSTS_N_INSNS (33), /* ddiv */
692 32, /* l1 cache line size */
693 32, /* l1 cache */
694 512, /* l2 cache */
695 1, /* streams */
696 0, /* SF->DF convert */
697 };
699 /* Instruction costs on PPC601 processors. */
700 static const
701 struct processor_costs ppc601_cost = {
702 COSTS_N_INSNS (5), /* mulsi */
703 COSTS_N_INSNS (5), /* mulsi_const */
704 COSTS_N_INSNS (5), /* mulsi_const9 */
705 COSTS_N_INSNS (5), /* muldi */
706 COSTS_N_INSNS (36), /* divsi */
707 COSTS_N_INSNS (36), /* divdi */
708 COSTS_N_INSNS (4), /* fp */
709 COSTS_N_INSNS (5), /* dmul */
710 COSTS_N_INSNS (17), /* sdiv */
711 COSTS_N_INSNS (31), /* ddiv */
712 32, /* cache line size */
713 32, /* l1 cache */
714 256, /* l2 cache */
715 1, /* streams */
716 0, /* SF->DF convert */
717 };
719 /* Instruction costs on PPC603 processors. */
720 static const
721 struct processor_costs ppc603_cost = {
722 COSTS_N_INSNS (5), /* mulsi */
723 COSTS_N_INSNS (3), /* mulsi_const */
724 COSTS_N_INSNS (2), /* mulsi_const9 */
725 COSTS_N_INSNS (5), /* muldi */
726 COSTS_N_INSNS (37), /* divsi */
727 COSTS_N_INSNS (37), /* divdi */
728 COSTS_N_INSNS (3), /* fp */
729 COSTS_N_INSNS (4), /* dmul */
730 COSTS_N_INSNS (18), /* sdiv */
731 COSTS_N_INSNS (33), /* ddiv */
732 32, /* cache line size */
733 8, /* l1 cache */
734 64, /* l2 cache */
735 1, /* streams */
736 0, /* SF->DF convert */
737 };
739 /* Instruction costs on PPC604 processors. */
740 static const
741 struct processor_costs ppc604_cost = {
742 COSTS_N_INSNS (4), /* mulsi */
743 COSTS_N_INSNS (4), /* mulsi_const */
744 COSTS_N_INSNS (4), /* mulsi_const9 */
745 COSTS_N_INSNS (4), /* muldi */
746 COSTS_N_INSNS (20), /* divsi */
747 COSTS_N_INSNS (20), /* divdi */
748 COSTS_N_INSNS (3), /* fp */
749 COSTS_N_INSNS (3), /* dmul */
750 COSTS_N_INSNS (18), /* sdiv */
751 COSTS_N_INSNS (32), /* ddiv */
752 32, /* cache line size */
753 16, /* l1 cache */
754 512, /* l2 cache */
755 1, /* streams */
756 0, /* SF->DF convert */
757 };
759 /* Instruction costs on PPC604e processors. */
760 static const
761 struct processor_costs ppc604e_cost = {
762 COSTS_N_INSNS (2), /* mulsi */
763 COSTS_N_INSNS (2), /* mulsi_const */
764 COSTS_N_INSNS (2), /* mulsi_const9 */
765 COSTS_N_INSNS (2), /* muldi */
766 COSTS_N_INSNS (20), /* divsi */
767 COSTS_N_INSNS (20), /* divdi */
768 COSTS_N_INSNS (3), /* fp */
769 COSTS_N_INSNS (3), /* dmul */
770 COSTS_N_INSNS (18), /* sdiv */
771 COSTS_N_INSNS (32), /* ddiv */
772 32, /* cache line size */
773 32, /* l1 cache */
774 1024, /* l2 cache */
775 1, /* streams */
776 0, /* SF->DF convert */
777 };
779 /* Instruction costs on PPC620 processors. */
780 static const
781 struct processor_costs ppc620_cost = {
782 COSTS_N_INSNS (5), /* mulsi */
783 COSTS_N_INSNS (4), /* mulsi_const */
784 COSTS_N_INSNS (3), /* mulsi_const9 */
785 COSTS_N_INSNS (7), /* muldi */
786 COSTS_N_INSNS (21), /* divsi */
787 COSTS_N_INSNS (37), /* divdi */
788 COSTS_N_INSNS (3), /* fp */
789 COSTS_N_INSNS (3), /* dmul */
790 COSTS_N_INSNS (18), /* sdiv */
791 COSTS_N_INSNS (32), /* ddiv */
792 128, /* cache line size */
793 32, /* l1 cache */
794 1024, /* l2 cache */
795 1, /* streams */
796 0, /* SF->DF convert */
797 };
799 /* Instruction costs on PPC630 processors. */
800 static const
801 struct processor_costs ppc630_cost = {
802 COSTS_N_INSNS (5), /* mulsi */
803 COSTS_N_INSNS (4), /* mulsi_const */
804 COSTS_N_INSNS (3), /* mulsi_const9 */
805 COSTS_N_INSNS (7), /* muldi */
806 COSTS_N_INSNS (21), /* divsi */
807 COSTS_N_INSNS (37), /* divdi */
808 COSTS_N_INSNS (3), /* fp */
809 COSTS_N_INSNS (3), /* dmul */
810 COSTS_N_INSNS (17), /* sdiv */
811 COSTS_N_INSNS (21), /* ddiv */
812 128, /* cache line size */
813 64, /* l1 cache */
814 1024, /* l2 cache */
815 1, /* streams */
816 0, /* SF->DF convert */
817 };
819 /* Instruction costs on Cell processor. */
820 /* COSTS_N_INSNS (1) ~ one add. */
821 static const
822 struct processor_costs ppccell_cost = {
823 COSTS_N_INSNS (9/2)+2, /* mulsi */
824 COSTS_N_INSNS (6/2), /* mulsi_const */
825 COSTS_N_INSNS (6/2), /* mulsi_const9 */
826 COSTS_N_INSNS (15/2)+2, /* muldi */
827 COSTS_N_INSNS (38/2), /* divsi */
828 COSTS_N_INSNS (70/2), /* divdi */
829 COSTS_N_INSNS (10/2), /* fp */
830 COSTS_N_INSNS (10/2), /* dmul */
831 COSTS_N_INSNS (74/2), /* sdiv */
832 COSTS_N_INSNS (74/2), /* ddiv */
833 128, /* cache line size */
834 32, /* l1 cache */
835 512, /* l2 cache */
836 6, /* streams */
837 0, /* SF->DF convert */
838 };
840 /* Instruction costs on PPC750 and PPC7400 processors. */
841 static const
842 struct processor_costs ppc750_cost = {
843 COSTS_N_INSNS (5), /* mulsi */
844 COSTS_N_INSNS (3), /* mulsi_const */
845 COSTS_N_INSNS (2), /* mulsi_const9 */
846 COSTS_N_INSNS (5), /* muldi */
847 COSTS_N_INSNS (17), /* divsi */
848 COSTS_N_INSNS (17), /* divdi */
849 COSTS_N_INSNS (3), /* fp */
850 COSTS_N_INSNS (3), /* dmul */
851 COSTS_N_INSNS (17), /* sdiv */
852 COSTS_N_INSNS (31), /* ddiv */
853 32, /* cache line size */
854 32, /* l1 cache */
855 512, /* l2 cache */
856 1, /* streams */
857 0, /* SF->DF convert */
858 };
860 /* Instruction costs on PPC7450 processors. */
861 static const
862 struct processor_costs ppc7450_cost = {
863 COSTS_N_INSNS (4), /* mulsi */
864 COSTS_N_INSNS (3), /* mulsi_const */
865 COSTS_N_INSNS (3), /* mulsi_const9 */
866 COSTS_N_INSNS (4), /* muldi */
867 COSTS_N_INSNS (23), /* divsi */
868 COSTS_N_INSNS (23), /* divdi */
869 COSTS_N_INSNS (5), /* fp */
870 COSTS_N_INSNS (5), /* dmul */
871 COSTS_N_INSNS (21), /* sdiv */
872 COSTS_N_INSNS (35), /* ddiv */
873 32, /* cache line size */
874 32, /* l1 cache */
875 1024, /* l2 cache */
876 1, /* streams */
877 0, /* SF->DF convert */
878 };
880 /* Instruction costs on PPC8540 processors. */
881 static const
882 struct processor_costs ppc8540_cost = {
883 COSTS_N_INSNS (4), /* mulsi */
884 COSTS_N_INSNS (4), /* mulsi_const */
885 COSTS_N_INSNS (4), /* mulsi_const9 */
886 COSTS_N_INSNS (4), /* muldi */
887 COSTS_N_INSNS (19), /* divsi */
888 COSTS_N_INSNS (19), /* divdi */
889 COSTS_N_INSNS (4), /* fp */
890 COSTS_N_INSNS (4), /* dmul */
891 COSTS_N_INSNS (29), /* sdiv */
892 COSTS_N_INSNS (29), /* ddiv */
893 32, /* cache line size */
894 32, /* l1 cache */
895 256, /* l2 cache */
896 1, /* prefetch streams */
897 0, /* SF->DF convert */
898 };
900 /* Instruction costs on E300C2 and E300C3 cores. */
901 static const
902 struct processor_costs ppce300c2c3_cost = {
903 COSTS_N_INSNS (4), /* mulsi */
904 COSTS_N_INSNS (4), /* mulsi_const */
905 COSTS_N_INSNS (4), /* mulsi_const9 */
906 COSTS_N_INSNS (4), /* muldi */
907 COSTS_N_INSNS (19), /* divsi */
908 COSTS_N_INSNS (19), /* divdi */
909 COSTS_N_INSNS (3), /* fp */
910 COSTS_N_INSNS (4), /* dmul */
911 COSTS_N_INSNS (18), /* sdiv */
912 COSTS_N_INSNS (33), /* ddiv */
913 32, /* cache line size */
914 16, /* l1 cache */
915 16, /* l2 cache */
916 1, /* prefetch streams */
917 0, /* SF->DF convert */
918 };
920 /* Instruction costs on PPCE500MC processors. */
921 static const
922 struct processor_costs ppce500mc_cost = {
923 COSTS_N_INSNS (4), /* mulsi */
924 COSTS_N_INSNS (4), /* mulsi_const */
925 COSTS_N_INSNS (4), /* mulsi_const9 */
926 COSTS_N_INSNS (4), /* muldi */
927 COSTS_N_INSNS (14), /* divsi */
928 COSTS_N_INSNS (14), /* divdi */
929 COSTS_N_INSNS (8), /* fp */
930 COSTS_N_INSNS (10), /* dmul */
931 COSTS_N_INSNS (36), /* sdiv */
932 COSTS_N_INSNS (66), /* ddiv */
933 64, /* cache line size */
934 32, /* l1 cache */
935 128, /* l2 cache */
936 1, /* prefetch streams */
937 0, /* SF->DF convert */
938 };
940 /* Instruction costs on PPCE500MC64 processors. */
941 static const
942 struct processor_costs ppce500mc64_cost = {
943 COSTS_N_INSNS (4), /* mulsi */
944 COSTS_N_INSNS (4), /* mulsi_const */
945 COSTS_N_INSNS (4), /* mulsi_const9 */
946 COSTS_N_INSNS (4), /* muldi */
947 COSTS_N_INSNS (14), /* divsi */
948 COSTS_N_INSNS (14), /* divdi */
949 COSTS_N_INSNS (4), /* fp */
950 COSTS_N_INSNS (10), /* dmul */
951 COSTS_N_INSNS (36), /* sdiv */
952 COSTS_N_INSNS (66), /* ddiv */
953 64, /* cache line size */
954 32, /* l1 cache */
955 128, /* l2 cache */
956 1, /* prefetch streams */
957 0, /* SF->DF convert */
958 };
960 /* Instruction costs on PPCE5500 processors. */
961 static const
962 struct processor_costs ppce5500_cost = {
963 COSTS_N_INSNS (5), /* mulsi */
964 COSTS_N_INSNS (5), /* mulsi_const */
965 COSTS_N_INSNS (4), /* mulsi_const9 */
966 COSTS_N_INSNS (5), /* muldi */
967 COSTS_N_INSNS (14), /* divsi */
968 COSTS_N_INSNS (14), /* divdi */
969 COSTS_N_INSNS (7), /* fp */
970 COSTS_N_INSNS (10), /* dmul */
971 COSTS_N_INSNS (36), /* sdiv */
972 COSTS_N_INSNS (66), /* ddiv */
973 64, /* cache line size */
974 32, /* l1 cache */
975 128, /* l2 cache */
976 1, /* prefetch streams */
977 0, /* SF->DF convert */
978 };
980 /* Instruction costs on PPCE6500 processors. */
981 static const
982 struct processor_costs ppce6500_cost = {
983 COSTS_N_INSNS (5), /* mulsi */
984 COSTS_N_INSNS (5), /* mulsi_const */
985 COSTS_N_INSNS (4), /* mulsi_const9 */
986 COSTS_N_INSNS (5), /* muldi */
987 COSTS_N_INSNS (14), /* divsi */
988 COSTS_N_INSNS (14), /* divdi */
989 COSTS_N_INSNS (7), /* fp */
990 COSTS_N_INSNS (10), /* dmul */
991 COSTS_N_INSNS (36), /* sdiv */
992 COSTS_N_INSNS (66), /* ddiv */
993 64, /* cache line size */
994 32, /* l1 cache */
995 128, /* l2 cache */
996 1, /* prefetch streams */
997 0, /* SF->DF convert */
998 };
1000 /* Instruction costs on AppliedMicro Titan processors. */
1001 static const
1002 struct processor_costs titan_cost = {
1003 COSTS_N_INSNS (5), /* mulsi */
1004 COSTS_N_INSNS (5), /* mulsi_const */
1005 COSTS_N_INSNS (5), /* mulsi_const9 */
1006 COSTS_N_INSNS (5), /* muldi */
1007 COSTS_N_INSNS (18), /* divsi */
1008 COSTS_N_INSNS (18), /* divdi */
1009 COSTS_N_INSNS (10), /* fp */
1010 COSTS_N_INSNS (10), /* dmul */
1011 COSTS_N_INSNS (46), /* sdiv */
1012 COSTS_N_INSNS (72), /* ddiv */
1013 32, /* cache line size */
1014 32, /* l1 cache */
1015 512, /* l2 cache */
1016 1, /* prefetch streams */
1017 0, /* SF->DF convert */
1018 };
1020 /* Instruction costs on POWER4 and POWER5 processors. */
1021 static const
1022 struct processor_costs power4_cost = {
1023 COSTS_N_INSNS (3), /* mulsi */
1024 COSTS_N_INSNS (2), /* mulsi_const */
1025 COSTS_N_INSNS (2), /* mulsi_const9 */
1026 COSTS_N_INSNS (4), /* muldi */
1027 COSTS_N_INSNS (18), /* divsi */
1028 COSTS_N_INSNS (34), /* divdi */
1029 COSTS_N_INSNS (3), /* fp */
1030 COSTS_N_INSNS (3), /* dmul */
1031 COSTS_N_INSNS (17), /* sdiv */
1032 COSTS_N_INSNS (17), /* ddiv */
1033 128, /* cache line size */
1034 32, /* l1 cache */
1035 1024, /* l2 cache */
1036 8, /* prefetch streams /*/
1037 0, /* SF->DF convert */
1040 /* Instruction costs on POWER6 processors. */
1041 static const
1042 struct processor_costs power6_cost = {
1043 COSTS_N_INSNS (8), /* mulsi */
1044 COSTS_N_INSNS (8), /* mulsi_const */
1045 COSTS_N_INSNS (8), /* mulsi_const9 */
1046 COSTS_N_INSNS (8), /* muldi */
1047 COSTS_N_INSNS (22), /* divsi */
1048 COSTS_N_INSNS (28), /* divdi */
1049 COSTS_N_INSNS (3), /* fp */
1050 COSTS_N_INSNS (3), /* dmul */
1051 COSTS_N_INSNS (13), /* sdiv */
1052 COSTS_N_INSNS (16), /* ddiv */
1053 128, /* cache line size */
1054 64, /* l1 cache */
1055 2048, /* l2 cache */
1056 16, /* prefetch streams */
1057 0, /* SF->DF convert */
1058 };
1060 /* Instruction costs on POWER7 processors. */
1061 static const
1062 struct processor_costs power7_cost = {
1063 COSTS_N_INSNS (2), /* mulsi */
1064 COSTS_N_INSNS (2), /* mulsi_const */
1065 COSTS_N_INSNS (2), /* mulsi_const9 */
1066 COSTS_N_INSNS (2), /* muldi */
1067 COSTS_N_INSNS (18), /* divsi */
1068 COSTS_N_INSNS (34), /* divdi */
1069 COSTS_N_INSNS (3), /* fp */
1070 COSTS_N_INSNS (3), /* dmul */
1071 COSTS_N_INSNS (13), /* sdiv */
1072 COSTS_N_INSNS (16), /* ddiv */
1073 128, /* cache line size */
1074 32, /* l1 cache */
1075 256, /* l2 cache */
1076 12, /* prefetch streams */
1077 COSTS_N_INSNS (3), /* SF->DF convert */
1078 };
1080 /* Instruction costs on POWER8 processors. */
1081 static const
1082 struct processor_costs power8_cost = {
1083 COSTS_N_INSNS (3), /* mulsi */
1084 COSTS_N_INSNS (3), /* mulsi_const */
1085 COSTS_N_INSNS (3), /* mulsi_const9 */
1086 COSTS_N_INSNS (3), /* muldi */
1087 COSTS_N_INSNS (19), /* divsi */
1088 COSTS_N_INSNS (35), /* divdi */
1089 COSTS_N_INSNS (3), /* fp */
1090 COSTS_N_INSNS (3), /* dmul */
1091 COSTS_N_INSNS (14), /* sdiv */
1092 COSTS_N_INSNS (17), /* ddiv */
1093 128, /* cache line size */
1094 32, /* l1 cache */
1095 256, /* l2 cache */
1096 12, /* prefetch streams */
1097 COSTS_N_INSNS (3), /* SF->DF convert */
1098 };
1100 /* Instruction costs on POWER9 processors. */
1101 static const
1102 struct processor_costs power9_cost = {
1103 COSTS_N_INSNS (3), /* mulsi */
1104 COSTS_N_INSNS (3), /* mulsi_const */
1105 COSTS_N_INSNS (3), /* mulsi_const9 */
1106 COSTS_N_INSNS (3), /* muldi */
1107 COSTS_N_INSNS (8), /* divsi */
1108 COSTS_N_INSNS (12), /* divdi */
1109 COSTS_N_INSNS (3), /* fp */
1110 COSTS_N_INSNS (3), /* dmul */
1111 COSTS_N_INSNS (13), /* sdiv */
1112 COSTS_N_INSNS (18), /* ddiv */
1113 128, /* cache line size */
1114 32, /* l1 cache */
1115 512, /* l2 cache */
1116 8, /* prefetch streams */
1117 COSTS_N_INSNS (3), /* SF->DF convert */
1118 };
1120 /* Instruction costs on POWER A2 processors. */
1121 static const
1122 struct processor_costs ppca2_cost = {
1123 COSTS_N_INSNS (16), /* mulsi */
1124 COSTS_N_INSNS (16), /* mulsi_const */
1125 COSTS_N_INSNS (16), /* mulsi_const9 */
1126 COSTS_N_INSNS (16), /* muldi */
1127 COSTS_N_INSNS (22), /* divsi */
1128 COSTS_N_INSNS (28), /* divdi */
1129 COSTS_N_INSNS (3), /* fp */
1130 COSTS_N_INSNS (3), /* dmul */
1131 COSTS_N_INSNS (59), /* sdiv */
1132 COSTS_N_INSNS (72), /* ddiv */
1133 16, /* cache line size */
1134 16, /* l1 cache */
1135 2048, /* l2 cache */
1136 16, /* prefetch streams */
1137 0, /* SF->DF convert */
1138 };
1141 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1142 #undef RS6000_BUILTIN_0
1143 #undef RS6000_BUILTIN_1
1144 #undef RS6000_BUILTIN_2
1145 #undef RS6000_BUILTIN_3
1146 #undef RS6000_BUILTIN_A
1147 #undef RS6000_BUILTIN_D
1148 #undef RS6000_BUILTIN_E
1149 #undef RS6000_BUILTIN_H
1150 #undef RS6000_BUILTIN_P
1151 #undef RS6000_BUILTIN_Q
1152 #undef RS6000_BUILTIN_S
1153 #undef RS6000_BUILTIN_X
1155 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1156 { NAME, ICODE, MASK, ATTR },
1158 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1159 { NAME, ICODE, MASK, ATTR },
1161 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1162 { NAME, ICODE, MASK, ATTR },
1164 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1165 { NAME, ICODE, MASK, ATTR },
1167 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1168 { NAME, ICODE, MASK, ATTR },
1170 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1171 { NAME, ICODE, MASK, ATTR },
1173 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
1174 { NAME, ICODE, MASK, ATTR },
1176 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1177 { NAME, ICODE, MASK, ATTR },
1179 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1180 { NAME, ICODE, MASK, ATTR },
1182 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1183 { NAME, ICODE, MASK, ATTR },
1185 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
1186 { NAME, ICODE, MASK, ATTR },
1188 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1189 { NAME, ICODE, MASK, ATTR },
1191 struct rs6000_builtin_info_type {
1192 const char *name;
1193 const enum insn_code icode;
1194 const HOST_WIDE_INT mask;
1195 const unsigned attr;
1196 };
1198 static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
1199 {
1200 #include "rs6000-builtin.def"
1201 };
1203 #undef RS6000_BUILTIN_0
1204 #undef RS6000_BUILTIN_1
1205 #undef RS6000_BUILTIN_2
1206 #undef RS6000_BUILTIN_3
1207 #undef RS6000_BUILTIN_A
1208 #undef RS6000_BUILTIN_D
1209 #undef RS6000_BUILTIN_E
1210 #undef RS6000_BUILTIN_H
1211 #undef RS6000_BUILTIN_P
1212 #undef RS6000_BUILTIN_Q
1213 #undef RS6000_BUILTIN_S
1214 #undef RS6000_BUILTIN_X
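/* Illustrative sketch, not from the original source, of the X-macro
   pattern used above: rs6000-builtin.def invokes RS6000_BUILTIN_<kind>
   once per builtin, so defining each macro to emit an initializer and
   #including the .def file materializes rs6000_builtin_info.  A reduced
   model with a hypothetical EXAMPLE_BUILTIN:  */
#if 0
#define EXAMPLE_BUILTIN(ENUM, NAME) { NAME },
static const struct { const char *name; } example_table[] =
{
  EXAMPLE_BUILTIN (EB_FOO, "__builtin_foo")
  EXAMPLE_BUILTIN (EB_BAR, "__builtin_bar")
};
#undef EXAMPLE_BUILTIN
#endif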
1216 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1217 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1220 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1221 static bool spe_func_has_64bit_regs_p (void);
1222 static struct machine_function * rs6000_init_machine_status (void);
1223 static int rs6000_ra_ever_killed (void);
1224 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1225 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1226 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1227 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1228 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1229 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1230 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1231 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1232 bool);
1233 static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
1234 static bool is_microcoded_insn (rtx_insn *);
1235 static bool is_nonpipeline_insn (rtx_insn *);
1236 static bool is_cracked_insn (rtx_insn *);
1237 static bool is_load_insn (rtx, rtx *);
1238 static bool is_store_insn (rtx, rtx *);
1239 static bool set_to_load_agen (rtx_insn *, rtx_insn *);
1240 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1241 static bool insn_must_be_first_in_group (rtx_insn *);
1242 static bool insn_must_be_last_in_group (rtx_insn *);
1243 static void altivec_init_builtins (void);
1244 static tree builtin_function_type (machine_mode, machine_mode,
1245 machine_mode, machine_mode,
1246 enum rs6000_builtins, const char *name);
1247 static void rs6000_common_init_builtins (void);
1248 static void paired_init_builtins (void);
1249 static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
1250 static void spe_init_builtins (void);
1251 static void htm_init_builtins (void);
1252 static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
1253 static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
1254 static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
1255 static rs6000_stack_t *rs6000_stack_info (void);
1256 static void is_altivec_return_reg (rtx, void *);
1257 int easy_vector_constant (rtx, machine_mode);
1258 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1259 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1260 static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
1261 bool, bool);
1262 #if TARGET_MACHO
1263 static void macho_branch_islands (void);
1264 #endif
1265 static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
1266 int, int *);
1267 static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
1268 int, int, int *);
1269 static bool rs6000_mode_dependent_address (const_rtx);
1270 static bool rs6000_debug_mode_dependent_address (const_rtx);
1271 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1272 machine_mode, rtx);
1273 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1274 machine_mode,
1275 rtx);
1276 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1277 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1278 enum reg_class);
1279 static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
1280 machine_mode);
1281 static bool rs6000_debug_secondary_memory_needed (enum reg_class,
1282 enum reg_class,
1283 machine_mode);
1284 static bool rs6000_cannot_change_mode_class (machine_mode,
1285 machine_mode,
1286 enum reg_class);
1287 static bool rs6000_debug_cannot_change_mode_class (machine_mode,
1288 machine_mode,
1289 enum reg_class);
1290 static bool rs6000_save_toc_in_prologue_p (void);
1291 static rtx rs6000_internal_arg_pointer (void);
1293 rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
1294 int, int *)
1295 = rs6000_legitimize_reload_address;
1297 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1298 = rs6000_mode_dependent_address;
1300 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1301 machine_mode, rtx)
1302 = rs6000_secondary_reload_class;
1304 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1305 = rs6000_preferred_reload_class;
1307 bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
1308 machine_mode)
1309 = rs6000_secondary_memory_needed;
1311 bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
1312 machine_mode,
1313 enum reg_class)
1314 = rs6000_cannot_change_mode_class;
1316 const int INSN_NOT_AVAILABLE = -1;
1318 static void rs6000_print_isa_options (FILE *, int, const char *,
1319 HOST_WIDE_INT);
1320 static void rs6000_print_builtin_options (FILE *, int, const char *,
1321 HOST_WIDE_INT);
1323 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1324 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1325 enum rs6000_reg_type,
1326 machine_mode,
1327 secondary_reload_info *,
1328 bool);
1329 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1330 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
1331 static tree rs6000_fold_builtin (tree, int, tree *, bool);
1333 /* Hash table stuff for keeping track of TOC entries. */
1335 struct GTY((for_user)) toc_hash_struct
1336 {
1337 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1338 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1339 rtx key;
1340 machine_mode key_mode;
1341 int labelno;
1342 };
1344 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1345 {
1346 static hashval_t hash (toc_hash_struct *);
1347 static bool equal (toc_hash_struct *, toc_hash_struct *);
1348 };
1350 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1352 /* Hash table to keep track of the argument types for builtin functions. */
1354 struct GTY((for_user)) builtin_hash_struct
1355 {
1356 tree type;
1357 machine_mode mode[4]; /* return value + 3 arguments. */
1358 unsigned char uns_p[4]; /* and whether the types are unsigned. */
1359 };
1361 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1362 {
1363 static hashval_t hash (builtin_hash_struct *);
1364 static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1365 };
1367 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1370 /* Default register names. */
1371 char rs6000_reg_names[][8] =
1372 {
1373 "0", "1", "2", "3", "4", "5", "6", "7",
1374 "8", "9", "10", "11", "12", "13", "14", "15",
1375 "16", "17", "18", "19", "20", "21", "22", "23",
1376 "24", "25", "26", "27", "28", "29", "30", "31",
1377 "0", "1", "2", "3", "4", "5", "6", "7",
1378 "8", "9", "10", "11", "12", "13", "14", "15",
1379 "16", "17", "18", "19", "20", "21", "22", "23",
1380 "24", "25", "26", "27", "28", "29", "30", "31",
1381 "mq", "lr", "ctr","ap",
1382 "0", "1", "2", "3", "4", "5", "6", "7",
1383 "ca",
1384 /* AltiVec registers. */
1385 "0", "1", "2", "3", "4", "5", "6", "7",
1386 "8", "9", "10", "11", "12", "13", "14", "15",
1387 "16", "17", "18", "19", "20", "21", "22", "23",
1388 "24", "25", "26", "27", "28", "29", "30", "31",
1389 "vrsave", "vscr",
1390 /* SPE registers. */
1391 "spe_acc", "spefscr",
1392 /* Soft frame pointer. */
1393 "sfp",
1394 /* HTM SPR registers. */
1395 "tfhar", "tfiar", "texasr",
1396 /* SPE High registers. */
1397 "0", "1", "2", "3", "4", "5", "6", "7",
1398 "8", "9", "10", "11", "12", "13", "14", "15",
1399 "16", "17", "18", "19", "20", "21", "22", "23",
1400 "24", "25", "26", "27", "28", "29", "30", "31"
1403 #ifdef TARGET_REGNAMES
1404 static const char alt_reg_names[][8] =
1405 {
1406 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1407 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1408 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1409 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1410 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1411 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1412 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1413 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1414 "mq", "lr", "ctr", "ap",
1415 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1416 "ca",
1417 /* AltiVec registers. */
1418 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1419 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1420 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1421 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1422 "vrsave", "vscr",
1423 /* SPE registers. */
1424 "spe_acc", "spefscr",
1425 /* Soft frame pointer. */
1426 "sfp",
1427 /* HTM SPR registers. */
1428 "tfhar", "tfiar", "texasr",
1429 /* SPE High registers. */
1430 "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1431 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1432 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1433 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1435 #endif
1437 /* Table of valid machine attributes. */
1439 static const struct attribute_spec rs6000_attribute_table[] =
1440 {
1441 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1442 affects_type_identity } */
1443 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute,
1444 false },
1445 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1446 false },
1447 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1448 false },
1449 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1450 false },
1451 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1452 false },
1453 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1454 SUBTARGET_ATTRIBUTE_TABLE,
1455 #endif
1456 { NULL, 0, 0, false, false, false, NULL, false }
1457 };
1459 #ifndef TARGET_PROFILE_KERNEL
1460 #define TARGET_PROFILE_KERNEL 0
1461 #endif
1463 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1464 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
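/* Illustrative sketch, not from the original source: because %v0 maps to
   the most significant bit, the VRSAVE bit for %v20, the first
   callee-saved AltiVec register, is 0x80000000 >> 20 == 0x00000800:  */
#if 0
unsigned int v20_bit = ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20);
#endif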
1466 /* Initialize the GCC target structure. */
1467 #undef TARGET_ATTRIBUTE_TABLE
1468 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1469 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1470 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1471 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1472 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1474 #undef TARGET_ASM_ALIGNED_DI_OP
1475 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1477 /* Default unaligned ops are only provided for ELF. Find the ops needed
1478 for non-ELF systems. */
1479 #ifndef OBJECT_FORMAT_ELF
1480 #if TARGET_XCOFF
1481 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1482 64-bit targets. */
1483 #undef TARGET_ASM_UNALIGNED_HI_OP
1484 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1485 #undef TARGET_ASM_UNALIGNED_SI_OP
1486 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1487 #undef TARGET_ASM_UNALIGNED_DI_OP
1488 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1489 #else
1490 /* For Darwin. */
1491 #undef TARGET_ASM_UNALIGNED_HI_OP
1492 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1493 #undef TARGET_ASM_UNALIGNED_SI_OP
1494 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1495 #undef TARGET_ASM_UNALIGNED_DI_OP
1496 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1497 #undef TARGET_ASM_ALIGNED_DI_OP
1498 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1499 #endif
1500 #endif
1502 /* This hook deals with fixups for relocatable code and DI-mode objects
1503 in 64-bit code. */
1504 #undef TARGET_ASM_INTEGER
1505 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1507 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1508 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1509 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1510 #endif
1512 #undef TARGET_SET_UP_BY_PROLOGUE
1513 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1515 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1516 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1518 #undef TARGET_INTERNAL_ARG_POINTER
1519 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1521 #undef TARGET_HAVE_TLS
1522 #define TARGET_HAVE_TLS HAVE_AS_TLS
1524 #undef TARGET_CANNOT_FORCE_CONST_MEM
1525 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1527 #undef TARGET_DELEGITIMIZE_ADDRESS
1528 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1530 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1531 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1533 #undef TARGET_ASM_FUNCTION_PROLOGUE
1534 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1535 #undef TARGET_ASM_FUNCTION_EPILOGUE
1536 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1538 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1539 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1541 #undef TARGET_LEGITIMIZE_ADDRESS
1542 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1544 #undef TARGET_SCHED_VARIABLE_ISSUE
1545 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1547 #undef TARGET_SCHED_ISSUE_RATE
1548 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1549 #undef TARGET_SCHED_ADJUST_COST
1550 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1551 #undef TARGET_SCHED_ADJUST_PRIORITY
1552 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1553 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1554 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1555 #undef TARGET_SCHED_INIT
1556 #define TARGET_SCHED_INIT rs6000_sched_init
1557 #undef TARGET_SCHED_FINISH
1558 #define TARGET_SCHED_FINISH rs6000_sched_finish
1559 #undef TARGET_SCHED_REORDER
1560 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1561 #undef TARGET_SCHED_REORDER2
1562 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1564 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1565 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1567 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1568 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1570 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1571 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1572 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1573 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1574 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1575 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1576 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1577 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1579 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1580 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1581 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1582 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1583 rs6000_builtin_support_vector_misalignment
1584 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1585 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1586 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1587 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1588 rs6000_builtin_vectorization_cost
1589 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1590 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1591 rs6000_preferred_simd_mode
1592 #undef TARGET_VECTORIZE_INIT_COST
1593 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1594 #undef TARGET_VECTORIZE_ADD_STMT_COST
1595 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1596 #undef TARGET_VECTORIZE_FINISH_COST
1597 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1598 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1599 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1601 #undef TARGET_INIT_BUILTINS
1602 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1603 #undef TARGET_BUILTIN_DECL
1604 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1606 #undef TARGET_FOLD_BUILTIN
1607 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1609 #undef TARGET_EXPAND_BUILTIN
1610 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1612 #undef TARGET_MANGLE_TYPE
1613 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1615 #undef TARGET_INIT_LIBFUNCS
1616 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1618 #if TARGET_MACHO
1619 #undef TARGET_BINDS_LOCAL_P
1620 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1621 #endif
1623 #undef TARGET_MS_BITFIELD_LAYOUT_P
1624 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1626 #undef TARGET_ASM_OUTPUT_MI_THUNK
1627 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1629 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1630 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1632 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1633 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1635 #undef TARGET_REGISTER_MOVE_COST
1636 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1637 #undef TARGET_MEMORY_MOVE_COST
1638 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1639 #undef TARGET_CANNOT_COPY_INSN_P
1640 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1641 #undef TARGET_RTX_COSTS
1642 #define TARGET_RTX_COSTS rs6000_rtx_costs
1643 #undef TARGET_ADDRESS_COST
1644 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1646 #undef TARGET_DWARF_REGISTER_SPAN
1647 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1649 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1650 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1652 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1653 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1655 #undef TARGET_PROMOTE_FUNCTION_MODE
1656 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1658 #undef TARGET_RETURN_IN_MEMORY
1659 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1661 #undef TARGET_RETURN_IN_MSB
1662 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1664 #undef TARGET_SETUP_INCOMING_VARARGS
1665 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1667 /* Always strict argument naming on rs6000. */
1668 #undef TARGET_STRICT_ARGUMENT_NAMING
1669 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1670 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1671 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1672 #undef TARGET_SPLIT_COMPLEX_ARG
1673 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1674 #undef TARGET_MUST_PASS_IN_STACK
1675 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1676 #undef TARGET_PASS_BY_REFERENCE
1677 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1678 #undef TARGET_ARG_PARTIAL_BYTES
1679 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1680 #undef TARGET_FUNCTION_ARG_ADVANCE
1681 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1682 #undef TARGET_FUNCTION_ARG
1683 #define TARGET_FUNCTION_ARG rs6000_function_arg
1684 #undef TARGET_FUNCTION_ARG_BOUNDARY
1685 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1687 #undef TARGET_BUILD_BUILTIN_VA_LIST
1688 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1690 #undef TARGET_EXPAND_BUILTIN_VA_START
1691 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1693 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1694 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1696 #undef TARGET_EH_RETURN_FILTER_MODE
1697 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1699 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1700 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1702 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1703 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1705 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1706 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1708 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1709 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1711 #undef TARGET_MD_ASM_ADJUST
1712 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1714 #undef TARGET_OPTION_OVERRIDE
1715 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1717 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1718 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1719 rs6000_builtin_vectorized_function
1721 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1722 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1723 rs6000_builtin_md_vectorized_function
1725 #if !TARGET_MACHO
1726 #undef TARGET_STACK_PROTECT_FAIL
1727 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1728 #endif
1730 #ifdef HAVE_AS_TLS
1731 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1732 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1733 #endif
1735 /* Use a 32-bit anchor range. This leads to sequences like:
1737 addis tmp,anchor,high
1738 add dest,tmp,low
1740 where tmp itself acts as an anchor, and can be shared between
1741 accesses to the same 64k page. */
1742 #undef TARGET_MIN_ANCHOR_OFFSET
1743 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1744 #undef TARGET_MAX_ANCHOR_OFFSET
1745 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
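/* As an illustration (hypothetical offsets, not compiler output): with a
   section anchor in register 10, two statics at offsets 0x12340 and 0x12380
   from the anchor can share one addis:

	addis 9,10,0x1
	lwz 3,0x2340(9)
	lwz 4,0x2380(9)

   since both offsets have the same high 16 bits once the low parts fit in a
   signed 16-bit displacement.  */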
1746 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1747 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1748 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1749 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1751 #undef TARGET_BUILTIN_RECIPROCAL
1752 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1754 #undef TARGET_EXPAND_TO_RTL_HOOK
1755 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1757 #undef TARGET_INSTANTIATE_DECLS
1758 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1760 #undef TARGET_SECONDARY_RELOAD
1761 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1763 #undef TARGET_LEGITIMATE_ADDRESS_P
1764 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1766 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1767 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1769 #undef TARGET_LRA_P
1770 #define TARGET_LRA_P rs6000_lra_p
1772 #undef TARGET_CAN_ELIMINATE
1773 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1775 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1776 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1778 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1779 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1781 #undef TARGET_TRAMPOLINE_INIT
1782 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1784 #undef TARGET_FUNCTION_VALUE
1785 #define TARGET_FUNCTION_VALUE rs6000_function_value
1787 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1788 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1790 #undef TARGET_OPTION_SAVE
1791 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1793 #undef TARGET_OPTION_RESTORE
1794 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1796 #undef TARGET_OPTION_PRINT
1797 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1799 #undef TARGET_CAN_INLINE_P
1800 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1802 #undef TARGET_SET_CURRENT_FUNCTION
1803 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1805 #undef TARGET_LEGITIMATE_CONSTANT_P
1806 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1808 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1809 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1811 #undef TARGET_CAN_USE_DOLOOP_P
1812 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1814 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1815 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1817 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1818 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1819 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1820 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1821 #undef TARGET_UNWIND_WORD_MODE
1822 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1824 #undef TARGET_OFFLOAD_OPTIONS
1825 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1827 #undef TARGET_C_MODE_FOR_SUFFIX
1828 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1830 #undef TARGET_INVALID_BINARY_OP
1831 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1833 #undef TARGET_OPTAB_SUPPORTED_P
1834 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1837 /* Processor table. */
1838 struct rs6000_ptt
1840 const char *const name; /* Canonical processor name. */
1841 const enum processor_type processor; /* Processor type enum value. */
1842 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1845 static struct rs6000_ptt const processor_target_table[] =
1847 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1848 #include "rs6000-cpus.def"
1849 #undef RS6000_CPU
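/* Each RS6000_CPU line in rs6000-cpus.def expands to one table entry of the
   shape { "name", PROCESSOR_xxx, <ISA flag mask> }; see that file for the
   actual names and flag sets.  */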
1852 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1853 name is invalid. */
1855 static int
1856 rs6000_cpu_name_lookup (const char *name)
1858 size_t i;
1860 if (name != NULL)
1862 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1863 if (! strcmp (name, processor_target_table[i].name))
1864 return (int)i;
1867 return -1;
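/* For example, rs6000_cpu_name_lookup ("power8") returns the index of the
   "power8" entry in processor_target_table, while an unknown name or a NULL
   pointer returns -1.  */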
1871 /* Return number of consecutive hard regs needed starting at reg REGNO
1872 to hold something of mode MODE.
1873 This is ordinarily the length in words of a value of mode MODE
1874 but can be less for certain modes in special long registers.
1876 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1877 scalar instructions. The upper 32 bits are only available to the
1878 SIMD instructions.
1880 POWER and PowerPC GPRs hold 32 bits worth;
1881 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1883 static int
1884 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1886 unsigned HOST_WIDE_INT reg_size;
1888 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1889 128-bit floating point that can go in vector registers, which has VSX
1890 memory addressing. */
1891 if (FP_REGNO_P (regno))
1892 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1893 ? UNITS_PER_VSX_WORD
1894 : UNITS_PER_FP_WORD);
1896 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1897 reg_size = UNITS_PER_SPE_WORD;
1899 else if (ALTIVEC_REGNO_P (regno))
1900 reg_size = UNITS_PER_ALTIVEC_WORD;
1902 /* The value returned for SCmode in the E500 double case is 2 for
1903 ABI compatibility; storing an SCmode value in a single register
1904 would require function_arg and rs6000_spe_function_arg to handle
1905 SCmode so as to pass the value correctly in a pair of
1906 registers. */
1907 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1908 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1909 reg_size = UNITS_PER_FP_WORD;
1911 else
1912 reg_size = UNITS_PER_WORD;
1914 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
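/* For example, DFmode (8 bytes) needs two GPRs on a 32-bit target (reg_size
   == UNITS_PER_WORD == 4) but only one FPR (reg_size == UNITS_PER_FP_WORD
   == 8); the rounding in the division above covers modes whose size is not
   a multiple of the register size.  */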
1917 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1918 MODE. */
1919 static int
1920 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1922 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1924 if (COMPLEX_MODE_P (mode))
1925 mode = GET_MODE_INNER (mode);
1927 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1928 register pairs, and we use PTImode where we need to deal with quad
1929 word memory operations. Don't allow quad words in the argument or frame
1930 pointer registers, just registers 0..31. */
1931 if (mode == PTImode)
1932 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1933 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1934 && ((regno & 1) == 0));
1936 /* The VSX registers that overlap the FPR registers are wider than on non-VSX
1937 implementations. Don't allow an item to be split between a FP register
1938 and an Altivec register. Allow TImode in all VSX registers if the user
1939 asked for it. */
1940 if (TARGET_VSX && VSX_REGNO_P (regno)
1941 && (VECTOR_MEM_VSX_P (mode)
1942 || FLOAT128_VECTOR_P (mode)
1943 || reg_addr[mode].scalar_in_vmx_p
1944 || (TARGET_VSX_TIMODE && mode == TImode)
1945 || (TARGET_VADDUQM && mode == V1TImode)
1946 || (TARGET_UPPER_REGS_DI && mode == DImode)))
1948 if (FP_REGNO_P (regno))
1949 return FP_REGNO_P (last_regno);
1951 if (ALTIVEC_REGNO_P (regno))
1953 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1954 return 0;
1956 return ALTIVEC_REGNO_P (last_regno);
1960 /* The GPRs can hold any mode, but values bigger than one register
1961 cannot go past R31. */
1962 if (INT_REGNO_P (regno))
1963 return INT_REGNO_P (last_regno);
1965 /* The float registers (except for VSX vector modes) can only hold floating
1966 modes and DImode. */
1967 if (FP_REGNO_P (regno))
1969 if (FLOAT128_VECTOR_P (mode))
1970 return false;
1972 if (SCALAR_FLOAT_MODE_P (mode)
1973 && (mode != TDmode || (regno % 2) == 0)
1974 && FP_REGNO_P (last_regno))
1975 return 1;
1977 if (GET_MODE_CLASS (mode) == MODE_INT
1978 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1979 return 1;
1981 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1982 && PAIRED_VECTOR_MODE (mode))
1983 return 1;
1985 return 0;
1988 /* The CR register can only hold CC modes. */
1989 if (CR_REGNO_P (regno))
1990 return GET_MODE_CLASS (mode) == MODE_CC;
1992 if (CA_REGNO_P (regno))
1993 return mode == Pmode || mode == SImode;
1995 /* AltiVec modes can go only in AltiVec registers. */
1996 if (ALTIVEC_REGNO_P (regno))
1997 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1998 || mode == V1TImode);
2000 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2001 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2002 return 1;
2004 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2005 registers, and it must fit within the register set. */
2007 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
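/* For example, the TDmode test above accepts a 128-bit decimal float only in
   an FPR pair that starts at an even register, mirroring the even-register
   restriction placed on PTImode in the GPRs.  */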
2010 /* Print interesting facts about registers. */
2011 static void
2012 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2014 int r, m;
2016 for (r = first_regno; r <= last_regno; ++r)
2018 const char *comma = "";
2019 int len;
2021 if (first_regno == last_regno)
2022 fprintf (stderr, "%s:\t", reg_name);
2023 else
2024 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2026 len = 8;
2027 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2028 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2030 if (len > 70)
2032 fprintf (stderr, ",\n\t");
2033 len = 8;
2034 comma = "";
2037 if (rs6000_hard_regno_nregs[m][r] > 1)
2038 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2039 rs6000_hard_regno_nregs[m][r]);
2040 else
2041 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2043 comma = ", ";
2046 if (call_used_regs[r])
2048 if (len > 70)
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2055 len += fprintf (stderr, "%s%s", comma, "call-used");
2056 comma = ", ";
2059 if (fixed_regs[r])
2061 if (len > 70)
2063 fprintf (stderr, ",\n\t");
2064 len = 8;
2065 comma = "";
2068 len += fprintf (stderr, "%s%s", comma, "fixed");
2069 comma = ", ";
2072 if (len > 70)
2074 fprintf (stderr, ",\n\t");
2075 comma = "";
2078 len += fprintf (stderr, "%sreg-class = %s", comma,
2079 reg_class_names[(int)rs6000_regno_regclass[r]]);
2080 comma = ", ";
2082 if (len > 70)
2084 fprintf (stderr, ",\n\t");
2085 comma = "";
2088 fprintf (stderr, "%sregno = %d\n", comma, r);
2092 static const char *
2093 rs6000_debug_vector_unit (enum rs6000_vector v)
2095 const char *ret;
2097 switch (v)
2099 case VECTOR_NONE: ret = "none"; break;
2100 case VECTOR_ALTIVEC: ret = "altivec"; break;
2101 case VECTOR_VSX: ret = "vsx"; break;
2102 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2103 case VECTOR_PAIRED: ret = "paired"; break;
2104 case VECTOR_SPE: ret = "spe"; break;
2105 case VECTOR_OTHER: ret = "other"; break;
2106 default: ret = "unknown"; break;
2109 return ret;
2112 /* Inner function printing just the address mask for a particular reload
2113 register class. */
2114 DEBUG_FUNCTION char *
2115 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2117 static char ret[8];
2118 char *p = ret;
2120 if ((mask & RELOAD_REG_VALID) != 0)
2121 *p++ = 'v';
2122 else if (keep_spaces)
2123 *p++ = ' ';
2125 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2126 *p++ = 'm';
2127 else if (keep_spaces)
2128 *p++ = ' ';
2130 if ((mask & RELOAD_REG_INDEXED) != 0)
2131 *p++ = 'i';
2132 else if (keep_spaces)
2133 *p++ = ' ';
2135 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2136 *p++ = 'O';
2137 else if ((mask & RELOAD_REG_OFFSET) != 0)
2138 *p++ = 'o';
2139 else if (keep_spaces)
2140 *p++ = ' ';
2142 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2143 *p++ = '+';
2144 else if (keep_spaces)
2145 *p++ = ' ';
2147 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2148 *p++ = '+';
2149 else if (keep_spaces)
2150 *p++ = ' ';
2152 if ((mask & RELOAD_REG_AND_M16) != 0)
2153 *p++ = '&';
2154 else if (keep_spaces)
2155 *p++ = ' ';
2157 *p = '\0';
2159 return ret;
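/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED,
   RELOAD_REG_OFFSET and RELOAD_REG_PRE_INCDEC prints as "v io+  " when
   KEEP_SPACES is true and as "vio+" otherwise.  */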
2162 /* Print the address masks in a human readable fashion. */
2163 DEBUG_FUNCTION void
2164 rs6000_debug_print_mode (ssize_t m)
2166 ssize_t rc;
2167 int spaces = 0;
2168 bool fuse_extra_p;
2170 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2171 for (rc = 0; rc < N_RELOAD_REG; rc++)
2172 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2173 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2175 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2176 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2177 fprintf (stderr, " Reload=%c%c",
2178 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2179 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2180 else
2181 spaces += sizeof (" Reload=sl") - 1;
2183 if (reg_addr[m].scalar_in_vmx_p)
2185 fprintf (stderr, "%*s Upper=y", spaces, "");
2186 spaces = 0;
2188 else
2189 spaces += sizeof (" Upper=y") - 1;
2191 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2192 || reg_addr[m].fused_toc);
2193 if (!fuse_extra_p)
2195 for (rc = 0; rc < N_RELOAD_REG; rc++)
2197 if (rc != RELOAD_REG_ANY)
2199 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2201 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2202 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2203 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2205 fuse_extra_p = true;
2206 break;
2212 if (fuse_extra_p)
2214 fprintf (stderr, "%*s Fuse:", spaces, "");
2215 spaces = 0;
2217 for (rc = 0; rc < N_RELOAD_REG; rc++)
2219 if (rc != RELOAD_REG_ANY)
2221 char load, store;
2223 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2224 load = 'l';
2225 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2226 load = 'L';
2227 else
2228 load = '-';
2230 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2231 store = 's';
2232 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2233 store = 'S';
2234 else
2235 store = '-';
2237 if (load == '-' && store == '-')
2238 spaces += 5;
2239 else
2241 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2242 reload_reg_map[rc].name[0], load, store);
2243 spaces = 0;
2248 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2250 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2251 spaces = 0;
2253 else
2254 spaces += sizeof (" P8gpr") - 1;
2256 if (reg_addr[m].fused_toc)
2258 fprintf (stderr, "%*sToc", (spaces + 1), "");
2259 spaces = 0;
2261 else
2262 spaces += sizeof (" Toc") - 1;
2264 else
2265 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2267 if (rs6000_vector_unit[m] != VECTOR_NONE
2268 || rs6000_vector_mem[m] != VECTOR_NONE)
2270 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2271 spaces, "",
2272 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2273 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2276 fputs ("\n", stderr);
2279 #define DEBUG_FMT_ID "%-32s= "
2280 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2281 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2282 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
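/* DEBUG_FMT_ID left-justifies the label in a 32-character field so the '='
   signs line up; e.g. fprintf (stderr, DEBUG_FMT_D, "tls_size", 13) prints
   the label padded to 32 columns followed by "= 13".  */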
2284 /* Print various interesting information with -mdebug=reg. */
2285 static void
2286 rs6000_debug_reg_global (void)
2288 static const char *const tf[2] = { "false", "true" };
2289 const char *nl = (const char *)0;
2290 int m;
2291 size_t m1, m2, v;
2292 char costly_num[20];
2293 char nop_num[20];
2294 char flags_buffer[40];
2295 const char *costly_str;
2296 const char *nop_str;
2297 const char *trace_str;
2298 const char *abi_str;
2299 const char *cmodel_str;
2300 struct cl_target_option cl_opts;
2302 /* Modes we want tieable information on. */
2303 static const machine_mode print_tieable_modes[] = {
2304 QImode,
2305 HImode,
2306 SImode,
2307 DImode,
2308 TImode,
2309 PTImode,
2310 SFmode,
2311 DFmode,
2312 TFmode,
2313 IFmode,
2314 KFmode,
2315 SDmode,
2316 DDmode,
2317 TDmode,
2318 V8QImode,
2319 V4HImode,
2320 V2SImode,
2321 V16QImode,
2322 V8HImode,
2323 V4SImode,
2324 V2DImode,
2325 V1TImode,
2326 V32QImode,
2327 V16HImode,
2328 V8SImode,
2329 V4DImode,
2330 V2TImode,
2331 V2SFmode,
2332 V4SFmode,
2333 V2DFmode,
2334 V8SFmode,
2335 V4DFmode,
2336 CCmode,
2337 CCUNSmode,
2338 CCEQmode,
2341 /* Virtual regs we are interested in. */
2342 static const struct {
2343 int regno; /* register number. */
2344 const char *name; /* register name. */
2345 } virtual_regs[] = {
2346 { STACK_POINTER_REGNUM, "stack pointer:" },
2347 { TOC_REGNUM, "toc: " },
2348 { STATIC_CHAIN_REGNUM, "static chain: " },
2349 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2350 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2351 { ARG_POINTER_REGNUM, "arg pointer: " },
2352 { FRAME_POINTER_REGNUM, "frame pointer:" },
2353 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2354 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2355 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2356 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2357 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2358 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2359 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2360 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2361 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2364 fputs ("\nHard register information:\n", stderr);
2365 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2366 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2367 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2368 LAST_ALTIVEC_REGNO,
2369 "vs");
2370 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2371 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2372 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2373 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2374 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2375 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2376 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2377 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2379 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2380 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2381 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2383 fprintf (stderr,
2384 "\n"
2385 "d reg_class = %s\n"
2386 "f reg_class = %s\n"
2387 "v reg_class = %s\n"
2388 "wa reg_class = %s\n"
2389 "wb reg_class = %s\n"
2390 "wd reg_class = %s\n"
2391 "we reg_class = %s\n"
2392 "wf reg_class = %s\n"
2393 "wg reg_class = %s\n"
2394 "wh reg_class = %s\n"
2395 "wi reg_class = %s\n"
2396 "wj reg_class = %s\n"
2397 "wk reg_class = %s\n"
2398 "wl reg_class = %s\n"
2399 "wm reg_class = %s\n"
2400 "wo reg_class = %s\n"
2401 "wp reg_class = %s\n"
2402 "wq reg_class = %s\n"
2403 "wr reg_class = %s\n"
2404 "ws reg_class = %s\n"
2405 "wt reg_class = %s\n"
2406 "wu reg_class = %s\n"
2407 "wv reg_class = %s\n"
2408 "ww reg_class = %s\n"
2409 "wx reg_class = %s\n"
2410 "wy reg_class = %s\n"
2411 "wz reg_class = %s\n"
2412 "\n",
2413 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2414 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2415 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2416 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2417 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2418 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2419 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2420 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2421 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2422 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2423 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2424 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2425 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2426 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2427 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2428 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2429 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2430 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2431 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2432 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2433 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2434 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2435 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2436 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2437 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2438 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2439 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2441 nl = "\n";
2442 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2443 rs6000_debug_print_mode (m);
2445 fputs ("\n", stderr);
2447 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2449 machine_mode mode1 = print_tieable_modes[m1];
2450 bool first_time = true;
2452 nl = (const char *)0;
2453 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2455 machine_mode mode2 = print_tieable_modes[m2];
2456 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2458 if (first_time)
2460 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2461 nl = "\n";
2462 first_time = false;
2465 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2469 if (!first_time)
2470 fputs ("\n", stderr);
2473 if (nl)
2474 fputs (nl, stderr);
2476 if (rs6000_recip_control)
2478 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2480 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2481 if (rs6000_recip_bits[m])
2483 fprintf (stderr,
2484 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2485 GET_MODE_NAME (m),
2486 (RS6000_RECIP_AUTO_RE_P (m)
2487 ? "auto"
2488 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2489 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2490 ? "auto"
2491 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2494 fputs ("\n", stderr);
2497 if (rs6000_cpu_index >= 0)
2499 const char *name = processor_target_table[rs6000_cpu_index].name;
2500 HOST_WIDE_INT flags
2501 = processor_target_table[rs6000_cpu_index].target_enable;
2503 sprintf (flags_buffer, "-mcpu=%s flags", name);
2504 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2506 else
2507 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2509 if (rs6000_tune_index >= 0)
2511 const char *name = processor_target_table[rs6000_tune_index].name;
2512 HOST_WIDE_INT flags
2513 = processor_target_table[rs6000_tune_index].target_enable;
2515 sprintf (flags_buffer, "-mtune=%s flags", name);
2516 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2518 else
2519 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2521 cl_target_option_save (&cl_opts, &global_options);
2522 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2523 rs6000_isa_flags);
2525 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2526 rs6000_isa_flags_explicit);
2528 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2529 rs6000_builtin_mask);
2531 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2533 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2534 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2536 switch (rs6000_sched_costly_dep)
2538 case max_dep_latency:
2539 costly_str = "max_dep_latency";
2540 break;
2542 case no_dep_costly:
2543 costly_str = "no_dep_costly";
2544 break;
2546 case all_deps_costly:
2547 costly_str = "all_deps_costly";
2548 break;
2550 case true_store_to_load_dep_costly:
2551 costly_str = "true_store_to_load_dep_costly";
2552 break;
2554 case store_to_load_dep_costly:
2555 costly_str = "store_to_load_dep_costly";
2556 break;
2558 default:
2559 costly_str = costly_num;
2560 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2561 break;
2564 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2566 switch (rs6000_sched_insert_nops)
2568 case sched_finish_regroup_exact:
2569 nop_str = "sched_finish_regroup_exact";
2570 break;
2572 case sched_finish_pad_groups:
2573 nop_str = "sched_finish_pad_groups";
2574 break;
2576 case sched_finish_none:
2577 nop_str = "sched_finish_none";
2578 break;
2580 default:
2581 nop_str = nop_num;
2582 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2583 break;
2586 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2588 switch (rs6000_sdata)
2590 default:
2591 case SDATA_NONE:
2592 break;
2594 case SDATA_DATA:
2595 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2596 break;
2598 case SDATA_SYSV:
2599 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2600 break;
2602 case SDATA_EABI:
2603 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2604 break;
2608 switch (rs6000_traceback)
2610 case traceback_default: trace_str = "default"; break;
2611 case traceback_none: trace_str = "none"; break;
2612 case traceback_part: trace_str = "part"; break;
2613 case traceback_full: trace_str = "full"; break;
2614 default: trace_str = "unknown"; break;
2617 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2619 switch (rs6000_current_cmodel)
2621 case CMODEL_SMALL: cmodel_str = "small"; break;
2622 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2623 case CMODEL_LARGE: cmodel_str = "large"; break;
2624 default: cmodel_str = "unknown"; break;
2627 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2629 switch (rs6000_current_abi)
2631 case ABI_NONE: abi_str = "none"; break;
2632 case ABI_AIX: abi_str = "aix"; break;
2633 case ABI_ELFv2: abi_str = "ELFv2"; break;
2634 case ABI_V4: abi_str = "V4"; break;
2635 case ABI_DARWIN: abi_str = "darwin"; break;
2636 default: abi_str = "unknown"; break;
2639 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2641 if (rs6000_altivec_abi)
2642 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2644 if (rs6000_spe_abi)
2645 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2647 if (rs6000_darwin64_abi)
2648 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2650 if (rs6000_float_gprs)
2651 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2653 fprintf (stderr, DEBUG_FMT_S, "fprs",
2654 (TARGET_FPRS ? "true" : "false"));
2656 fprintf (stderr, DEBUG_FMT_S, "single_float",
2657 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2659 fprintf (stderr, DEBUG_FMT_S, "double_float",
2660 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2662 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2663 (TARGET_SOFT_FLOAT ? "true" : "false"));
2665 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2666 (TARGET_E500_SINGLE ? "true" : "false"));
2668 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2669 (TARGET_E500_DOUBLE ? "true" : "false"));
2671 if (TARGET_LINK_STACK)
2672 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2674 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2676 if (TARGET_P8_FUSION)
2678 char options[80];
2680 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2681 if (TARGET_TOC_FUSION)
2682 strcat (options, ", toc");
2684 if (TARGET_P8_FUSION_SIGN)
2685 strcat (options, ", sign");
2687 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2690 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2691 TARGET_SECURE_PLT ? "secure" : "bss");
2692 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2693 aix_struct_return ? "aix" : "sysv");
2694 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2695 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2696 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2697 tf[!!rs6000_align_branch_targets]);
2698 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2699 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2700 rs6000_long_double_type_size);
2701 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2702 (int)rs6000_sched_restricted_insns_priority);
2703 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2704 (int)END_BUILTINS);
2705 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2706 (int)RS6000_BUILTIN_COUNT);
2708 if (TARGET_VSX)
2709 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2710 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2712 if (TARGET_DIRECT_MOVE_128)
2713 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2714 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2718 /* Update the addr mask bits in reg_addr to help secondary reload and the
2719 legitimate address support figure out the appropriate addressing to
2720 use. */
2722 static void
2723 rs6000_setup_reg_addr_masks (void)
2725 ssize_t rc, reg, m, nregs;
2726 addr_mask_type any_addr_mask, addr_mask;
2728 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2730 machine_mode m2 = (machine_mode) m;
2731 bool complex_p = false;
2732 size_t msize;
2734 if (COMPLEX_MODE_P (m2))
2736 complex_p = true;
2737 m2 = GET_MODE_INNER (m2);
2740 msize = GET_MODE_SIZE (m2);
2742 /* SDmode is special in that we want to access it only via REG+REG
2743 addressing on power7 and above, since we want to use the LFIWZX and
2744 STFIWX instructions to load it. */
2745 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2747 any_addr_mask = 0;
2748 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2750 addr_mask = 0;
2751 reg = reload_reg_map[rc].reg;
2753 /* Can mode values go in the GPR/FPR/Altivec registers? */
2754 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2756 nregs = rs6000_hard_regno_nregs[m][reg];
2757 addr_mask |= RELOAD_REG_VALID;
2759 /* Indicate if the mode takes more than 1 physical register. If
2760 it takes a single register, indicate it can do REG+REG
2761 addressing. */
2762 if (nregs > 1 || m == BLKmode || complex_p)
2763 addr_mask |= RELOAD_REG_MULTIPLE;
2764 else
2765 addr_mask |= RELOAD_REG_INDEXED;
2767 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2768 addressing. Restrict addressing on SPE for 64-bit types
2769 because of the SUBREG hackery used to address 64-bit floats in
2770 '32-bit' GPRs. If we allow scalars into Altivec registers,
2771 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2773 if (TARGET_UPDATE
2774 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2775 && msize <= 8
2776 && !VECTOR_MODE_P (m2)
2777 && !FLOAT128_VECTOR_P (m2)
2778 && !complex_p
2779 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2780 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2781 && !(TARGET_E500_DOUBLE && msize == 8))
2783 addr_mask |= RELOAD_REG_PRE_INCDEC;
2785 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2786 we don't allow PRE_MODIFY for some multi-register
2787 operations. */
2788 switch (m)
2790 default:
2791 addr_mask |= RELOAD_REG_PRE_MODIFY;
2792 break;
2794 case DImode:
2795 if (TARGET_POWERPC64)
2796 addr_mask |= RELOAD_REG_PRE_MODIFY;
2797 break;
2799 case DFmode:
2800 case DDmode:
2801 if (TARGET_DF_INSN)
2802 addr_mask |= RELOAD_REG_PRE_MODIFY;
2803 break;
2808 /* GPR and FPR registers can do REG+OFFSET addressing, except
2809 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2810 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2811 if ((addr_mask != 0) && !indexed_only_p
2812 && msize <= 8
2813 && (rc == RELOAD_REG_GPR
2814 || ((msize == 8 || m2 == SFmode)
2815 && (rc == RELOAD_REG_FPR
2816 || (rc == RELOAD_REG_VMX
2817 && TARGET_P9_DFORM_SCALAR)))))
2818 addr_mask |= RELOAD_REG_OFFSET;
2820 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2821 instructions are enabled. The offset for 128-bit VSX registers is
2822 only 12 bits. While GPRs can handle the full offset range, VSX
2823 registers can only handle the restricted range. */
2824 else if ((addr_mask != 0) && !indexed_only_p
2825 && msize == 16 && TARGET_P9_DFORM_VECTOR
2826 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2827 || (m2 == TImode && TARGET_VSX_TIMODE)))
2829 addr_mask |= RELOAD_REG_OFFSET;
2830 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2831 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2834 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2835 addressing on 128-bit types. */
2836 if (rc == RELOAD_REG_VMX && msize == 16
2837 && (addr_mask & RELOAD_REG_VALID) != 0)
2838 addr_mask |= RELOAD_REG_AND_M16;
2840 reg_addr[m].addr_mask[rc] = addr_mask;
2841 any_addr_mask |= addr_mask;
2844 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
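/* As an illustrative summary (the exact bits depend on the configuration):
   on a 64-bit VSX target the DFmode GPR mask might end up as
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET, and
   RELOAD_REG_ANY is simply the union of the per-class masks computed
   above.  */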
2849 /* Initialize the various global tables that are based on register size. */
2850 static void
2851 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2853 ssize_t r, m, c;
2854 int align64;
2855 int align32;
2857 /* Precalculate REGNO_REG_CLASS. */
2858 rs6000_regno_regclass[0] = GENERAL_REGS;
2859 for (r = 1; r < 32; ++r)
2860 rs6000_regno_regclass[r] = BASE_REGS;
2862 for (r = 32; r < 64; ++r)
2863 rs6000_regno_regclass[r] = FLOAT_REGS;
2865 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2866 rs6000_regno_regclass[r] = NO_REGS;
2868 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2869 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2871 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2872 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2873 rs6000_regno_regclass[r] = CR_REGS;
2875 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2876 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2877 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2878 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2879 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2880 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2881 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2882 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2883 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2884 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2885 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2886 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2888 /* Precalculate register class to simpler reload register class. We don't
2889 need all of the register classes that are combinations of different
2890 classes, just the simple ones that have constraint letters. */
2891 for (c = 0; c < N_REG_CLASSES; c++)
2892 reg_class_to_reg_type[c] = NO_REG_TYPE;
2894 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2895 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2897 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2898 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2899 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2900 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2901 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2903 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2904 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2905 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2907 if (TARGET_VSX)
2909 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2910 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2912 else
2914 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2915 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2918 /* Precalculate the valid memory formats as well as the vector information;
2919 this must be set up before the rs6000_hard_regno_nregs_internal calls
2920 below. */
2921 gcc_assert ((int)VECTOR_NONE == 0);
2922 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2923 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2925 gcc_assert ((int)CODE_FOR_nothing == 0);
2926 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2928 gcc_assert ((int)NO_REGS == 0);
2929 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2931 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2932 controls whether the compiler uses native alignment or still uses 128-bit alignment. */
2933 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2935 align64 = 64;
2936 align32 = 32;
2938 else
2940 align64 = 128;
2941 align32 = 128;
2944 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2945 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2946 if (TARGET_FLOAT128)
2948 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2949 rs6000_vector_align[KFmode] = 128;
2951 if (FLOAT128_IEEE_P (TFmode))
2953 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2954 rs6000_vector_align[TFmode] = 128;
2958 /* V2DF mode, VSX only. */
2959 if (TARGET_VSX)
2961 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2962 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2963 rs6000_vector_align[V2DFmode] = align64;
2966 /* V4SF mode, either VSX or Altivec. */
2967 if (TARGET_VSX)
2969 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2970 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2971 rs6000_vector_align[V4SFmode] = align32;
2973 else if (TARGET_ALTIVEC)
2975 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2976 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2977 rs6000_vector_align[V4SFmode] = align32;
2980 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2981 and stores. */
2982 if (TARGET_ALTIVEC)
2984 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2985 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2986 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2987 rs6000_vector_align[V4SImode] = align32;
2988 rs6000_vector_align[V8HImode] = align32;
2989 rs6000_vector_align[V16QImode] = align32;
2991 if (TARGET_VSX)
2993 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2994 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2995 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2997 else
2999 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3000 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3001 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3005 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3006 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3007 if (TARGET_VSX)
3009 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3010 rs6000_vector_unit[V2DImode]
3011 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3012 rs6000_vector_align[V2DImode] = align64;
3014 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3015 rs6000_vector_unit[V1TImode]
3016 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3017 rs6000_vector_align[V1TImode] = 128;
3020 /* DFmode, see if we want to use the VSX unit. Memory is handled
3021 differently, so don't set rs6000_vector_mem. */
3022 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3024 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3025 rs6000_vector_align[DFmode] = 64;
3028 /* SFmode, see if we want to use the VSX unit. */
3029 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3031 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3032 rs6000_vector_align[SFmode] = 32;
3035 /* Allow TImode in VSX register and set the VSX memory macros. */
3036 if (TARGET_VSX && TARGET_VSX_TIMODE)
3038 rs6000_vector_mem[TImode] = VECTOR_VSX;
3039 rs6000_vector_align[TImode] = align64;
3042 /* TODO add SPE and paired floating point vector support. */
3044 /* Register class constraints for the constraints that depend on compile
3045 switches. When the VSX code was added, different constraints were added
3046 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3047 of the VSX registers are used. The register classes for scalar floating
3048 point types are set based on whether we allow that type into the upper
3049 (Altivec) registers. GCC has register classes to target the Altivec
3050 registers for load/store operations, so that it can select a VSX memory
3051 operation instead of the traditional floating point operation. The
3052 constraints are:
3054 d - Register class to use with traditional DFmode instructions.
3055 f - Register class to use with traditional SFmode instructions.
3056 v - Altivec register.
3057 wa - Any VSX register.
3058 wc - Reserved to represent individual CR bits (used in LLVM).
3059 wd - Preferred register class for V2DFmode.
3060 wf - Preferred register class for V4SFmode.
3061 wg - Float register for power6x move insns.
3062 wh - FP register for direct move instructions.
3063 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3064 wj - FP or VSX register to hold 64-bit integers for direct moves.
3065 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3066 wl - Float register if we can do 32-bit signed int loads.
3067 wm - VSX register for ISA 2.07 direct move operations.
3068 wn - always NO_REGS.
3069 wr - GPR if 64-bit mode is permitted.
3070 ws - Register class to do ISA 2.06 DF operations.
3071 wt - VSX register for TImode in VSX registers.
3072 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3073 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3074 ww - Register class to do SF conversions in with VSX operations.
3075 wx - Float register if we can do 32-bit int stores.
3076 wy - Register class to do ISA 2.07 SF operations.
3077 wz - Float register if we can do 32-bit unsigned int loads. */
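/* As an illustration of how these are consumed (not a pattern from this
   file): a define_insn in rs6000.md can give an operand the constraint
   "=ws", and the register class it allows is then whatever
   rs6000_constraints[RS6000_CONSTRAINT_ws] is set to below (VSX_REGS or
   FLOAT_REGS, depending on -mupper-regs-df). The newer wb, we, wo, wp and
   wq constraints are described where they are set up below.  */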
3079 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3080 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3082 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3083 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3085 if (TARGET_VSX)
3087 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3088 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3089 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3091 if (TARGET_VSX_TIMODE)
3092 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3094 if (TARGET_UPPER_REGS_DF) /* DFmode */
3096 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3097 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3099 else
3100 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3102 if (TARGET_UPPER_REGS_DF) /* DImode */
3103 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3104 else
3105 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3108 /* Add conditional constraints based on various options, to allow us to
3109 collapse multiple insn patterns. */
3110 if (TARGET_ALTIVEC)
3111 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3113 if (TARGET_MFPGPR) /* DFmode */
3114 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3116 if (TARGET_LFIWAX)
3117 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3119 if (TARGET_DIRECT_MOVE)
3121 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3122 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3123 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3124 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3125 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3126 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3129 if (TARGET_POWERPC64)
3130 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3132 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3134 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3135 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3136 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3138 else if (TARGET_P8_VECTOR)
3140 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3141 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3143 else if (TARGET_VSX)
3144 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3146 if (TARGET_STFIWX)
3147 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3149 if (TARGET_LFIWZX)
3150 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3152 if (TARGET_FLOAT128)
3154 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3155 if (FLOAT128_IEEE_P (TFmode))
3156 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3159 /* Support for new D-form instructions. */
3160 if (TARGET_P9_DFORM_SCALAR)
3161 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3163 /* Support for ISA 3.0 (power9) vectors. */
3164 if (TARGET_P9_VECTOR)
3165 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3167 /* Support for new direct moves (ISA 3.0 + 64bit). */
3168 if (TARGET_DIRECT_MOVE_128)
3169 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3171 /* Set up the reload helper and direct move functions. */
3172 if (TARGET_VSX || TARGET_ALTIVEC)
3174 if (TARGET_64BIT)
3176 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3177 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3178 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3179 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3180 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3181 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3182 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3183 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3184 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3185 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3186 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3187 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3188 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3189 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3190 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3191 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3192 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3193 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3194 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3195 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3197 if (FLOAT128_VECTOR_P (KFmode))
3199 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3200 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3203 if (FLOAT128_VECTOR_P (TFmode))
3205 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3206 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3209 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3210 available. */
3211 if (TARGET_NO_SDMODE_STACK)
3213 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3214 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3217 if (TARGET_VSX_TIMODE)
3219 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3220 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3223 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3225 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3226 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3227 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3228 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3229 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3230 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3231 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3232 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3233 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3235 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3236 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3237 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3238 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3239 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3240 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3241 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3242 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3243 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3245 if (FLOAT128_VECTOR_P (KFmode))
3247 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3248 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3251 if (FLOAT128_VECTOR_P (TFmode))
3253 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3254 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3258 else
3260 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3261 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3262 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3263 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3264 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3265 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3266 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3267 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3268 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3269 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3270 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3271 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3272 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3273 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3274 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3275 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3276 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3277 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3278 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3279 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3281 if (FLOAT128_VECTOR_P (KFmode))
3283 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3284 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3287 if (FLOAT128_IEEE_P (TFmode))
3289 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3290 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3293 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3294 available. */
3295 if (TARGET_NO_SDMODE_STACK)
3297 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3298 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3301 if (TARGET_VSX_TIMODE)
3303 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3304 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3307 if (TARGET_DIRECT_MOVE)
3309 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3310 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3311 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3315 if (TARGET_UPPER_REGS_DF)
3316 reg_addr[DFmode].scalar_in_vmx_p = true;
3318 if (TARGET_UPPER_REGS_DI)
3319 reg_addr[DImode].scalar_in_vmx_p = true;
3321 if (TARGET_UPPER_REGS_SF)
3322 reg_addr[SFmode].scalar_in_vmx_p = true;
3325 /* Setup the fusion operations. */
3326 if (TARGET_P8_FUSION)
3328 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3329 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3330 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3331 if (TARGET_64BIT)
3332 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3335 if (TARGET_P9_FUSION)
3337 struct fuse_insns {
3338 enum machine_mode mode; /* mode of the fused type. */
3339 enum machine_mode pmode; /* pointer mode. */
3340 enum rs6000_reload_reg_type rtype; /* register type. */
3341 enum insn_code load; /* load insn. */
3342 enum insn_code store; /* store insn. */
3345 static const struct fuse_insns addis_insns[] = {
3346 { SFmode, DImode, RELOAD_REG_FPR,
3347 CODE_FOR_fusion_fpr_di_sf_load,
3348 CODE_FOR_fusion_fpr_di_sf_store },
3350 { SFmode, SImode, RELOAD_REG_FPR,
3351 CODE_FOR_fusion_fpr_si_sf_load,
3352 CODE_FOR_fusion_fpr_si_sf_store },
3354 { DFmode, DImode, RELOAD_REG_FPR,
3355 CODE_FOR_fusion_fpr_di_df_load,
3356 CODE_FOR_fusion_fpr_di_df_store },
3358 { DFmode, SImode, RELOAD_REG_FPR,
3359 CODE_FOR_fusion_fpr_si_df_load,
3360 CODE_FOR_fusion_fpr_si_df_store },
3362 { DImode, DImode, RELOAD_REG_FPR,
3363 CODE_FOR_fusion_fpr_di_di_load,
3364 CODE_FOR_fusion_fpr_di_di_store },
3366 { DImode, SImode, RELOAD_REG_FPR,
3367 CODE_FOR_fusion_fpr_si_di_load,
3368 CODE_FOR_fusion_fpr_si_di_store },
3370 { QImode, DImode, RELOAD_REG_GPR,
3371 CODE_FOR_fusion_gpr_di_qi_load,
3372 CODE_FOR_fusion_gpr_di_qi_store },
3374 { QImode, SImode, RELOAD_REG_GPR,
3375 CODE_FOR_fusion_gpr_si_qi_load,
3376 CODE_FOR_fusion_gpr_si_qi_store },
3378 { HImode, DImode, RELOAD_REG_GPR,
3379 CODE_FOR_fusion_gpr_di_hi_load,
3380 CODE_FOR_fusion_gpr_di_hi_store },
3382 { HImode, SImode, RELOAD_REG_GPR,
3383 CODE_FOR_fusion_gpr_si_hi_load,
3384 CODE_FOR_fusion_gpr_si_hi_store },
3386 { SImode, DImode, RELOAD_REG_GPR,
3387 CODE_FOR_fusion_gpr_di_si_load,
3388 CODE_FOR_fusion_gpr_di_si_store },
3390 { SImode, SImode, RELOAD_REG_GPR,
3391 CODE_FOR_fusion_gpr_si_si_load,
3392 CODE_FOR_fusion_gpr_si_si_store },
3394 { SFmode, DImode, RELOAD_REG_GPR,
3395 CODE_FOR_fusion_gpr_di_sf_load,
3396 CODE_FOR_fusion_gpr_di_sf_store },
3398 { SFmode, SImode, RELOAD_REG_GPR,
3399 CODE_FOR_fusion_gpr_si_sf_load,
3400 CODE_FOR_fusion_gpr_si_sf_store },
3402 { DImode, DImode, RELOAD_REG_GPR,
3403 CODE_FOR_fusion_gpr_di_di_load,
3404 CODE_FOR_fusion_gpr_di_di_store },
3406 { DFmode, DImode, RELOAD_REG_GPR,
3407 CODE_FOR_fusion_gpr_di_df_load,
3408 CODE_FOR_fusion_gpr_di_df_store },
3411 enum machine_mode cur_pmode = Pmode;
3412 size_t i;
3414 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3416 enum machine_mode xmode = addis_insns[i].mode;
3417 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3419 if (addis_insns[i].pmode != cur_pmode)
3420 continue;
3422 if (rtype == RELOAD_REG_FPR
3423 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3424 continue;
3426 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3427 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
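/* Editor's note: the loop above is a table-driven initializer -- entries
   whose pointer mode does not match the current Pmode are skipped, and the
   rest populate a per-mode dispatch table.  A minimal self-contained sketch
   of the pattern, with made-up names (insn_entry, dispatch), follows.  */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>

enum mode { M_QI, M_HI, M_SI, M_DI, N_MODES };

struct insn_entry {
  enum mode mode;	/* mode the insn operates on */
  enum mode pmode;	/* pointer mode the insn assumes */
  int code;		/* insn code to record */
};

static const struct insn_entry table[] = {
  { M_QI, M_DI, 101 },
  { M_QI, M_SI, 102 },
  { M_HI, M_DI, 103 },
};

int
main (void)
{
  enum mode cur_pmode = M_DI;		/* stand-in for Pmode */
  int dispatch[N_MODES] = { 0 };
  unsigned i;

  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    {
      if (table[i].pmode != cur_pmode)
	continue;			/* wrong pointer mode: skipped */
      dispatch[table[i].mode] = table[i].code;
    }

  assert (dispatch[M_QI] == 101 && dispatch[M_HI] == 103);
  assert (dispatch[M_SI] == 0);		/* no matching entry for SImode */
  return 0;
}
#endif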
3431 /* Note which types we support for fusing a TOC setup plus a memory insn.
3432 We only do fused TOCs for medium/large code models. */
3433 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3434 && (TARGET_CMODEL != CMODEL_SMALL))
3436 reg_addr[QImode].fused_toc = true;
3437 reg_addr[HImode].fused_toc = true;
3438 reg_addr[SImode].fused_toc = true;
3439 reg_addr[DImode].fused_toc = true;
3440 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3442 if (TARGET_SINGLE_FLOAT)
3443 reg_addr[SFmode].fused_toc = true;
3444 if (TARGET_DOUBLE_FLOAT)
3445 reg_addr[DFmode].fused_toc = true;
3449 /* Precalculate HARD_REGNO_NREGS. */
3450 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3451 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3452 rs6000_hard_regno_nregs[m][r]
3453 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3455 /* Precalculate HARD_REGNO_MODE_OK. */
3456 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3457 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3458 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3459 rs6000_hard_regno_mode_ok_p[m][r] = true;
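/* Editor's note: the precalculation loops above trade a one-time pass over
   all (regno, mode) pairs for O(1) table lookups later.  A self-contained
   sketch of the memoization pattern, with a made-up predicate, follows.  */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>
#include <stdbool.h>

#define N_REGS 4
#define N_MODES 3

static bool ok_table[N_MODES][N_REGS];

/* Stand-in predicate: pretend even registers accept every mode and odd
   registers accept only mode 0.  */
static bool
regno_mode_ok (int regno, int mode)
{
  return (regno % 2 == 0) || mode == 0;
}

int
main (void)
{
  int r, m;

  for (r = 0; r < N_REGS; r++)		/* compute once up front */
    for (m = 0; m < N_MODES; m++)
      ok_table[m][r] = regno_mode_ok (r, m);

  assert (ok_table[0][1] == true);	/* each query is now a lookup */
  assert (ok_table[2][1] == false);
  return 0;
}
#endif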
3461 /* Precalculate CLASS_MAX_NREGS sizes. */
3462 for (c = 0; c < LIM_REG_CLASSES; ++c)
3464 int reg_size;
3466 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3467 reg_size = UNITS_PER_VSX_WORD;
3469 else if (c == ALTIVEC_REGS)
3470 reg_size = UNITS_PER_ALTIVEC_WORD;
3472 else if (c == FLOAT_REGS)
3473 reg_size = UNITS_PER_FP_WORD;
3475 else
3476 reg_size = UNITS_PER_WORD;
3478 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3480 machine_mode m2 = (machine_mode)m;
3481 int reg_size2 = reg_size;
3483 /* TDmode and IBM 128-bit floating point always take 2 registers, even
3484 in VSX. */
3485 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3486 reg_size2 = UNITS_PER_FP_WORD;
3488 rs6000_class_max_nregs[m][c]
3489 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3493 if (TARGET_E500_DOUBLE)
3494 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
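/* Editor's note: the nregs computation above is the standard integer
   ceiling division, ceil (mode_size / reg_size), so a 16-byte mode needs
   two 8-byte registers but only one 16-byte VSX register.  A tiny
   self-contained sketch: */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>

static int
ceil_div (int size, int reg_size)
{
  return (size + reg_size - 1) / reg_size;
}

int
main (void)
{
  assert (ceil_div (16, 8) == 2);	/* e.g. IBM long double in FPRs */
  assert (ceil_div (16, 16) == 1);	/* e.g. V2DF in one VSX register */
  assert (ceil_div (4, 8) == 1);	/* a small mode still takes a reg */
  return 0;
}
#endif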
3496 /* Calculate which modes to automatically generate code to use the
3497 reciprocal divide and square root instructions. In the future, possibly
3498 automatically generate the instructions even if the user did not specify
3499 -mrecip. The older machines' double-precision reciprocal square root
3500 estimate is not accurate enough. A refinement sketch follows this block. */
3501 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3502 if (TARGET_FRES)
3503 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3504 if (TARGET_FRE)
3505 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3506 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3507 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3508 if (VECTOR_UNIT_VSX_P (V2DFmode))
3509 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3511 if (TARGET_FRSQRTES)
3512 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3513 if (TARGET_FRSQRTE)
3514 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3515 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3516 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3517 if (VECTOR_UNIT_VSX_P (V2DFmode))
3518 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3520 if (rs6000_recip_control)
3522 if (!flag_finite_math_only)
3523 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3524 if (flag_trapping_math)
3525 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3526 if (!flag_reciprocal_math)
3527 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3528 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3530 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3531 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3532 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3534 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3535 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3536 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3538 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3539 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3540 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3542 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3543 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3544 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3546 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3547 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3548 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3550 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3551 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3552 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3554 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3555 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3556 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3558 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3559 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3560 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
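/* Editor's note: what the -mrecip machinery ultimately relies on is
   Newton-Raphson refinement of a low-precision hardware estimate
   (fres/frsqrte).  A portable stand-in is sketched below; it seeds the
   iteration with a crude constant rather than a hardware estimate
   instruction.  */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>
#include <math.h>

/* One Newton-Raphson step for the reciprocal: e' = e * (2 - x*e).
   Each step roughly doubles the number of correct bits.  */
static double
refine_recip (double x, double e)
{
  return e * (2.0 - x * e);
}

int
main (void)
{
  double x = 3.0;
  double e = 0.3;			/* crude estimate of 1/3 */
  int i;

  for (i = 0; i < 4; i++)
    e = refine_recip (x, e);
  assert (fabs (e - 1.0 / 3.0) < 1e-12);
  return 0;
}
#endif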
3564 /* Update the addr mask bits in reg_addr to help secondary reload and the
3565 legitimate-address support figure out the appropriate addressing to
3566 use. */
3567 rs6000_setup_reg_addr_masks ();
3569 if (global_init_p || TARGET_DEBUG_TARGET)
3571 if (TARGET_DEBUG_REG)
3572 rs6000_debug_reg_global ();
3574 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3575 fprintf (stderr,
3576 "SImode variable mult cost = %d\n"
3577 "SImode constant mult cost = %d\n"
3578 "SImode short constant mult cost = %d\n"
3579 "DImode multipliciation cost = %d\n"
3580 "SImode division cost = %d\n"
3581 "DImode division cost = %d\n"
3582 "Simple fp operation cost = %d\n"
3583 "DFmode multiplication cost = %d\n"
3584 "SFmode division cost = %d\n"
3585 "DFmode division cost = %d\n"
3586 "cache line size = %d\n"
3587 "l1 cache size = %d\n"
3588 "l2 cache size = %d\n"
3589 "simultaneous prefetches = %d\n"
3590 "\n",
3591 rs6000_cost->mulsi,
3592 rs6000_cost->mulsi_const,
3593 rs6000_cost->mulsi_const9,
3594 rs6000_cost->muldi,
3595 rs6000_cost->divsi,
3596 rs6000_cost->divdi,
3597 rs6000_cost->fp,
3598 rs6000_cost->dmul,
3599 rs6000_cost->sdiv,
3600 rs6000_cost->ddiv,
3601 rs6000_cost->cache_line_size,
3602 rs6000_cost->l1_cache_size,
3603 rs6000_cost->l2_cache_size,
3604 rs6000_cost->simultaneous_prefetches);
3608 #if TARGET_MACHO
3609 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3611 static void
3612 darwin_rs6000_override_options (void)
3614 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3615 off. */
3616 rs6000_altivec_abi = 1;
3617 TARGET_ALTIVEC_VRSAVE = 1;
3618 rs6000_current_abi = ABI_DARWIN;
3620 if (DEFAULT_ABI == ABI_DARWIN
3621 && TARGET_64BIT)
3622 darwin_one_byte_bool = 1;
3624 if (TARGET_64BIT && ! TARGET_POWERPC64)
3626 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3627 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3629 if (flag_mkernel)
3631 rs6000_default_long_calls = 1;
3632 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3635 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3636 Altivec. */
3637 if (!flag_mkernel && !flag_apple_kext
3638 && TARGET_64BIT
3639 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3640 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3642 /* Unless the user (not the configurer) has explicitly overridden
3643 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3644 G4 unless targeting the kernel. */
3645 if (!flag_mkernel
3646 && !flag_apple_kext
3647 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3648 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3649 && ! global_options_set.x_rs6000_cpu_index)
3651 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3654 #endif
3656 /* If not otherwise specified by a target, make 'long double' equivalent to
3657 'double'. */
3659 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3660 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3661 #endif
3663 /* Return the builtin mask built from the various options that could
3664 affect which builtins are enabled. In the past we used target_flags, but
3665 we've run out of bits, and some options like SPE and PAIRED are no longer
3666 in target_flags. */
3668 HOST_WIDE_INT
3669 rs6000_builtin_mask_calculate (void)
3671 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3672 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3673 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3674 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3675 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3676 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3677 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3678 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3679 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3680 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3681 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3682 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3683 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3684 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3685 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3686 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3687 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3688 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3689 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3690 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3691 | ((TARGET_FLOAT128) ? RS6000_BTM_FLOAT128 : 0));
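/* Editor's note: the function above folds each enabled ISA feature into one
   bit of a wide mask; consumers then gate builtins with a simple '&' test.
   A self-contained sketch with illustrative masks (not the real
   RS6000_BTM_* values) follows.  */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>

#define BTM_ALTIVEC (1u << 0)
#define BTM_VSX     (1u << 1)
#define BTM_HTM     (1u << 2)

static unsigned
mask_calculate (int have_altivec, int have_vsx, int have_htm)
{
  return ((have_altivec ? BTM_ALTIVEC : 0)
	  | (have_vsx ? BTM_VSX : 0)
	  | (have_htm ? BTM_HTM : 0));
}

int
main (void)
{
  unsigned mask = mask_calculate (1, 1, 0);

  assert ((mask & BTM_VSX) != 0);	/* VSX builtins enabled */
  assert ((mask & BTM_HTM) == 0);	/* HTM builtins filtered out */
  return 0;
}
#endif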
3694 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3695 to clobber the XER[CA] bit because clobbering that bit without telling
3696 the compiler worked just fine with versions of GCC before GCC 5, and
3697 breaking a lot of older code in ways that are hard to track down is
3698 not such a great idea. */
3700 static rtx_insn *
3701 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3702 vec<const char *> &/*constraints*/,
3703 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3705 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3706 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3707 return NULL;
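/* Editor's note: a PowerPC-only illustration of why the hook above is
   conservative -- addic writes XER[CA], yet the asm below declares no
   clobber for it.  Code like this worked by luck before GCC 5, so every
   asm is now assumed to clobber XER[CA].  */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
long
carry_clobbering_asm (long a)
{
  long r;
  /* addic sets the carry bit without telling the compiler.  */
  __asm__ ("addic %0,%1,1" : "=r" (r) : "r" (a));
  return r;
}
#endif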
3710 /* Override command line options. Mostly we process the processor type and
3711 sometimes adjust other TARGET_ options. */
3713 static bool
3714 rs6000_option_override_internal (bool global_init_p)
3716 bool ret = true;
3717 bool have_cpu = false;
3719 /* The default cpu requested at configure time, if any. */
3720 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3722 HOST_WIDE_INT set_masks;
3723 int cpu_index;
3724 int tune_index;
3725 struct cl_target_option *main_target_opt
3726 = ((global_init_p || target_option_default_node == NULL)
3727 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3729 /* Print defaults. */
3730 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3731 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3733 /* Remember the explicit arguments. */
3734 if (global_init_p)
3735 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3737 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3738 library functions, so warn about it. The flag may be useful for
3739 performance studies from time to time though, so don't disable it
3740 entirely. */
3741 if (global_options_set.x_rs6000_alignment_flags
3742 && rs6000_alignment_flags == MASK_ALIGN_POWER
3743 && DEFAULT_ABI == ABI_DARWIN
3744 && TARGET_64BIT)
3745 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3746 " it is incompatible with the installed C and C++ libraries");
3748 /* Numerous experiments show that IRA-based loop pressure
3749 calculation works better for RTL loop-invariant motion on targets
3750 with enough (>= 32) registers. It is an expensive optimization,
3751 so it is enabled only for peak performance. */
3752 if (optimize >= 3 && global_init_p
3753 && !global_options_set.x_flag_ira_loop_pressure)
3754 flag_ira_loop_pressure = 1;
3756 /* Set the pointer size. */
3757 if (TARGET_64BIT)
3759 rs6000_pmode = (int)DImode;
3760 rs6000_pointer_size = 64;
3762 else
3764 rs6000_pmode = (int)SImode;
3765 rs6000_pointer_size = 32;
3768 /* Some OSs don't support saving the high part of 64-bit registers on context
3769 switch. Other OSs don't support saving Altivec registers. On those OSs,
3770 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3771 if the user wants either, the user must explicitly specify them and we
3772 won't interfere with the user's specification. */
3774 set_masks = POWERPC_MASKS;
3775 #ifdef OS_MISSING_POWERPC64
3776 if (OS_MISSING_POWERPC64)
3777 set_masks &= ~OPTION_MASK_POWERPC64;
3778 #endif
3779 #ifdef OS_MISSING_ALTIVEC
3780 if (OS_MISSING_ALTIVEC)
3781 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3782 #endif
3784 /* Don't let the processor default override options given explicitly. */
3785 set_masks &= ~rs6000_isa_flags_explicit;
3787 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3788 the cpu in a target attribute or pragma, but did not specify a tuning
3789 option, use the cpu for the tuning option rather than the option specified
3790 with -mtune on the command line. Process a '--with-cpu' configuration
3791 request as an implicit --cpu. */
3792 if (rs6000_cpu_index >= 0)
3794 cpu_index = rs6000_cpu_index;
3795 have_cpu = true;
3797 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3799 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3800 have_cpu = true;
3802 else if (implicit_cpu)
3804 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3805 have_cpu = true;
3807 else
3809 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3810 const char *default_cpu = ((!TARGET_POWERPC64)
3811 ? "powerpc"
3812 : ((BYTES_BIG_ENDIAN)
3813 ? "powerpc64"
3814 : "powerpc64le"));
3816 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3817 have_cpu = false;
3820 gcc_assert (cpu_index >= 0);
3822 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3823 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3824 with those from the cpu, except for options that were explicitly set. If
3825 we don't have a cpu, do not override the target bits set in
3826 TARGET_DEFAULT. */
3827 if (have_cpu)
3829 rs6000_isa_flags &= ~set_masks;
3830 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3831 & set_masks);
3833 else
3835 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3836 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3837 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3838 to using rs6000_isa_flags, we need to do the initialization here.
3840 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3841 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3842 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3843 : processor_target_table[cpu_index].target_enable);
3844 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3847 if (rs6000_tune_index >= 0)
3848 tune_index = rs6000_tune_index;
3849 else if (have_cpu)
3850 rs6000_tune_index = tune_index = cpu_index;
3851 else
3853 size_t i;
3854 enum processor_type tune_proc
3855 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3857 tune_index = -1;
3858 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3859 if (processor_target_table[i].processor == tune_proc)
3861 rs6000_tune_index = tune_index = i;
3862 break;
3866 gcc_assert (tune_index >= 0);
3867 rs6000_cpu = processor_target_table[tune_index].processor;
3869 /* Pick defaults for SPE-related control flags. Do this early to make sure
3870 that the TARGET_ macros are representative ASAP. */
3872 int spe_capable_cpu =
3873 (rs6000_cpu == PROCESSOR_PPC8540
3874 || rs6000_cpu == PROCESSOR_PPC8548);
3876 if (!global_options_set.x_rs6000_spe_abi)
3877 rs6000_spe_abi = spe_capable_cpu;
3879 if (!global_options_set.x_rs6000_spe)
3880 rs6000_spe = spe_capable_cpu;
3882 if (!global_options_set.x_rs6000_float_gprs)
3883 rs6000_float_gprs =
3884 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3885 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3886 : 0);
3889 if (global_options_set.x_rs6000_spe_abi
3890 && rs6000_spe_abi
3891 && !TARGET_SPE_ABI)
3892 error ("not configured for SPE ABI");
3894 if (global_options_set.x_rs6000_spe
3895 && rs6000_spe
3896 && !TARGET_SPE)
3897 error ("not configured for SPE instruction set");
3899 if (main_target_opt != NULL
3900 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3901 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3902 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3903 error ("target attribute or pragma changes SPE ABI");
3905 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3906 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3907 || rs6000_cpu == PROCESSOR_PPCE5500)
3909 if (TARGET_ALTIVEC)
3910 error ("AltiVec not supported in this target");
3911 if (TARGET_SPE)
3912 error ("SPE not supported in this target");
3914 if (rs6000_cpu == PROCESSOR_PPCE6500)
3916 if (TARGET_SPE)
3917 error ("SPE not supported in this target");
3920 /* Disable Cell microcode if we are optimizing for the Cell
3921 and not optimizing for size. */
3922 if (rs6000_gen_cell_microcode == -1)
3923 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3924 && !optimize_size);
3926 /* If we are optimizing big endian systems for space and it's OK to
3927 use instructions that would be microcoded on the Cell, use the
3928 load/store multiple and string instructions. */
3929 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3930 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3931 | OPTION_MASK_STRING);
3933 /* Don't allow -mmultiple or -mstring on little endian systems
3934 unless the cpu is a 750, because on other hardware the
3935 instructions don't work in little endian mode and cause an
3936 alignment trap. The 750 does not cause an alignment trap (except
3937 when the target is unaligned). */
3939 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3941 if (TARGET_MULTIPLE)
3943 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3944 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3945 warning (0, "-mmultiple is not supported on little endian systems");
3948 if (TARGET_STRING)
3950 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3951 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3952 warning (0, "-mstring is not supported on little endian systems");
3956 /* If little-endian, default to -mstrict-align on older processors.
3957 Testing for htm matches power8 and later. */
3958 if (!BYTES_BIG_ENDIAN
3959 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3960 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3962 /* -maltivec={le,be} implies -maltivec. */
3963 if (rs6000_altivec_element_order != 0)
3964 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3966 /* Disallow -maltivec=le in big endian mode for now. This is not
3967 known to be useful for anyone. */
3968 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3970 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3971 rs6000_altivec_element_order = 0;
3974 /* Add some warnings for VSX. */
3975 if (TARGET_VSX)
3977 const char *msg = NULL;
3978 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3979 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3981 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3982 msg = N_("-mvsx requires hardware floating point");
3983 else
3985 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3986 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3989 else if (TARGET_PAIRED_FLOAT)
3990 msg = N_("-mvsx and -mpaired are incompatible");
3991 else if (TARGET_AVOID_XFORM > 0)
3992 msg = N_("-mvsx needs indexed addressing");
3993 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3994 & OPTION_MASK_ALTIVEC))
3996 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3997 msg = N_("-mvsx and -mno-altivec are incompatible");
3998 else
3999 msg = N_("-mno-altivec disables vsx");
4002 if (msg)
4004 warning (0, msg);
4005 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4006 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4010 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4011 the -mcpu setting to enable options that conflict. */
4012 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4013 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4014 | OPTION_MASK_ALTIVEC
4015 | OPTION_MASK_VSX)) != 0)
4016 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4017 | OPTION_MASK_DIRECT_MOVE)
4018 & ~rs6000_isa_flags_explicit);
4020 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4021 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4023 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4024 unless the user explicitly used -mno-<option> to disable the code. */
4025 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4026 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4027 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4028 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4029 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4030 else if (TARGET_VSX)
4031 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4032 else if (TARGET_POPCNTD)
4033 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4034 else if (TARGET_DFP)
4035 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4036 else if (TARGET_CMPB)
4037 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4038 else if (TARGET_FPRND)
4039 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4040 else if (TARGET_POPCNTB)
4041 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4042 else if (TARGET_ALTIVEC)
4043 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4045 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4047 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4048 error ("-mcrypto requires -maltivec");
4049 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4052 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4054 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4055 error ("-mdirect-move requires -mvsx");
4056 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4059 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4061 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4062 error ("-mpower8-vector requires -maltivec");
4063 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4066 if (TARGET_P8_VECTOR && !TARGET_VSX)
4068 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4069 error ("-mpower8-vector requires -mvsx");
4070 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4073 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4075 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4076 error ("-mvsx-timode requires -mvsx");
4077 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4080 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4082 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4083 error ("-mhard-dfp requires -mhard-float");
4084 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4087 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4088 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4089 set the individual option. */
4090 if (TARGET_UPPER_REGS > 0)
4092 if (TARGET_VSX
4093 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4095 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4096 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4098 if (TARGET_VSX
4099 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4101 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4102 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4104 if (TARGET_P8_VECTOR
4105 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4107 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4108 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4111 else if (TARGET_UPPER_REGS == 0)
4113 if (TARGET_VSX
4114 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4116 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4117 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4119 if (TARGET_VSX
4120 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4122 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4123 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4125 if (TARGET_P8_VECTOR
4126 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4128 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4129 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4133 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4135 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4136 error ("-mupper-regs-df requires -mvsx");
4137 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4140 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4142 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4143 error ("-mupper-regs-di requires -mvsx");
4144 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4147 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4149 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4150 error ("-mupper-regs-sf requires -mpower8-vector");
4151 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4154 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4155 silently turn off quad memory mode. */
4156 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4158 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4159 warning (0, N_("-mquad-memory requires 64-bit mode"));
4161 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4162 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4164 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4165 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4168 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4169 the words are reversed, but atomic operations can still be done by
4170 swapping the words. */
4171 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4174 warning (0, N_("-mquad-memory is not available in little endian mode"));
4176 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4179 /* Assume that if the user asked for normal quad memory instructions, they
4180 want the atomic versions as well, unless they explicitly told us not to
4181 use quad word atomic instructions. */
4182 if (TARGET_QUAD_MEMORY
4183 && !TARGET_QUAD_MEMORY_ATOMIC
4184 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4185 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4187 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4188 generating power8 instructions. */
4189 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4190 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4191 & OPTION_MASK_P8_FUSION);
4193 /* Setting additional fusion flags turns on base fusion. */
4194 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4196 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4198 if (TARGET_P8_FUSION_SIGN)
4199 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4201 if (TARGET_TOC_FUSION)
4202 error ("-mtoc-fusion requires -mpower8-fusion");
4204 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4206 else
4207 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4210 /* Power9 fusion is a superset of power8 fusion. */
4211 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4213 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4215 error ("-mpower9-fusion requires -mpower8-fusion");
4216 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4218 else
4219 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4222 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4223 generating power9 instructions. */
4224 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4225 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4226 & OPTION_MASK_P9_FUSION);
4228 /* Power8 does not fuse sign-extended loads with the addis. If we are
4229 optimizing at high levels for speed, convert a sign-extended load into a
4230 zero-extending load and an explicit sign extension; see the sketch below. */
4231 if (TARGET_P8_FUSION
4232 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4233 && optimize_function_for_speed_p (cfun)
4234 && optimize >= 3)
4235 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
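/* Editor's note: the transformation described above is valid because a
   sign-extending load is equivalent to a zero-extending load followed by
   an explicit sign extension (lha vs. lhz + extsh).  A portable,
   self-contained sketch of that equivalence: */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>
#include <stdint.h>

static int32_t
load_signed (const int16_t *p)
{
  return *p;				/* sign-extending load (lha) */
}

static int32_t
load_zero_then_extend (const int16_t *p)
{
  uint16_t u = *(const uint16_t *) p;	/* zero-extending load (lhz) */
  return (int16_t) u;			/* explicit sign extend (extsh) */
}

int
main (void)
{
  int16_t v = -12345;
  assert (load_signed (&v) == load_zero_then_extend (&v));
  return 0;
}
#endif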
4237 /* TOC fusion requires 64-bit and medium/large code model. */
4238 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4240 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4241 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4242 warning (0, N_("-mtoc-fusion requires 64-bit"));
4245 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4247 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4248 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4249 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4252 /* Turn on -mtoc-fusion by default if we have p8-fusion and a 64-bit
4253 medium/large code model. */
4254 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4255 && (TARGET_CMODEL != CMODEL_SMALL)
4256 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4257 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4259 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4260 -mpower9-dform-vector. */
4261 if (TARGET_P9_DFORM_BOTH > 0)
4263 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4264 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4266 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4267 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4269 else if (TARGET_P9_DFORM_BOTH == 0)
4271 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4272 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4274 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4275 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4278 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4279 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4281 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4282 error ("-mpower9-dform requires -mpower9-vector");
4283 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4284 | OPTION_MASK_P9_DFORM_VECTOR);
4287 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4289 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4290 error ("-mpower9-dform requires -mupper-regs-df");
4291 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4294 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4296 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4297 error ("-mpower9-dform requires -mupper-regs-sf");
4298 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4301 /* ISA 3.0 vector instructions include ISA 2.07. */
4302 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4304 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4305 error ("-mpower9-vector requires -mpower8-vector");
4306 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4309 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4310 but do show up with -mno-lra. Given that -mlra will become the default
4311 once PR 69847 is fixed, turn off the options with problems by default if
4312 -mno-lra was used, and warn if the user explicitly asked for the option.
4314 Enable -mpower9-dform-vector by default with LRA and the other power9
4315 options. Enable -mvsx-timode by default with LRA and VSX. */
4316 if (!TARGET_LRA)
4318 if (TARGET_VSX_TIMODE)
4320 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4321 warning (0, "-mvsx-timode might need -mlra");
4323 else
4324 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4328 else
4330 if (TARGET_VSX && !TARGET_VSX_TIMODE
4331 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4332 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4335 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4336 support. If we only have ISA 2.06 support, and the user did not specify
4337 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4338 but we don't enable the full vectorization support. */
4339 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4340 TARGET_ALLOW_MOVMISALIGN = 1;
4342 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4344 if (TARGET_ALLOW_MOVMISALIGN > 0
4345 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4346 error ("-mallow-movmisalign requires -mvsx");
4348 TARGET_ALLOW_MOVMISALIGN = 0;
4351 /* Determine when unaligned vector accesses are permitted, and when
4352 they are preferred over masked Altivec loads. Note that if
4353 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4354 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4355 not true. */
4356 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4358 if (!TARGET_VSX)
4360 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4361 error ("-mefficient-unaligned-vsx requires -mvsx");
4363 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4366 else if (!TARGET_ALLOW_MOVMISALIGN)
4368 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4369 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4371 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4375 /* __float128 requires VSX support. */
4376 if (TARGET_FLOAT128 && !TARGET_VSX)
4378 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4379 error ("-mfloat128 requires VSX support");
4381 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4384 /* IEEE 128-bit floating point hardware instructions imply enabling
4385 __float128. */
4386 if (TARGET_FLOAT128_HW
4387 && (rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
4388 | OPTION_MASK_DIRECT_MOVE
4389 | OPTION_MASK_UPPER_REGS_DI
4390 | OPTION_MASK_UPPER_REGS_DF
4391 | OPTION_MASK_UPPER_REGS_SF)) == 0)
4393 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4394 error ("-mfloat128-hardware requires full ISA 3.0 support");
4396 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4399 else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
4400 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
4401 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4403 if (TARGET_FLOAT128_HW
4404 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4405 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4407 /* Print the options after updating the defaults. */
4408 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4409 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4411 /* E500mc does "better" if we inline more aggressively. Respect the
4412 user's opinion, though. */
4413 if (rs6000_block_move_inline_limit == 0
4414 && (rs6000_cpu == PROCESSOR_PPCE500MC
4415 || rs6000_cpu == PROCESSOR_PPCE500MC64
4416 || rs6000_cpu == PROCESSOR_PPCE5500
4417 || rs6000_cpu == PROCESSOR_PPCE6500))
4418 rs6000_block_move_inline_limit = 128;
4420 /* store_one_arg depends on expand_block_move to handle at least the
4421 size of reg_parm_stack_space. */
4422 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4423 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4425 if (global_init_p)
4427 /* If the appropriate debug option is enabled, replace the target hooks
4428 with debug versions that call the real version and then print
4429 debugging information. */
4430 if (TARGET_DEBUG_COST)
4432 targetm.rtx_costs = rs6000_debug_rtx_costs;
4433 targetm.address_cost = rs6000_debug_address_cost;
4434 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4437 if (TARGET_DEBUG_ADDR)
4439 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4440 targetm.legitimize_address = rs6000_debug_legitimize_address;
4441 rs6000_secondary_reload_class_ptr
4442 = rs6000_debug_secondary_reload_class;
4443 rs6000_secondary_memory_needed_ptr
4444 = rs6000_debug_secondary_memory_needed;
4445 rs6000_cannot_change_mode_class_ptr
4446 = rs6000_debug_cannot_change_mode_class;
4447 rs6000_preferred_reload_class_ptr
4448 = rs6000_debug_preferred_reload_class;
4449 rs6000_legitimize_reload_address_ptr
4450 = rs6000_debug_legitimize_reload_address;
4451 rs6000_mode_dependent_address_ptr
4452 = rs6000_debug_mode_dependent_address;
4455 if (rs6000_veclibabi_name)
4457 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4458 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4459 else
4461 error ("unknown vectorization library ABI type (%s) for "
4462 "-mveclibabi= switch", rs6000_veclibabi_name);
4463 ret = false;
4468 if (!global_options_set.x_rs6000_long_double_type_size)
4470 if (main_target_opt != NULL
4471 && (main_target_opt->x_rs6000_long_double_type_size
4472 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4473 error ("target attribute or pragma changes long double size");
4474 else
4475 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4478 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4479 if (!global_options_set.x_rs6000_ieeequad)
4480 rs6000_ieeequad = 1;
4481 #endif
4483 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4484 target attribute or pragma which automatically enables both options,
4485 unless the altivec ABI was set. This is set by default for 64-bit, but
4486 not for 32-bit. */
4487 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4488 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4489 | OPTION_MASK_FLOAT128)
4490 & ~rs6000_isa_flags_explicit);
4492 /* Enable Altivec ABI for AIX -maltivec. */
4493 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4495 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4496 error ("target attribute or pragma changes AltiVec ABI");
4497 else
4498 rs6000_altivec_abi = 1;
4501 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4502 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4503 be explicitly overridden in either case. */
4504 if (TARGET_ELF)
4506 if (!global_options_set.x_rs6000_altivec_abi
4507 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4509 if (main_target_opt != NULL &&
4510 !main_target_opt->x_rs6000_altivec_abi)
4511 error ("target attribute or pragma changes AltiVec ABI");
4512 else
4513 rs6000_altivec_abi = 1;
4517 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4518 So far, the only darwin64 targets are also MACH-O. */
4519 if (TARGET_MACHO
4520 && DEFAULT_ABI == ABI_DARWIN
4521 && TARGET_64BIT)
4523 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4524 error ("target attribute or pragma changes darwin64 ABI");
4525 else
4527 rs6000_darwin64_abi = 1;
4528 /* Default to natural alignment, for better performance. */
4529 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4533 /* Place FP constants in the constant pool instead of the TOC
4534 if section anchors are enabled. */
4535 if (flag_section_anchors
4536 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4537 TARGET_NO_FP_IN_TOC = 1;
4539 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4540 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4542 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4543 SUBTARGET_OVERRIDE_OPTIONS;
4544 #endif
4545 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4546 SUBSUBTARGET_OVERRIDE_OPTIONS;
4547 #endif
4548 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4549 SUB3TARGET_OVERRIDE_OPTIONS;
4550 #endif
4552 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4553 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4555 /* For the E500 family of cores, reset the single/double FP flags to let us
4556 check that they remain constant across attributes or pragmas. Also,
4557 clear a possible request for string instructions, which are not supported
4558 and which we might have silently enabled above for -Os.
4560 For other families, clear ISEL in case it was set implicitly. */
4563 switch (rs6000_cpu)
4565 case PROCESSOR_PPC8540:
4566 case PROCESSOR_PPC8548:
4567 case PROCESSOR_PPCE500MC:
4568 case PROCESSOR_PPCE500MC64:
4569 case PROCESSOR_PPCE5500:
4570 case PROCESSOR_PPCE6500:
4572 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4573 rs6000_double_float = TARGET_E500_DOUBLE;
4575 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4577 break;
4579 default:
4581 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4582 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4584 break;
4587 if (main_target_opt)
4589 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4590 error ("target attribute or pragma changes single precision floating "
4591 "point");
4592 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4593 error ("target attribute or pragma changes double precision floating "
4594 "point");
4597 /* Detect invalid option combinations with E500. */
4598 CHECK_E500_OPTIONS;
4600 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4601 && rs6000_cpu != PROCESSOR_POWER5
4602 && rs6000_cpu != PROCESSOR_POWER6
4603 && rs6000_cpu != PROCESSOR_POWER7
4604 && rs6000_cpu != PROCESSOR_POWER8
4605 && rs6000_cpu != PROCESSOR_POWER9
4606 && rs6000_cpu != PROCESSOR_PPCA2
4607 && rs6000_cpu != PROCESSOR_CELL
4608 && rs6000_cpu != PROCESSOR_PPC476);
4609 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4610 || rs6000_cpu == PROCESSOR_POWER5
4611 || rs6000_cpu == PROCESSOR_POWER7
4612 || rs6000_cpu == PROCESSOR_POWER8);
4613 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4614 || rs6000_cpu == PROCESSOR_POWER5
4615 || rs6000_cpu == PROCESSOR_POWER6
4616 || rs6000_cpu == PROCESSOR_POWER7
4617 || rs6000_cpu == PROCESSOR_POWER8
4618 || rs6000_cpu == PROCESSOR_POWER9
4619 || rs6000_cpu == PROCESSOR_PPCE500MC
4620 || rs6000_cpu == PROCESSOR_PPCE500MC64
4621 || rs6000_cpu == PROCESSOR_PPCE5500
4622 || rs6000_cpu == PROCESSOR_PPCE6500);
4624 /* Allow debug switches to override the above settings. These are set to -1
4625 in rs6000.opt to indicate the user hasn't directly set the switch. */
4626 if (TARGET_ALWAYS_HINT >= 0)
4627 rs6000_always_hint = TARGET_ALWAYS_HINT;
4629 if (TARGET_SCHED_GROUPS >= 0)
4630 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4632 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4633 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4635 rs6000_sched_restricted_insns_priority
4636 = (rs6000_sched_groups ? 1 : 0);
4638 /* Handle -msched-costly-dep option. */
4639 rs6000_sched_costly_dep
4640 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4642 if (rs6000_sched_costly_dep_str)
4644 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4645 rs6000_sched_costly_dep = no_dep_costly;
4646 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4647 rs6000_sched_costly_dep = all_deps_costly;
4648 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4649 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4650 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4651 rs6000_sched_costly_dep = store_to_load_dep_costly;
4652 else
4653 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4654 atoi (rs6000_sched_costly_dep_str));
4657 /* Handle -minsert-sched-nops option. */
4658 rs6000_sched_insert_nops
4659 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4661 if (rs6000_sched_insert_nops_str)
4663 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4664 rs6000_sched_insert_nops = sched_finish_none;
4665 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4666 rs6000_sched_insert_nops = sched_finish_pad_groups;
4667 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4668 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4669 else
4670 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4671 atoi (rs6000_sched_insert_nops_str));
4674 if (global_init_p)
4676 #ifdef TARGET_REGNAMES
4677 /* If the user desires alternate register names, copy in the
4678 alternate names now. */
4679 if (TARGET_REGNAMES)
4680 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4681 #endif
4683 /* Set aix_struct_return last, after the ABI is determined.
4684 If -maix-struct-return or -msvr4-struct-return was explicitly
4685 used, don't override with the ABI default. */
4686 if (!global_options_set.x_aix_struct_return)
4687 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4689 #if 0
4690 /* IBM XL compiler defaults to unsigned bitfields. */
4691 if (TARGET_XL_COMPAT)
4692 flag_signed_bitfields = 0;
4693 #endif
4695 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4696 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4698 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4700 /* We can only guarantee the availability of DI pseudo-ops when
4701 assembling for 64-bit targets. */
4702 if (!TARGET_64BIT)
4704 targetm.asm_out.aligned_op.di = NULL;
4705 targetm.asm_out.unaligned_op.di = NULL;
4709 /* Set branch target alignment, if not optimizing for size. */
4710 if (!optimize_size)
4712 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4713 aligned 8-byte to avoid misprediction by the branch predictor. */
4714 if (rs6000_cpu == PROCESSOR_TITAN
4715 || rs6000_cpu == PROCESSOR_CELL)
4717 if (align_functions <= 0)
4718 align_functions = 8;
4719 if (align_jumps <= 0)
4720 align_jumps = 8;
4721 if (align_loops <= 0)
4722 align_loops = 8;
4724 if (rs6000_align_branch_targets)
4726 if (align_functions <= 0)
4727 align_functions = 16;
4728 if (align_jumps <= 0)
4729 align_jumps = 16;
4730 if (align_loops <= 0)
4732 can_override_loop_align = 1;
4733 align_loops = 16;
4736 if (align_jumps_max_skip <= 0)
4737 align_jumps_max_skip = 15;
4738 if (align_loops_max_skip <= 0)
4739 align_loops_max_skip = 15;
4742 /* Arrange to save and restore machine status around nested functions. */
4743 init_machine_status = rs6000_init_machine_status;
4745 /* We should always be splitting complex arguments, but we can't break
4746 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4747 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4748 targetm.calls.split_complex_arg = NULL;
4751 /* Initialize rs6000_cost with the appropriate target costs. */
4752 if (optimize_size)
4753 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4754 else
4755 switch (rs6000_cpu)
4757 case PROCESSOR_RS64A:
4758 rs6000_cost = &rs64a_cost;
4759 break;
4761 case PROCESSOR_MPCCORE:
4762 rs6000_cost = &mpccore_cost;
4763 break;
4765 case PROCESSOR_PPC403:
4766 rs6000_cost = &ppc403_cost;
4767 break;
4769 case PROCESSOR_PPC405:
4770 rs6000_cost = &ppc405_cost;
4771 break;
4773 case PROCESSOR_PPC440:
4774 rs6000_cost = &ppc440_cost;
4775 break;
4777 case PROCESSOR_PPC476:
4778 rs6000_cost = &ppc476_cost;
4779 break;
4781 case PROCESSOR_PPC601:
4782 rs6000_cost = &ppc601_cost;
4783 break;
4785 case PROCESSOR_PPC603:
4786 rs6000_cost = &ppc603_cost;
4787 break;
4789 case PROCESSOR_PPC604:
4790 rs6000_cost = &ppc604_cost;
4791 break;
4793 case PROCESSOR_PPC604e:
4794 rs6000_cost = &ppc604e_cost;
4795 break;
4797 case PROCESSOR_PPC620:
4798 rs6000_cost = &ppc620_cost;
4799 break;
4801 case PROCESSOR_PPC630:
4802 rs6000_cost = &ppc630_cost;
4803 break;
4805 case PROCESSOR_CELL:
4806 rs6000_cost = &ppccell_cost;
4807 break;
4809 case PROCESSOR_PPC750:
4810 case PROCESSOR_PPC7400:
4811 rs6000_cost = &ppc750_cost;
4812 break;
4814 case PROCESSOR_PPC7450:
4815 rs6000_cost = &ppc7450_cost;
4816 break;
4818 case PROCESSOR_PPC8540:
4819 case PROCESSOR_PPC8548:
4820 rs6000_cost = &ppc8540_cost;
4821 break;
4823 case PROCESSOR_PPCE300C2:
4824 case PROCESSOR_PPCE300C3:
4825 rs6000_cost = &ppce300c2c3_cost;
4826 break;
4828 case PROCESSOR_PPCE500MC:
4829 rs6000_cost = &ppce500mc_cost;
4830 break;
4832 case PROCESSOR_PPCE500MC64:
4833 rs6000_cost = &ppce500mc64_cost;
4834 break;
4836 case PROCESSOR_PPCE5500:
4837 rs6000_cost = &ppce5500_cost;
4838 break;
4840 case PROCESSOR_PPCE6500:
4841 rs6000_cost = &ppce6500_cost;
4842 break;
4844 case PROCESSOR_TITAN:
4845 rs6000_cost = &titan_cost;
4846 break;
4848 case PROCESSOR_POWER4:
4849 case PROCESSOR_POWER5:
4850 rs6000_cost = &power4_cost;
4851 break;
4853 case PROCESSOR_POWER6:
4854 rs6000_cost = &power6_cost;
4855 break;
4857 case PROCESSOR_POWER7:
4858 rs6000_cost = &power7_cost;
4859 break;
4861 case PROCESSOR_POWER8:
4862 rs6000_cost = &power8_cost;
4863 break;
4865 case PROCESSOR_POWER9:
4866 rs6000_cost = &power9_cost;
4867 break;
4869 case PROCESSOR_PPCA2:
4870 rs6000_cost = &ppca2_cost;
4871 break;
4873 default:
4874 gcc_unreachable ();
4877 if (global_init_p)
4879 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4880 rs6000_cost->simultaneous_prefetches,
4881 global_options.x_param_values,
4882 global_options_set.x_param_values);
4883 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4884 global_options.x_param_values,
4885 global_options_set.x_param_values);
4886 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4887 rs6000_cost->cache_line_size,
4888 global_options.x_param_values,
4889 global_options_set.x_param_values);
4890 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4891 global_options.x_param_values,
4892 global_options_set.x_param_values);
4894 /* Increase loop peeling limits based on performance analysis. */
4895 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4896 global_options.x_param_values,
4897 global_options_set.x_param_values);
4898 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4899 global_options.x_param_values,
4900 global_options_set.x_param_values);
4902 /* If using typedef char *va_list, signal that
4903 __builtin_va_start (&ap, 0) can be optimized to
4904 ap = __builtin_next_arg (0). */
4905 if (DEFAULT_ABI != ABI_V4)
4906 targetm.expand_builtin_va_start = NULL;
4909 /* Set up single/double float flags.
4910 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4911 then set both flags. */
4912 if (TARGET_HARD_FLOAT && TARGET_FPRS
4913 && rs6000_single_float == 0 && rs6000_double_float == 0)
4914 rs6000_single_float = rs6000_double_float = 1;
4916 /* If not explicitly specified via option, decide whether to generate indexed
4917 load/store instructions. */
4918 if (TARGET_AVOID_XFORM == -1)
4919 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4920 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4921 need indexed accesses and the type used is the scalar type of the element
4922 being loaded or stored. */
4923 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4924 && !TARGET_ALTIVEC);
4926 /* Set the -mrecip options. */
4927 if (rs6000_recip_name)
4929 char *p = ASTRDUP (rs6000_recip_name);
4930 char *q;
4931 unsigned int mask, i;
4932 bool invert;
4934 while ((q = strtok (p, ",")) != NULL)
4936 p = NULL;
4937 if (*q == '!')
4939 invert = true;
4940 q++;
4942 else
4943 invert = false;
4945 if (!strcmp (q, "default"))
4946 mask = ((TARGET_RECIP_PRECISION)
4947 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4948 else
4950 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4951 if (!strcmp (q, recip_options[i].string))
4953 mask = recip_options[i].mask;
4954 break;
4957 if (i == ARRAY_SIZE (recip_options))
4959 error ("unknown option for -mrecip=%s", q);
4960 invert = false;
4961 mask = 0;
4962 ret = false;
4966 if (invert)
4967 rs6000_recip_control &= ~mask;
4968 else
4969 rs6000_recip_control |= mask;
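/* Editor's note: the loop above is classic strtok comma-list parsing with a
   '!' prefix meaning "invert this option".  A self-contained sketch with
   made-up option names (div, rsqrt): */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>
#include <string.h>

int
main (void)
{
  char buf[] = "div,!rsqrt";
  char *p = buf, *q;
  unsigned div_mask = 1u << 0, rsqrt_mask = 1u << 1;
  unsigned control = rsqrt_mask;	/* pretend rsqrt starts enabled */

  while ((q = strtok (p, ",")) != NULL)
    {
      int invert = 0;
      unsigned mask = 0;

      p = NULL;				/* let strtok continue the scan */
      if (*q == '!')
	{
	  invert = 1;
	  q++;
	}
      if (!strcmp (q, "div"))
	mask = div_mask;
      else if (!strcmp (q, "rsqrt"))
	mask = rsqrt_mask;

      if (invert)
	control &= ~mask;
      else
	control |= mask;
    }

  assert ((control & div_mask) != 0 && (control & rsqrt_mask) == 0);
  return 0;
}
#endif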
4973 /* Set the builtin mask from the various options that could affect
4974 which builtins are enabled. In the past we used target_flags, but we've
4975 run out of bits, and some options like SPE and PAIRED are no longer in
4976 target_flags. */
4977 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4978 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4979 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4980 rs6000_builtin_mask);
4982 /* Initialize all of the registers. */
4983 rs6000_init_hard_regno_mode_ok (global_init_p);
4985 /* Save the initial options in case the user uses function-specific options. */
4986 if (global_init_p)
4987 target_option_default_node = target_option_current_node
4988 = build_target_option_node (&global_options);
4990 /* If not explicitly specified via option, decide whether to generate the
4991 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
4992 if (TARGET_LINK_STACK == -1)
4993 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4995 return ret;
4998 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4999 define the target cpu type. */
5001 static void
5002 rs6000_option_override (void)
5004 (void) rs6000_option_override_internal (true);
5006 /* Register machine-specific passes. This needs to be done at start-up.
5007 It's convenient to do it here (like i386 does). */
5008 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5010 struct register_pass_info analyze_swaps_info
5011 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5013 register_pass (&analyze_swaps_info);
5017 /* Implement targetm.vectorize.builtin_mask_for_load. */
5018 static tree
5019 rs6000_builtin_mask_for_load (void)
5021 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5022 if ((TARGET_ALTIVEC && !TARGET_VSX)
5023 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5024 return altivec_builtin_mask_for_load;
5025 else
5026 return 0;
5029 /* Implement LOOP_ALIGN. */
5030 int
5031 rs6000_loop_align (rtx label)
5033 basic_block bb;
5034 int ninsns;
5036 /* Don't override loop alignment if -falign-loops was specified. */
5037 if (!can_override_loop_align)
5038 return align_loops_log;
5040 bb = BLOCK_FOR_INSN (label);
5041 ninsns = num_loop_insns (bb->loop_father);
5043 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5044 if (ninsns > 4 && ninsns <= 8
5045 && (rs6000_cpu == PROCESSOR_POWER4
5046 || rs6000_cpu == PROCESSOR_POWER5
5047 || rs6000_cpu == PROCESSOR_POWER6
5048 || rs6000_cpu == PROCESSOR_POWER7
5049 || rs6000_cpu == PROCESSOR_POWER8
5050 || rs6000_cpu == PROCESSOR_POWER9))
5051 return 5;
5052 else
5053 return align_loops_log;
5056 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5057 static int
5058 rs6000_loop_align_max_skip (rtx_insn *label)
5060 return (1 << rs6000_loop_align (label)) - 1;
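/* Editor's note: alignments here are log2 values, so the maximum number of
   padding bytes worth skipping is the alignment minus one; that is all the
   shift-and-subtract above computes.  A one-line check: */
#if 0 /* Illustrative sketch only, not part of rs6000.c.  */
#include <assert.h>

int
main (void)
{
  int align_log = 5;			/* 2**5 = 32-byte alignment */
  assert (((1 << align_log) - 1) == 31);
  return 0;
}
#endif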
5063 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5064 after applying N iterations. This routine does not determine
5065 how many iterations are required to reach the desired alignment. */
5067 static bool
5068 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5070 if (is_packed)
5071 return false;
5073 if (TARGET_32BIT)
5075 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5076 return true;
5078 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5079 return true;
5081 return false;
5083 else
5085 if (TARGET_MACHO)
5086 return false;
5088 /* Assume that all other types are naturally aligned. CHECKME! */
5089 return true;
5093 /* Return true if the vector misalignment factor is supported by the
5094 target. */
5095 static bool
5096 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5097 const_tree type,
5098 int misalignment,
5099 bool is_packed)
5101 if (TARGET_VSX)
5103 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5104 return true;
5106 /* Return false if the movmisalign pattern is not supported for this mode. */
5107 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5108 return false;
5110 if (misalignment == -1)
5112 /* Misalignment factor is unknown at compile time but we know
5113 it's word aligned. */
5114 if (rs6000_vector_alignment_reachable (type, is_packed))
5116 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5118 if (element_size == 64 || element_size == 32)
5119 return true;
5122 return false;
5125 /* VSX supports word-aligned vectors. */
5126 if (misalignment % 4 == 0)
5127 return true;
5129 return false;
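/* Editorial worked example: for a 16-byte VSX vector, a known
   misalignment of 4, 8, or 12 bytes satisfies misalignment % 4 == 0 and
   is supported, while a misalignment of 2 bytes is rejected unless
   TARGET_EFFICIENT_UNALIGNED_VSX is set.  */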
5132 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5133 static int
5134 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5135 tree vectype, int misalign)
5137 unsigned elements;
5138 tree elem_type;
5140 switch (type_of_cost)
5142 case scalar_stmt:
5143 case scalar_load:
5144 case scalar_store:
5145 case vector_stmt:
5146 case vector_load:
5147 case vector_store:
5148 case vec_to_scalar:
5149 case scalar_to_vec:
5150 case cond_branch_not_taken:
5151 return 1;
5153 case vec_perm:
5154 if (TARGET_VSX)
5155 return 3;
5156 else
5157 return 1;
5159 case vec_promote_demote:
5160 if (TARGET_VSX)
5161 return 4;
5162 else
5163 return 1;
5165 case cond_branch_taken:
5166 return 3;
5168 case unaligned_load:
5169 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5170 return 1;
5172 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5174 elements = TYPE_VECTOR_SUBPARTS (vectype);
5175 if (elements == 2)
5176 /* Double word aligned. */
5177 return 2;
5179 if (elements == 4)
5181 switch (misalign)
5183 case 8:
5184 /* Double word aligned. */
5185 return 2;
5187 case -1:
5188 /* Unknown misalignment. */
5189 case 4:
5190 case 12:
5191 /* Word aligned. */
5192 return 22;
5194 default:
5195 gcc_unreachable ();
5200 if (TARGET_ALTIVEC)
5201 /* Misaligned loads are not supported. */
5202 gcc_unreachable ();
5204 return 2;
5206 case unaligned_store:
5207 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5208 return 1;
5210 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5212 elements = TYPE_VECTOR_SUBPARTS (vectype);
5213 if (elements == 2)
5214 /* Double word aligned. */
5215 return 2;
5217 if (elements == 4)
5219 switch (misalign)
5221 case 8:
5222 /* Double word aligned. */
5223 return 2;
5225 case -1:
5226 /* Unknown misalignment. */
5227 case 4:
5228 case 12:
5229 /* Word aligned. */
5230 return 23;
5232 default:
5233 gcc_unreachable ();
5238 if (TARGET_ALTIVEC)
5239 /* Misaligned stores are not supported. */
5240 gcc_unreachable ();
5242 return 2;
5244 case vec_construct:
5245 elements = TYPE_VECTOR_SUBPARTS (vectype);
5246 elem_type = TREE_TYPE (vectype);
5247 /* 32-bit vectors loaded into registers are stored as double
5248 precision, so we need n/2 converts in addition to the usual
5249 n/2 merges to construct a vector of short floats from them. */
5250 if (SCALAR_FLOAT_TYPE_P (elem_type)
5251 && TYPE_PRECISION (elem_type) == 32)
5252 return elements + 1;
5253 else
5254 return elements / 2 + 1;
5256 default:
5257 gcc_unreachable ();
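/* Editorial worked example: under VSX with -mallow-movmisalign, an
   unaligned V4SF load with a known misalignment of 8 bytes is double
   word aligned and costs 2, while a misalignment of 4 or 12 bytes (or
   an unknown one) is only word aligned and costs 22; the corresponding
   unaligned store costs 23.  */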
5261 /* Implement targetm.vectorize.preferred_simd_mode. */
5263 static machine_mode
5264 rs6000_preferred_simd_mode (machine_mode mode)
5266 if (TARGET_VSX)
5267 switch (mode)
5269 case DFmode:
5270 return V2DFmode;
5271 default:;
5273 if (TARGET_ALTIVEC || TARGET_VSX)
5274 switch (mode)
5276 case SFmode:
5277 return V4SFmode;
5278 case TImode:
5279 return V1TImode;
5280 case DImode:
5281 return V2DImode;
5282 case SImode:
5283 return V4SImode;
5284 case HImode:
5285 return V8HImode;
5286 case QImode:
5287 return V16QImode;
5288 default:;
5290 if (TARGET_SPE)
5291 switch (mode)
5293 case SFmode:
5294 return V2SFmode;
5295 case SImode:
5296 return V2SImode;
5297 default:;
5299 if (TARGET_PAIRED_FLOAT
5300 && mode == SFmode)
5301 return V2SFmode;
5302 return word_mode;
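/* Editorial summary of the mapping above: DFmode prefers V2DFmode when
   VSX is available; SFmode, TImode, DImode, SImode, HImode, and QImode
   map to the 16-byte AltiVec/VSX vector modes; returning word_mode
   means no vector mode is preferred.  */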
5305 typedef struct _rs6000_cost_data
5307 struct loop *loop_info;
5308 unsigned cost[3];
5309 } rs6000_cost_data;
5311 /* Test for likely overcommitment of vector hardware resources. If a
5312 loop iteration is relatively large, and too large a percentage of
5313 instructions in the loop are vectorized, the cost model may not
5314 adequately reflect delays from unavailable vector resources.
5315 Penalize the loop body cost for this case. */
5317 static void
5318 rs6000_density_test (rs6000_cost_data *data)
5320 const int DENSITY_PCT_THRESHOLD = 85;
5321 const int DENSITY_SIZE_THRESHOLD = 70;
5322 const int DENSITY_PENALTY = 10;
5323 struct loop *loop = data->loop_info;
5324 basic_block *bbs = get_loop_body (loop);
5325 int nbbs = loop->num_nodes;
5326 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5327 int i, density_pct;
5329 for (i = 0; i < nbbs; i++)
5331 basic_block bb = bbs[i];
5332 gimple_stmt_iterator gsi;
5334 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5336 gimple *stmt = gsi_stmt (gsi);
5337 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5339 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5340 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5341 not_vec_cost++;
5345 free (bbs);
5346 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5348 if (density_pct > DENSITY_PCT_THRESHOLD
5349 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5351 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5352 if (dump_enabled_p ())
5353 dump_printf_loc (MSG_NOTE, vect_location,
5354 "density %d%%, cost %d exceeds threshold, penalizing "
5355 "loop body cost by %d%%", density_pct,
5356 vec_cost + not_vec_cost, DENSITY_PENALTY);
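/* Editorial worked example: with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 90 * 100 / 100 = 90, which exceeds the 85% threshold,
   and the loop size 100 exceeds 70, so the body cost is scaled to
   90 * (100 + 10) / 100 = 99.  */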
5360 /* Implement targetm.vectorize.init_cost. */
5362 static void *
5363 rs6000_init_cost (struct loop *loop_info)
5365 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5366 data->loop_info = loop_info;
5367 data->cost[vect_prologue] = 0;
5368 data->cost[vect_body] = 0;
5369 data->cost[vect_epilogue] = 0;
5370 return data;
5373 /* Implement targetm.vectorize.add_stmt_cost. */
5375 static unsigned
5376 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5377 struct _stmt_vec_info *stmt_info, int misalign,
5378 enum vect_cost_model_location where)
5380 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5381 unsigned retval = 0;
5383 if (flag_vect_cost_model)
5385 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5386 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5387 misalign);
5388 /* Statements in an inner loop relative to the loop being
5389 vectorized are weighted more heavily. The value here is
5390 arbitrary and could potentially be improved with analysis. */
5391 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5392 count *= 50; /* FIXME. */
5394 retval = (unsigned) (count * stmt_cost);
5395 cost_data->cost[where] += retval;
5398 return retval;
5401 /* Implement targetm.vectorize.finish_cost. */
5403 static void
5404 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5405 unsigned *body_cost, unsigned *epilogue_cost)
5407 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5409 if (cost_data->loop_info)
5410 rs6000_density_test (cost_data);
5412 *prologue_cost = cost_data->cost[vect_prologue];
5413 *body_cost = cost_data->cost[vect_body];
5414 *epilogue_cost = cost_data->cost[vect_epilogue];
5417 /* Implement targetm.vectorize.destroy_cost_data. */
5419 static void
5420 rs6000_destroy_cost_data (void *data)
5422 free (data);
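/* Editorial note: the four hooks above form the vectorizer's costing
   lifecycle: rs6000_init_cost allocates the per-loop accumulator,
   rs6000_add_stmt_cost charges each statement to the prologue, body, or
   epilogue bucket, rs6000_finish_cost applies the density penalty and
   reports the totals, and rs6000_destroy_cost_data frees the data.  */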
5425 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5426 library with vectorized intrinsics. */
5428 static tree
5429 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5430 tree type_in)
5432 char name[32];
5433 const char *suffix = NULL;
5434 tree fntype, new_fndecl, bdecl = NULL_TREE;
5435 int n_args = 1;
5436 const char *bname;
5437 machine_mode el_mode, in_mode;
5438 int n, in_n;
5440 /* Libmass is suitable for unsafe math only, as it does not correctly support
5441 parts of IEEE arithmetic such as denormals at the required precision. Only
5442 support it if we have VSX to use the simd d2 or f4 functions.
5443 XXX: Add variable length support. */
5444 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5445 return NULL_TREE;
5447 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5448 n = TYPE_VECTOR_SUBPARTS (type_out);
5449 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5450 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5451 if (el_mode != in_mode
5452 || n != in_n)
5453 return NULL_TREE;
5455 switch (fn)
5457 CASE_CFN_ATAN2:
5458 CASE_CFN_HYPOT:
5459 CASE_CFN_POW:
5460 n_args = 2;
5461 /* fall through */
5463 CASE_CFN_ACOS:
5464 CASE_CFN_ACOSH:
5465 CASE_CFN_ASIN:
5466 CASE_CFN_ASINH:
5467 CASE_CFN_ATAN:
5468 CASE_CFN_ATANH:
5469 CASE_CFN_CBRT:
5470 CASE_CFN_COS:
5471 CASE_CFN_COSH:
5472 CASE_CFN_ERF:
5473 CASE_CFN_ERFC:
5474 CASE_CFN_EXP2:
5475 CASE_CFN_EXP:
5476 CASE_CFN_EXPM1:
5477 CASE_CFN_LGAMMA:
5478 CASE_CFN_LOG10:
5479 CASE_CFN_LOG1P:
5480 CASE_CFN_LOG2:
5481 CASE_CFN_LOG:
5482 CASE_CFN_SIN:
5483 CASE_CFN_SINH:
5484 CASE_CFN_SQRT:
5485 CASE_CFN_TAN:
5486 CASE_CFN_TANH:
5487 if (el_mode == DFmode && n == 2)
5489 bdecl = mathfn_built_in (double_type_node, fn);
5490 suffix = "d2"; /* pow -> powd2 */
5492 else if (el_mode == SFmode && n == 4)
5494 bdecl = mathfn_built_in (float_type_node, fn);
5495 suffix = "4"; /* powf -> powf4 */
5497 else
5498 return NULL_TREE;
5499 if (!bdecl)
5500 return NULL_TREE;
5501 break;
5503 default:
5504 return NULL_TREE;
5507 gcc_assert (suffix != NULL);
5508 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5509 if (!bname)
5510 return NULL_TREE;
5512 strcpy (name, bname + sizeof ("__builtin_") - 1);
5513 strcat (name, suffix);
5515 if (n_args == 1)
5516 fntype = build_function_type_list (type_out, type_in, NULL);
5517 else if (n_args == 2)
5518 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5519 else
5520 gcc_unreachable ();
5522 /* Build a function declaration for the vectorized function. */
5523 new_fndecl = build_decl (BUILTINS_LOCATION,
5524 FUNCTION_DECL, get_identifier (name), fntype);
5525 TREE_PUBLIC (new_fndecl) = 1;
5526 DECL_EXTERNAL (new_fndecl) = 1;
5527 DECL_IS_NOVOPS (new_fndecl) = 1;
5528 TREE_READONLY (new_fndecl) = 1;
5530 return new_fndecl;
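/* Editorial worked example: for CFN_POW with V2DFmode in and out, bdecl
   is the "__builtin_pow" decl; stripping the "__builtin_" prefix and
   appending the "d2" suffix yields an external two-argument declaration
   of the MASS routine "powd2".  */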
5533 /* Returns a function decl for a vectorized version of the builtin function
5534 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5535 if it is not available. */
5537 static tree
5538 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5539 tree type_in)
5541 machine_mode in_mode, out_mode;
5542 int in_n, out_n;
5544 if (TARGET_DEBUG_BUILTIN)
5545 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5546 combined_fn_name (combined_fn (fn)),
5547 GET_MODE_NAME (TYPE_MODE (type_out)),
5548 GET_MODE_NAME (TYPE_MODE (type_in)));
5550 if (TREE_CODE (type_out) != VECTOR_TYPE
5551 || TREE_CODE (type_in) != VECTOR_TYPE
5552 || !TARGET_VECTORIZE_BUILTINS)
5553 return NULL_TREE;
5555 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5556 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5557 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5558 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5560 switch (fn)
5562 CASE_CFN_COPYSIGN:
5563 if (VECTOR_UNIT_VSX_P (V2DFmode)
5564 && out_mode == DFmode && out_n == 2
5565 && in_mode == DFmode && in_n == 2)
5566 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5567 if (VECTOR_UNIT_VSX_P (V4SFmode)
5568 && out_mode == SFmode && out_n == 4
5569 && in_mode == SFmode && in_n == 4)
5570 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5571 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5572 && out_mode == SFmode && out_n == 4
5573 && in_mode == SFmode && in_n == 4)
5574 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5575 break;
5576 CASE_CFN_CEIL:
5577 if (VECTOR_UNIT_VSX_P (V2DFmode)
5578 && out_mode == DFmode && out_n == 2
5579 && in_mode == DFmode && in_n == 2)
5580 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5581 if (VECTOR_UNIT_VSX_P (V4SFmode)
5582 && out_mode == SFmode && out_n == 4
5583 && in_mode == SFmode && in_n == 4)
5584 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5585 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5586 && out_mode == SFmode && out_n == 4
5587 && in_mode == SFmode && in_n == 4)
5588 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5589 break;
5590 CASE_CFN_FLOOR:
5591 if (VECTOR_UNIT_VSX_P (V2DFmode)
5592 && out_mode == DFmode && out_n == 2
5593 && in_mode == DFmode && in_n == 2)
5594 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5595 if (VECTOR_UNIT_VSX_P (V4SFmode)
5596 && out_mode == SFmode && out_n == 4
5597 && in_mode == SFmode && in_n == 4)
5598 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5599 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5600 && out_mode == SFmode && out_n == 4
5601 && in_mode == SFmode && in_n == 4)
5602 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5603 break;
5604 CASE_CFN_FMA:
5605 if (VECTOR_UNIT_VSX_P (V2DFmode)
5606 && out_mode == DFmode && out_n == 2
5607 && in_mode == DFmode && in_n == 2)
5608 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5609 if (VECTOR_UNIT_VSX_P (V4SFmode)
5610 && out_mode == SFmode && out_n == 4
5611 && in_mode == SFmode && in_n == 4)
5612 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5613 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5614 && out_mode == SFmode && out_n == 4
5615 && in_mode == SFmode && in_n == 4)
5616 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5617 break;
5618 CASE_CFN_TRUNC:
5619 if (VECTOR_UNIT_VSX_P (V2DFmode)
5620 && out_mode == DFmode && out_n == 2
5621 && in_mode == DFmode && in_n == 2)
5622 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5623 if (VECTOR_UNIT_VSX_P (V4SFmode)
5624 && out_mode == SFmode && out_n == 4
5625 && in_mode == SFmode && in_n == 4)
5626 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5627 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5628 && out_mode == SFmode && out_n == 4
5629 && in_mode == SFmode && in_n == 4)
5630 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5631 break;
5632 CASE_CFN_NEARBYINT:
5633 if (VECTOR_UNIT_VSX_P (V2DFmode)
5634 && flag_unsafe_math_optimizations
5635 && out_mode == DFmode && out_n == 2
5636 && in_mode == DFmode && in_n == 2)
5637 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5638 if (VECTOR_UNIT_VSX_P (V4SFmode)
5639 && flag_unsafe_math_optimizations
5640 && out_mode == SFmode && out_n == 4
5641 && in_mode == SFmode && in_n == 4)
5642 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5643 break;
5644 CASE_CFN_RINT:
5645 if (VECTOR_UNIT_VSX_P (V2DFmode)
5646 && !flag_trapping_math
5647 && out_mode == DFmode && out_n == 2
5648 && in_mode == DFmode && in_n == 2)
5649 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5650 if (VECTOR_UNIT_VSX_P (V4SFmode)
5651 && !flag_trapping_math
5652 && out_mode == SFmode && out_n == 4
5653 && in_mode == SFmode && in_n == 4)
5654 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5655 break;
5656 default:
5657 break;
5660 /* Generate calls to libmass if appropriate. */
5661 if (rs6000_veclib_handler)
5662 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5664 return NULL_TREE;
5667 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5669 static tree
5670 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5671 tree type_in)
5673 machine_mode in_mode, out_mode;
5674 int in_n, out_n;
5676 if (TARGET_DEBUG_BUILTIN)
5677 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5678 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5679 GET_MODE_NAME (TYPE_MODE (type_out)),
5680 GET_MODE_NAME (TYPE_MODE (type_in)));
5682 if (TREE_CODE (type_out) != VECTOR_TYPE
5683 || TREE_CODE (type_in) != VECTOR_TYPE
5684 || !TARGET_VECTORIZE_BUILTINS)
5685 return NULL_TREE;
5687 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5688 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5689 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5690 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5692 enum rs6000_builtins fn
5693 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5694 switch (fn)
5696 case RS6000_BUILTIN_RSQRTF:
5697 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5698 && out_mode == SFmode && out_n == 4
5699 && in_mode == SFmode && in_n == 4)
5700 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5701 break;
5702 case RS6000_BUILTIN_RSQRT:
5703 if (VECTOR_UNIT_VSX_P (V2DFmode)
5704 && out_mode == DFmode && out_n == 2
5705 && in_mode == DFmode && in_n == 2)
5706 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5707 break;
5708 case RS6000_BUILTIN_RECIPF:
5709 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5710 && out_mode == SFmode && out_n == 4
5711 && in_mode == SFmode && in_n == 4)
5712 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5713 break;
5714 case RS6000_BUILTIN_RECIP:
5715 if (VECTOR_UNIT_VSX_P (V2DFmode)
5716 && out_mode == DFmode && out_n == 2
5717 && in_mode == DFmode && in_n == 2)
5718 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5719 break;
5720 default:
5721 break;
5723 return NULL_TREE;
5726 /* Default CPU string for rs6000*_file_start functions. */
5727 static const char *rs6000_default_cpu;
5729 /* Do anything needed at the start of the asm file. */
5731 static void
5732 rs6000_file_start (void)
5734 char buffer[80];
5735 const char *start = buffer;
5736 FILE *file = asm_out_file;
5738 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5740 default_file_start ();
5742 if (flag_verbose_asm)
5744 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5746 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5748 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5749 start = "";
5752 if (global_options_set.x_rs6000_cpu_index)
5754 fprintf (file, "%s -mcpu=%s", start,
5755 processor_target_table[rs6000_cpu_index].name);
5756 start = "";
5759 if (global_options_set.x_rs6000_tune_index)
5761 fprintf (file, "%s -mtune=%s", start,
5762 processor_target_table[rs6000_tune_index].name);
5763 start = "";
5766 if (PPC405_ERRATUM77)
5768 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5769 start = "";
5772 #ifdef USING_ELFOS_H
5773 switch (rs6000_sdata)
5775 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5776 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5777 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5778 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5781 if (rs6000_sdata && g_switch_value)
5783 fprintf (file, "%s -G %d", start,
5784 g_switch_value);
5785 start = "";
5787 #endif
5789 if (*start == '\0')
5790 putc ('\n', file);
5793 #ifdef USING_ELFOS_H
5794 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5795 || !global_options_set.x_rs6000_cpu_index)
5797 fputs ("\t.machine ", asm_out_file);
5798 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5799 fputs ("power9\n", asm_out_file);
5800 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5801 fputs ("power8\n", asm_out_file);
5802 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5803 fputs ("power7\n", asm_out_file);
5804 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5805 fputs ("power6\n", asm_out_file);
5806 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5807 fputs ("power5\n", asm_out_file);
5808 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5809 fputs ("power4\n", asm_out_file);
5810 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5811 fputs ("ppc64\n", asm_out_file);
5812 else
5813 fputs ("ppc\n", asm_out_file);
5815 #endif
5817 if (DEFAULT_ABI == ABI_ELFv2)
5818 fprintf (file, "\t.abiversion 2\n");
5822 /* Return nonzero if this function is known to have a null epilogue. */
5825 direct_return (void)
5827 if (reload_completed)
5829 rs6000_stack_t *info = rs6000_stack_info ();
5831 if (info->first_gp_reg_save == 32
5832 && info->first_fp_reg_save == 64
5833 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5834 && ! info->lr_save_p
5835 && ! info->cr_save_p
5836 && info->vrsave_size == 0
5837 && ! info->push_p)
5838 return 1;
5841 return 0;
5844 /* Return the number of instructions it takes to form a constant in an
5845 integer register. */
5848 num_insns_constant_wide (HOST_WIDE_INT value)
5850 /* signed constant loadable with addi */
5851 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5852 return 1;
5854 /* constant loadable with addis */
5855 else if ((value & 0xffff) == 0
5856 && (value >> 31 == -1 || value >> 31 == 0))
5857 return 1;
5859 else if (TARGET_POWERPC64)
5861 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5862 HOST_WIDE_INT high = value >> 31;
5864 if (high == 0 || high == -1)
5865 return 2;
5867 high >>= 1;
5869 if (low == 0)
5870 return num_insns_constant_wide (high) + 1;
5871 else if (high == 0)
5872 return num_insns_constant_wide (low) + 1;
5873 else
5874 return (num_insns_constant_wide (high)
5875 + num_insns_constant_wide (low) + 1);
5878 else
5879 return 2;
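/* Editorial worked examples: 0x7fff passes the addi test and costs 1
   insn; 0x12340000 has a zero low half word and a sign-extendable high
   part, so addis also costs 1; 0x12345678 needs an addis/ori pair,
   hence 2.  */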
5883 num_insns_constant (rtx op, machine_mode mode)
5885 HOST_WIDE_INT low, high;
5887 switch (GET_CODE (op))
5889 case CONST_INT:
5890 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5891 && rs6000_is_valid_and_mask (op, mode))
5892 return 2;
5893 else
5894 return num_insns_constant_wide (INTVAL (op));
5896 case CONST_WIDE_INT:
5898 int i;
5899 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5900 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5901 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5902 return ins;
5905 case CONST_DOUBLE:
5906 if (mode == SFmode || mode == SDmode)
5908 long l;
5910 if (DECIMAL_FLOAT_MODE_P (mode))
5911 REAL_VALUE_TO_TARGET_DECIMAL32
5912 (*CONST_DOUBLE_REAL_VALUE (op), l);
5913 else
5914 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5915 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5918 long l[2];
5919 if (DECIMAL_FLOAT_MODE_P (mode))
5920 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5921 else
5922 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5923 high = l[WORDS_BIG_ENDIAN == 0];
5924 low = l[WORDS_BIG_ENDIAN != 0];
5926 if (TARGET_32BIT)
5927 return (num_insns_constant_wide (low)
5928 + num_insns_constant_wide (high));
5929 else
5931 if ((high == 0 && low >= 0)
5932 || (high == -1 && low < 0))
5933 return num_insns_constant_wide (low);
5935 else if (rs6000_is_valid_and_mask (op, mode))
5936 return 2;
5938 else if (low == 0)
5939 return num_insns_constant_wide (high) + 1;
5941 else
5942 return (num_insns_constant_wide (high)
5943 + num_insns_constant_wide (low) + 1);
5946 default:
5947 gcc_unreachable ();
5951 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5952 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5953 corresponding element of the vector, but for V4SFmode and V2SFmode,
5954 the corresponding "float" is interpreted as an SImode integer. */
5956 HOST_WIDE_INT
5957 const_vector_elt_as_int (rtx op, unsigned int elt)
5959 rtx tmp;
5961 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5962 gcc_assert (GET_MODE (op) != V2DImode
5963 && GET_MODE (op) != V2DFmode);
5965 tmp = CONST_VECTOR_ELT (op, elt);
5966 if (GET_MODE (op) == V4SFmode
5967 || GET_MODE (op) == V2SFmode)
5968 tmp = gen_lowpart (SImode, tmp);
5969 return INTVAL (tmp);
5972 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5973 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5974 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5975 all items are set to the same value and contain COPIES replicas of the
5976 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5977 operand and the others are set to the value of the operand's msb. */
5979 static bool
5980 vspltis_constant (rtx op, unsigned step, unsigned copies)
5982 machine_mode mode = GET_MODE (op);
5983 machine_mode inner = GET_MODE_INNER (mode);
5985 unsigned i;
5986 unsigned nunits;
5987 unsigned bitsize;
5988 unsigned mask;
5990 HOST_WIDE_INT val;
5991 HOST_WIDE_INT splat_val;
5992 HOST_WIDE_INT msb_val;
5994 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5995 return false;
5997 nunits = GET_MODE_NUNITS (mode);
5998 bitsize = GET_MODE_BITSIZE (inner);
5999 mask = GET_MODE_MASK (inner);
6001 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6002 splat_val = val;
6003 msb_val = val >= 0 ? 0 : -1;
6005 /* Construct the value to be splatted, if possible. If not, return 0. */
6006 for (i = 2; i <= copies; i *= 2)
6008 HOST_WIDE_INT small_val;
6009 bitsize /= 2;
6010 small_val = splat_val >> bitsize;
6011 mask >>= bitsize;
6012 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6013 return false;
6014 splat_val = small_val;
6017 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6018 if (EASY_VECTOR_15 (splat_val))
6021 /* Also check if we can splat, and then add the result to itself. Do so if
6022 the value is positive, or if the splat instruction is using OP's mode;
6023 for splat_val < 0, the splat and the add should use the same mode. */
6024 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6025 && (splat_val >= 0 || (step == 1 && copies == 1)))
6028 /* Also check if we are loading up the most significant bit, which can be
6029 done by loading up -1 and shifting the value left by -1. */
6030 else if (EASY_VECTOR_MSB (splat_val, inner))
6033 else
6034 return false;
6036 /* Check if VAL is present in every STEP-th element, and the
6037 other elements are filled with its most significant bit. */
6038 for (i = 1; i < nunits; ++i)
6040 HOST_WIDE_INT desired_val;
6041 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6042 if ((i & (step - 1)) == 0)
6043 desired_val = val;
6044 else
6045 desired_val = msb_val;
6047 if (desired_val != const_vector_elt_as_int (op, elt))
6048 return false;
6051 return true;
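/* Editorial worked example: the V8HImode constant {5,5,5,5,5,5,5,5} with
   STEP = 1 and COPIES = 1 has splat_val = 5, which satisfies
   EASY_VECTOR_15, and every element matches, so it can be generated
   with a single "vspltish %0,5".  */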
6054 /* Like vspltis_constant, but allow the value to be shifted left with a
6055 VSLDOI instruction, filling in the bottom elements with 0 or -1.
6057 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6058 for the number of zeroes to shift in, or negative for the number of 0xff
6059 bytes to shift in.
6061 OP is a CONST_VECTOR. */
6064 vspltis_shifted (rtx op)
6066 machine_mode mode = GET_MODE (op);
6067 machine_mode inner = GET_MODE_INNER (mode);
6069 unsigned i, j;
6070 unsigned nunits;
6071 unsigned mask;
6073 HOST_WIDE_INT val;
6075 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6076 return false;
6078 /* We need to create pseudo registers to do the shift, so don't recognize
6079 shift vector constants after reload. */
6080 if (!can_create_pseudo_p ())
6081 return false;
6083 nunits = GET_MODE_NUNITS (mode);
6084 mask = GET_MODE_MASK (inner);
6086 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6088 /* Check if the value can really be the operand of a vspltis[bhw]. */
6089 if (EASY_VECTOR_15 (val))
6092 /* Also check if we are loading up the most significant bit which can be done
6093 by loading up -1 and shifting the value left by -1. */
6094 else if (EASY_VECTOR_MSB (val, inner))
6097 else
6098 return 0;
6100 /* Check if VAL is present in every STEP-th element until we find elements
6101 that are 0 or all 1 bits. */
6102 for (i = 1; i < nunits; ++i)
6104 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6105 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6107 /* If the value isn't the splat value, check for the remaining elements
6108 being 0/-1. */
6109 if (val != elt_val)
6111 if (elt_val == 0)
6113 for (j = i+1; j < nunits; ++j)
6115 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6116 if (const_vector_elt_as_int (op, elt2) != 0)
6117 return 0;
6120 return (nunits - i) * GET_MODE_SIZE (inner);
6123 else if ((elt_val & mask) == mask)
6125 for (j = i+1; j < nunits; ++j)
6127 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6128 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6129 return 0;
6132 return -((nunits - i) * GET_MODE_SIZE (inner));
6135 else
6136 return 0;
6140 /* If all elements are equal, we don't need to do VSLDOI. */
6141 return 0;
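/* Editorial worked example: the big-endian V4SImode constant {5, 0, 0, 0}
   splats VAL = 5 and then finds three trailing zero elements, so the
   return value is (4 - 1) * 4 = 12: a vspltisw followed by a VSLDOI
   that shifts in twelve zero bytes rebuilds the constant.  */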
6145 /* Return true if OP is of the given MODE and can be synthesized
6146 with a vspltisb, vspltish or vspltisw. */
6148 bool
6149 easy_altivec_constant (rtx op, machine_mode mode)
6151 unsigned step, copies;
6153 if (mode == VOIDmode)
6154 mode = GET_MODE (op);
6155 else if (mode != GET_MODE (op))
6156 return false;
6158 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6159 constants. */
6160 if (mode == V2DFmode)
6161 return zero_constant (op, mode);
6163 else if (mode == V2DImode)
6165 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6166 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6167 return false;
6169 if (zero_constant (op, mode))
6170 return true;
6172 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6173 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6174 return true;
6176 return false;
6179 /* V1TImode is a special container for TImode. Ignore for now. */
6180 else if (mode == V1TImode)
6181 return false;
6183 /* Start with a vspltisw. */
6184 step = GET_MODE_NUNITS (mode) / 4;
6185 copies = 1;
6187 if (vspltis_constant (op, step, copies))
6188 return true;
6190 /* Then try with a vspltish. */
6191 if (step == 1)
6192 copies <<= 1;
6193 else
6194 step >>= 1;
6196 if (vspltis_constant (op, step, copies))
6197 return true;
6199 /* And finally a vspltisb. */
6200 if (step == 1)
6201 copies <<= 1;
6202 else
6203 step >>= 1;
6205 if (vspltis_constant (op, step, copies))
6206 return true;
6208 if (vspltis_shifted (op) != 0)
6209 return true;
6211 return false;
6214 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6215 result is OP. Abort if it is not possible. */
6218 gen_easy_altivec_constant (rtx op)
6220 machine_mode mode = GET_MODE (op);
6221 int nunits = GET_MODE_NUNITS (mode);
6222 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6223 unsigned step = nunits / 4;
6224 unsigned copies = 1;
6226 /* Start with a vspltisw. */
6227 if (vspltis_constant (op, step, copies))
6228 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6230 /* Then try with a vspltish. */
6231 if (step == 1)
6232 copies <<= 1;
6233 else
6234 step >>= 1;
6236 if (vspltis_constant (op, step, copies))
6237 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6239 /* And finally a vspltisb. */
6240 if (step == 1)
6241 copies <<= 1;
6242 else
6243 step >>= 1;
6245 if (vspltis_constant (op, step, copies))
6246 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6248 gcc_unreachable ();
6251 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6252 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6254 Return the number of instructions needed (1 or 2) in the location pointed
6255 to by NUM_INSNS_PTR.
6257 Return the constant that is being splatted via CONSTANT_PTR. */
6259 bool
6260 xxspltib_constant_p (rtx op,
6261 machine_mode mode,
6262 int *num_insns_ptr,
6263 int *constant_ptr)
6265 size_t nunits = GET_MODE_NUNITS (mode);
6266 size_t i;
6267 HOST_WIDE_INT value;
6268 rtx element;
6270 /* Set the returned values to out of bound values. */
6271 *num_insns_ptr = -1;
6272 *constant_ptr = 256;
6274 if (!TARGET_P9_VECTOR)
6275 return false;
6277 if (mode == VOIDmode)
6278 mode = GET_MODE (op);
6280 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6281 return false;
6283 /* Handle (vec_duplicate <constant>). */
6284 if (GET_CODE (op) == VEC_DUPLICATE)
6286 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6287 && mode != V2DImode)
6288 return false;
6290 element = XEXP (op, 0);
6291 if (!CONST_INT_P (element))
6292 return false;
6294 value = INTVAL (element);
6295 if (!IN_RANGE (value, -128, 127))
6296 return false;
6299 /* Handle (const_vector [...]). */
6300 else if (GET_CODE (op) == CONST_VECTOR)
6302 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6303 && mode != V2DImode)
6304 return false;
6306 element = CONST_VECTOR_ELT (op, 0);
6307 if (!CONST_INT_P (element))
6308 return false;
6310 value = INTVAL (element);
6311 if (!IN_RANGE (value, -128, 127))
6312 return false;
6314 for (i = 1; i < nunits; i++)
6316 element = CONST_VECTOR_ELT (op, i);
6317 if (!CONST_INT_P (element))
6318 return false;
6320 if (value != INTVAL (element))
6321 return false;
6325 /* Handle integer constants being loaded into the upper part of the VSX
6326 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6327 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6328 else if (CONST_INT_P (op))
6330 if (!SCALAR_INT_MODE_P (mode))
6331 return false;
6333 value = INTVAL (op);
6334 if (!IN_RANGE (value, -128, 127))
6335 return false;
6337 if (!IN_RANGE (value, -1, 0))
6339 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6340 return false;
6342 if (EASY_VECTOR_15 (value))
6343 return false;
6347 else
6348 return false;
6350 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6351 sign extend. Special case 0/-1 to allow getting any VSX register instead
6352 of an Altivec register. */
6353 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6354 && EASY_VECTOR_15 (value))
6355 return false;
6357 /* Return # of instructions and the constant byte for XXSPLTIB. */
6358 if (mode == V16QImode)
6359 *num_insns_ptr = 1;
6361 else if (IN_RANGE (value, -1, 0))
6362 *num_insns_ptr = 1;
6364 else
6365 *num_insns_ptr = 2;
6367 *constant_ptr = (int) value;
6368 return true;
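/* Editorial worked examples: a V16QImode splat of 100 needs just
   "xxspltib 100", so *num_insns_ptr is 1; a V8HImode splat of 100 needs
   xxspltib plus a sign extend, so 2; a V8HImode splat of 5 returns
   false because a single vspltish is preferred.  */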
6371 const char *
6372 output_vec_const_move (rtx *operands)
6374 int cst, cst2, shift;
6375 machine_mode mode;
6376 rtx dest, vec;
6378 dest = operands[0];
6379 vec = operands[1];
6380 mode = GET_MODE (dest);
6382 if (TARGET_VSX)
6384 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6385 int xxspltib_value = 256;
6386 int num_insns = -1;
6388 if (zero_constant (vec, mode))
6390 if (TARGET_P9_VECTOR)
6391 return "xxspltib %x0,0";
6393 else if (dest_vmx_p)
6394 return "vspltisw %0,0";
6396 else
6397 return "xxlxor %x0,%x0,%x0";
6400 if (all_ones_constant (vec, mode))
6402 if (TARGET_P9_VECTOR)
6403 return "xxspltib %x0,255";
6405 else if (dest_vmx_p)
6406 return "vspltisw %0,-1";
6408 else if (TARGET_P8_VECTOR)
6409 return "xxlorc %x0,%x0,%x0";
6411 else
6412 gcc_unreachable ();
6415 if (TARGET_P9_VECTOR
6416 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6418 if (num_insns == 1)
6420 operands[2] = GEN_INT (xxspltib_value & 0xff);
6421 return "xxspltib %x0,%2";
6424 return "#";
6428 if (TARGET_ALTIVEC)
6430 rtx splat_vec;
6432 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6433 if (zero_constant (vec, mode))
6434 return "vspltisw %0,0";
6436 if (all_ones_constant (vec, mode))
6437 return "vspltisw %0,-1";
6439 /* Do we need to construct a value using VSLDOI? */
6440 shift = vspltis_shifted (vec);
6441 if (shift != 0)
6442 return "#";
6444 splat_vec = gen_easy_altivec_constant (vec);
6445 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6446 operands[1] = XEXP (splat_vec, 0);
6447 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6448 return "#";
6450 switch (GET_MODE (splat_vec))
6452 case V4SImode:
6453 return "vspltisw %0,%1";
6455 case V8HImode:
6456 return "vspltish %0,%1";
6458 case V16QImode:
6459 return "vspltisb %0,%1";
6461 default:
6462 gcc_unreachable ();
6466 gcc_assert (TARGET_SPE);
6468 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6469 pattern of V1DI, V4HI, and V2SF.
6471 FIXME: We should probably return # and add post reload
6472 splitters for these, but this way is so easy ;-). */
6473 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6474 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6475 operands[1] = CONST_VECTOR_ELT (vec, 0);
6476 operands[2] = CONST_VECTOR_ELT (vec, 1);
6477 if (cst == cst2)
6478 return "li %0,%1\n\tevmergelo %0,%0,%0";
6479 else if (WORDS_BIG_ENDIAN)
6480 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6481 else
6482 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
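/* Editorial worked example: moving a zero vector into a VSX register
   emits "xxspltib %x0,0" on ISA 3.0, "vspltisw %0,0" when the
   destination is an AltiVec register, and "xxlxor %x0,%x0,%x0"
   otherwise, per the zero_constant branch above.  */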
6485 /* Initialize TARGET of vector PAIRED to VALS. */
6487 void
6488 paired_expand_vector_init (rtx target, rtx vals)
6490 machine_mode mode = GET_MODE (target);
6491 int n_elts = GET_MODE_NUNITS (mode);
6492 int n_var = 0;
6493 rtx x, new_rtx, tmp, constant_op, op1, op2;
6494 int i;
6496 for (i = 0; i < n_elts; ++i)
6498 x = XVECEXP (vals, 0, i);
6499 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6500 ++n_var;
6502 if (n_var == 0)
6504 /* Load from constant pool. */
6505 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6506 return;
6509 if (n_var == 2)
6511 /* The vector is initialized only with non-constants. */
6512 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6513 XVECEXP (vals, 0, 1));
6515 emit_move_insn (target, new_rtx);
6516 return;
6519 /* One field is non-constant and the other one is a constant. Load the
6520 constant from the constant pool and use the ps_merge instruction to
6521 construct the whole vector. */
6522 op1 = XVECEXP (vals, 0, 0);
6523 op2 = XVECEXP (vals, 0, 1);
6525 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6527 tmp = gen_reg_rtx (GET_MODE (constant_op));
6528 emit_move_insn (tmp, constant_op);
6530 if (CONSTANT_P (op1))
6531 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6532 else
6533 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6535 emit_move_insn (target, new_rtx);
6538 void
6539 paired_expand_vector_move (rtx operands[])
6541 rtx op0 = operands[0], op1 = operands[1];
6543 emit_move_insn (op0, op1);
6546 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6547 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6548 operands for the relation operation COND. This is a recursive
6549 function. */
6551 static void
6552 paired_emit_vector_compare (enum rtx_code rcode,
6553 rtx dest, rtx op0, rtx op1,
6554 rtx cc_op0, rtx cc_op1)
6556 rtx tmp = gen_reg_rtx (V2SFmode);
6557 rtx tmp1, max, min;
6559 gcc_assert (TARGET_PAIRED_FLOAT);
6560 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6562 switch (rcode)
6564 case LT:
6565 case LTU:
6566 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6567 return;
6568 case GE:
6569 case GEU:
6570 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6571 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6572 return;
6573 case LE:
6574 case LEU:
6575 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6576 return;
6577 case GT:
6578 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6579 return;
6580 case EQ:
6581 tmp1 = gen_reg_rtx (V2SFmode);
6582 max = gen_reg_rtx (V2SFmode);
6583 min = gen_reg_rtx (V2SFmode);
6586 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6587 emit_insn (gen_selv2sf4
6588 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6589 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6590 emit_insn (gen_selv2sf4
6591 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6592 emit_insn (gen_subv2sf3 (tmp1, min, max));
6593 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6594 return;
6595 case NE:
6596 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6597 return;
6598 case UNLE:
6599 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6600 return;
6601 case UNLT:
6602 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6603 return;
6604 case UNGE:
6605 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6606 return;
6607 case UNGT:
6608 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6609 return;
6610 default:
6611 gcc_unreachable ();
6614 return;
6617 /* Emit vector conditional expression.
6618 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6619 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6622 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6623 rtx cond, rtx cc_op0, rtx cc_op1)
6625 enum rtx_code rcode = GET_CODE (cond);
6627 if (!TARGET_PAIRED_FLOAT)
6628 return 0;
6630 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6632 return 1;
6635 /* Initialize vector TARGET to VALS. */
6637 void
6638 rs6000_expand_vector_init (rtx target, rtx vals)
6640 machine_mode mode = GET_MODE (target);
6641 machine_mode inner_mode = GET_MODE_INNER (mode);
6642 int n_elts = GET_MODE_NUNITS (mode);
6643 int n_var = 0, one_var = -1;
6644 bool all_same = true, all_const_zero = true;
6645 rtx x, mem;
6646 int i;
6648 for (i = 0; i < n_elts; ++i)
6650 x = XVECEXP (vals, 0, i);
6651 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6652 ++n_var, one_var = i;
6653 else if (x != CONST0_RTX (inner_mode))
6654 all_const_zero = false;
6656 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6657 all_same = false;
6660 if (n_var == 0)
6662 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6663 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6664 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6666 /* Zero register. */
6667 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6668 return;
6670 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6672 /* Splat immediate. */
6673 emit_insn (gen_rtx_SET (target, const_vec));
6674 return;
6676 else
6678 /* Load from constant pool. */
6679 emit_move_insn (target, const_vec);
6680 return;
6684 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6685 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6687 rtx op0 = XVECEXP (vals, 0, 0);
6688 rtx op1 = XVECEXP (vals, 0, 1);
6689 if (all_same)
6691 if (!MEM_P (op0) && !REG_P (op0))
6692 op0 = force_reg (inner_mode, op0);
6693 if (mode == V2DFmode)
6694 emit_insn (gen_vsx_splat_v2df (target, op0));
6695 else
6696 emit_insn (gen_vsx_splat_v2di (target, op0));
6698 else
6700 op0 = force_reg (inner_mode, op0);
6701 op1 = force_reg (inner_mode, op1);
6702 if (mode == V2DFmode)
6703 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6704 else
6705 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6707 return;
6710 /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is
6711 complicated since scalars are stored as doubles in the registers. */
6712 if (TARGET_P9_VECTOR && mode == V4SImode && all_same
6713 && VECTOR_MEM_VSX_P (mode))
6715 emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
6716 return;
6719 /* With single precision floating point on VSX, note that internally single
6720 precision is actually represented as a double. Either make two V2DF
6721 vectors and convert those vectors to single precision, or do one
6722 conversion and splat the result to the other elements. */
6723 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6725 if (all_same)
6727 rtx op0 = XVECEXP (vals, 0, 0);
6729 if (TARGET_P9_VECTOR)
6730 emit_insn (gen_vsx_splat_v4sf (target, op0));
6732 else
6734 rtx freg = gen_reg_rtx (V4SFmode);
6735 rtx sreg = force_reg (SFmode, op0);
6736 rtx cvt = (TARGET_XSCVDPSPN
6737 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6738 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6740 emit_insn (cvt);
6741 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6742 const0_rtx));
6745 else
6747 rtx dbl_even = gen_reg_rtx (V2DFmode);
6748 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6749 rtx flt_even = gen_reg_rtx (V4SFmode);
6750 rtx flt_odd = gen_reg_rtx (V4SFmode);
6751 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6752 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6753 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6754 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6756 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6757 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6758 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6759 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6760 rs6000_expand_extract_even (target, flt_even, flt_odd);
6762 return;
6765 /* Store value to stack temp. Load vector element. Splat. However, splat
6766 of 64-bit items is not supported on Altivec. */
6767 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6769 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6770 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6771 XVECEXP (vals, 0, 0));
6772 x = gen_rtx_UNSPEC (VOIDmode,
6773 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6774 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6775 gen_rtvec (2,
6776 gen_rtx_SET (target, mem),
6777 x)));
6778 x = gen_rtx_VEC_SELECT (inner_mode, target,
6779 gen_rtx_PARALLEL (VOIDmode,
6780 gen_rtvec (1, const0_rtx)));
6781 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6782 return;
6785 /* One field is non-constant. Load constant then overwrite
6786 varying field. */
6787 if (n_var == 1)
6789 rtx copy = copy_rtx (vals);
6791 /* Load constant part of vector, substitute neighboring value for
6792 varying element. */
6793 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6794 rs6000_expand_vector_init (target, copy);
6796 /* Insert variable. */
6797 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6798 return;
6801 /* Construct the vector in memory one field at a time
6802 and load the whole vector. */
6803 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6804 for (i = 0; i < n_elts; i++)
6805 emit_move_insn (adjust_address_nv (mem, inner_mode,
6806 i * GET_MODE_SIZE (inner_mode)),
6807 XVECEXP (vals, 0, i));
6808 emit_move_insn (target, mem);
6811 /* Set field ELT of TARGET to VAL. */
6813 void
6814 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6816 machine_mode mode = GET_MODE (target);
6817 machine_mode inner_mode = GET_MODE_INNER (mode);
6818 rtx reg = gen_reg_rtx (mode);
6819 rtx mask, mem, x;
6820 int width = GET_MODE_SIZE (inner_mode);
6821 int i;
6823 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6825 rtx (*set_func) (rtx, rtx, rtx, rtx)
6826 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6827 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6828 return;
6831 /* Simplify setting single element vectors like V1TImode. */
6832 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6834 emit_move_insn (target, gen_lowpart (mode, val));
6835 return;
6838 /* Load single variable value. */
6839 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6840 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6841 x = gen_rtx_UNSPEC (VOIDmode,
6842 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6843 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6844 gen_rtvec (2,
6845 gen_rtx_SET (reg, mem),
6846 x)));
6848 /* Linear sequence. */
6849 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6850 for (i = 0; i < 16; ++i)
6851 XVECEXP (mask, 0, i) = GEN_INT (i);
6853 /* Set permute mask to insert element into target. */
6854 for (i = 0; i < width; ++i)
6855 XVECEXP (mask, 0, elt*width + i)
6856 = GEN_INT (i + 0x10);
6857 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6859 if (BYTES_BIG_ENDIAN)
6860 x = gen_rtx_UNSPEC (mode,
6861 gen_rtvec (3, target, reg,
6862 force_reg (V16QImode, x)),
6863 UNSPEC_VPERM);
6864 else
6866 if (TARGET_P9_VECTOR)
6867 x = gen_rtx_UNSPEC (mode,
6868 gen_rtvec (3, target, reg,
6869 force_reg (V16QImode, x)),
6870 UNSPEC_VPERMR);
6871 else
6873 /* Invert selector. We prefer to generate VNAND on P8 so
6874 that future fusion opportunities can kick in, but must
6875 generate VNOR elsewhere. */
6876 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6877 rtx iorx = (TARGET_P8_VECTOR
6878 ? gen_rtx_IOR (V16QImode, notx, notx)
6879 : gen_rtx_AND (V16QImode, notx, notx));
6880 rtx tmp = gen_reg_rtx (V16QImode);
6881 emit_insn (gen_rtx_SET (tmp, iorx));
6883 /* Permute with operands reversed and adjusted selector. */
6884 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6885 UNSPEC_VPERM);
6889 emit_insn (gen_rtx_SET (target, x));
6892 /* Extract field ELT from VEC into TARGET. */
6894 void
6895 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6897 machine_mode mode = GET_MODE (vec);
6898 machine_mode inner_mode = GET_MODE_INNER (mode);
6899 rtx mem;
6901 if (VECTOR_MEM_VSX_P (mode))
6903 switch (mode)
6905 default:
6906 break;
6907 case V1TImode:
6908 gcc_assert (elt == 0 && inner_mode == TImode);
6909 emit_move_insn (target, gen_lowpart (TImode, vec));
6910 break;
6911 case V2DFmode:
6912 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6913 return;
6914 case V2DImode:
6915 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6916 return;
6917 case V4SFmode:
6918 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6919 return;
6920 case V16QImode:
6921 if (TARGET_VEXTRACTUB)
6923 emit_insn (gen_vsx_extract_v16qi (target, vec, GEN_INT (elt)));
6924 return;
6926 else
6927 break;
6928 case V8HImode:
6929 if (TARGET_VEXTRACTUB)
6931 emit_insn (gen_vsx_extract_v8hi (target, vec, GEN_INT (elt)));
6932 return;
6934 else
6935 break;
6936 case V4SImode:
6937 if (TARGET_VEXTRACTUB)
6939 emit_insn (gen_vsx_extract_v4si (target, vec, GEN_INT (elt)));
6940 return;
6942 else
6943 break;
6947 /* Allocate mode-sized buffer. */
6948 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6950 emit_move_insn (mem, vec);
6952 /* Add offset to field within buffer matching vector element. */
6953 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6955 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6958 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6960 bool
6961 invalid_e500_subreg (rtx op, machine_mode mode)
6963 if (TARGET_E500_DOUBLE)
6965 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6966 subreg:TI and reg:TF. Decimal float modes are like integer
6967 modes (only low part of each register used) for this
6968 purpose. */
6969 if (GET_CODE (op) == SUBREG
6970 && (mode == SImode || mode == DImode || mode == TImode
6971 || mode == DDmode || mode == TDmode || mode == PTImode)
6972 && REG_P (SUBREG_REG (op))
6973 && (GET_MODE (SUBREG_REG (op)) == DFmode
6974 || GET_MODE (SUBREG_REG (op)) == TFmode
6975 || GET_MODE (SUBREG_REG (op)) == IFmode
6976 || GET_MODE (SUBREG_REG (op)) == KFmode))
6977 return true;
6979 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6980 reg:TI. */
6981 if (GET_CODE (op) == SUBREG
6982 && (mode == DFmode || mode == TFmode || mode == IFmode
6983 || mode == KFmode)
6984 && REG_P (SUBREG_REG (op))
6985 && (GET_MODE (SUBREG_REG (op)) == DImode
6986 || GET_MODE (SUBREG_REG (op)) == TImode
6987 || GET_MODE (SUBREG_REG (op)) == PTImode
6988 || GET_MODE (SUBREG_REG (op)) == DDmode
6989 || GET_MODE (SUBREG_REG (op)) == TDmode))
6990 return true;
6993 if (TARGET_SPE
6994 && GET_CODE (op) == SUBREG
6995 && mode == SImode
6996 && REG_P (SUBREG_REG (op))
6997 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6998 return true;
7000 return false;
7003 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7004 selects whether the alignment is ABI-mandated, optional, or
7005 both ABI-mandated and optional alignment. */
7007 unsigned int
7008 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7010 if (how != align_opt)
7012 if (TREE_CODE (type) == VECTOR_TYPE)
7014 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7015 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7017 if (align < 64)
7018 align = 64;
7020 else if (align < 128)
7021 align = 128;
7023 else if (TARGET_E500_DOUBLE
7024 && TREE_CODE (type) == REAL_TYPE
7025 && TYPE_MODE (type) == DFmode)
7027 if (align < 64)
7028 align = 64;
7032 if (how != align_abi)
7034 if (TREE_CODE (type) == ARRAY_TYPE
7035 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7037 if (align < BITS_PER_WORD)
7038 align = BITS_PER_WORD;
7042 return align;
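/* Editorial worked example: with HOW != align_opt, an AltiVec vector
   type is raised to 128-bit alignment (SPE and paired-float vectors to
   64); with HOW != align_abi, an array of QImode elements gets at least
   BITS_PER_WORD alignment.  */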
7045 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7047 bool
7048 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7050 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7052 if (computed != 128)
7054 static bool warned;
7055 if (!warned && warn_psabi)
7057 warned = true;
7058 inform (input_location,
7059 "the layout of aggregates containing vectors with"
7060 " %d-byte alignment has changed in GCC 5",
7061 computed / BITS_PER_UNIT);
7064 /* In current GCC there is no special case. */
7065 return false;
7068 return false;
7071 /* AIX increases natural record alignment to doubleword if the first
7072 field is an FP double while the FP fields remain word aligned. */
7074 unsigned int
7075 rs6000_special_round_type_align (tree type, unsigned int computed,
7076 unsigned int specified)
7078 unsigned int align = MAX (computed, specified);
7079 tree field = TYPE_FIELDS (type);
7081 /* Skip all non field decls */
7082 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7083 field = DECL_CHAIN (field);
7085 if (field != NULL && field != type)
7087 type = TREE_TYPE (field);
7088 while (TREE_CODE (type) == ARRAY_TYPE)
7089 type = TREE_TYPE (type);
7091 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7092 align = MAX (align, 64);
7095 return align;
7098 /* Darwin increases record alignment to the natural alignment of
7099 the first field. */
7101 unsigned int
7102 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7103 unsigned int specified)
7105 unsigned int align = MAX (computed, specified);
7107 if (TYPE_PACKED (type))
7108 return align;
7110 /* Find the first field, looking down into aggregates. */
7111 do {
7112 tree field = TYPE_FIELDS (type);
7113 /* Skip all non field decls */
7114 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7115 field = DECL_CHAIN (field);
7116 if (! field)
7117 break;
7118 /* A packed field does not contribute any extra alignment. */
7119 if (DECL_PACKED (field))
7120 return align;
7121 type = TREE_TYPE (field);
7122 while (TREE_CODE (type) == ARRAY_TYPE)
7123 type = TREE_TYPE (type);
7124 } while (AGGREGATE_TYPE_P (type));
7126 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7127 align = MAX (align, TYPE_ALIGN (type));
7129 return align;
7132 /* Return 1 for an operand in small memory on V.4/eabi. */
7135 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7136 machine_mode mode ATTRIBUTE_UNUSED)
7138 #if TARGET_ELF
7139 rtx sym_ref;
7141 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7142 return 0;
7144 if (DEFAULT_ABI != ABI_V4)
7145 return 0;
7147 /* Vector and float memory instructions have a limited offset on the
7148 SPE, so using a vector or float variable directly as an operand is
7149 not useful. */
7150 if (TARGET_SPE
7151 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7152 return 0;
7154 if (GET_CODE (op) == SYMBOL_REF)
7155 sym_ref = op;
7157 else if (GET_CODE (op) != CONST
7158 || GET_CODE (XEXP (op, 0)) != PLUS
7159 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7160 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7161 return 0;
7163 else
7165 rtx sum = XEXP (op, 0);
7166 HOST_WIDE_INT summand;
7168 /* We have to be careful here, because it is the referenced address
7169 that must be within 32k of _SDA_BASE_, not just the symbol. */
7170 summand = INTVAL (XEXP (sum, 1));
7171 if (summand < 0 || summand > g_switch_value)
7172 return 0;
7174 sym_ref = XEXP (sum, 0);
7177 return SYMBOL_REF_SMALL_P (sym_ref);
7178 #else
7179 return 0;
7180 #endif
7183 /* Return true if either operand is a general purpose register. */
7185 bool
7186 gpr_or_gpr_p (rtx op0, rtx op1)
7188 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7189 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7192 /* Return true if this is a move direct operation between GPR registers and
7193 floating point/VSX registers. */
7195 bool
7196 direct_move_p (rtx op0, rtx op1)
7198 int regno0, regno1;
7200 if (!REG_P (op0) || !REG_P (op1))
7201 return false;
7203 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7204 return false;
7206 regno0 = REGNO (op0);
7207 regno1 = REGNO (op1);
7208 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7209 return false;
7211 if (INT_REGNO_P (regno0))
7212 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7214 else if (INT_REGNO_P (regno1))
7216 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7217 return true;
7219 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7220 return true;
7223 return false;
7226 /* Return true if the OFFSET is valid for the quad address instructions that
7227 use d-form (register + offset) addressing. */
7229 static inline bool
7230 quad_address_offset_p (HOST_WIDE_INT offset)
7232 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
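/* Worked examples of the test above: offsets 32 and -32768 are accepted
   (in range, low 4 bits clear), while 24 is rejected (24 & 0xf == 8) and
   32768 is rejected (outside the signed 16-bit range).  This mirrors the
   DQ instruction format, whose displacement is a signed 16-bit multiple
   of 16.  */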
7235 /* Return true if the ADDR is an acceptable address for a quad memory
7236 operation of mode MODE (either LQ/STQ for general purpose registers, or
7237 LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
7238 address must satisfy the strict register checks, i.e. only valid hard
7239 registers may be used as the base register.  */
7241 bool
7242 quad_address_p (rtx addr, machine_mode mode, bool strict)
7244 rtx op0, op1;
7246 if (GET_MODE_SIZE (mode) != 16)
7247 return false;
7249 if (legitimate_indirect_address_p (addr, strict))
7250 return true;
7252 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7253 return false;
7255 if (GET_CODE (addr) != PLUS)
7256 return false;
7258 op0 = XEXP (addr, 0);
7259 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7260 return false;
7262 op1 = XEXP (addr, 1);
7263 if (!CONST_INT_P (op1))
7264 return false;
7266 return quad_address_offset_p (INTVAL (op1));
7269 /* Return true if this is a load or store quad operation. This function does
7270 not handle the atomic quad memory instructions. */
7272 bool
7273 quad_load_store_p (rtx op0, rtx op1)
7275 bool ret;
7277 if (!TARGET_QUAD_MEMORY)
7278 ret = false;
7280 else if (REG_P (op0) && MEM_P (op1))
7281 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7282 && quad_memory_operand (op1, GET_MODE (op1))
7283 && !reg_overlap_mentioned_p (op0, op1));
7285 else if (MEM_P (op0) && REG_P (op1))
7286 ret = (quad_memory_operand (op0, GET_MODE (op0))
7287 && quad_int_reg_operand (op1, GET_MODE (op1)));
7289 else
7290 ret = false;
7292 if (TARGET_DEBUG_ADDR)
7294 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7295 ret ? "true" : "false");
7296 debug_rtx (gen_rtx_SET (op0, op1));
7299 return ret;
7302 /* Given an address, return a constant offset term if one exists. */
7304 static rtx
7305 address_offset (rtx op)
7307 if (GET_CODE (op) == PRE_INC
7308 || GET_CODE (op) == PRE_DEC)
7309 op = XEXP (op, 0);
7310 else if (GET_CODE (op) == PRE_MODIFY
7311 || GET_CODE (op) == LO_SUM)
7312 op = XEXP (op, 1);
7314 if (GET_CODE (op) == CONST)
7315 op = XEXP (op, 0);
7317 if (GET_CODE (op) == PLUS)
7318 op = XEXP (op, 1);
7320 if (CONST_INT_P (op))
7321 return op;
7323 return NULL_RTX;
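/* Illustrative examples:
     (plus (reg) (const_int 16))       --> (const_int 16)
     (lo_sum (reg) (const (plus (symbol_ref) (const_int 8))))
                                       --> (const_int 8)
     (pre_inc (reg))                   --> NULL_RTX.  */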
7326 /* Return true if the MEM operand is a memory operand suitable for use
7327 with a (full width, possibly multiple) gpr load/store. On
7328 powerpc64 this means the offset must be divisible by 4.
7329 Implements 'Y' constraint.
7331 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7332 a constraint function we know the operand has satisfied a suitable
7333 memory predicate. Also accept some odd rtl generated by reload
7334 (see rs6000_legitimize_reload_address for various forms). It is
7335 important that reload rtl be accepted by appropriate constraints
7336 but not by the operand predicate.
7338 Offsetting a lo_sum should not be allowed, except where we know by
7339 alignment that a 32k boundary is not crossed, but see the ???
7340 comment in rs6000_legitimize_reload_address. Note that by
7341 "offsetting" here we mean a further offset to access parts of the
7342 MEM. It's fine to have a lo_sum where the inner address is offset
7343 from a sym, since the same sym+offset will appear in the high part
7344 of the address calculation. */
7346 bool
7347 mem_operand_gpr (rtx op, machine_mode mode)
7349 unsigned HOST_WIDE_INT offset;
7350 int extra;
7351 rtx addr = XEXP (op, 0);
7353 op = address_offset (addr);
7354 if (op == NULL_RTX)
7355 return true;
7357 offset = INTVAL (op);
7358 if (TARGET_POWERPC64 && (offset & 3) != 0)
7359 return false;
7361 if (mode_supports_vsx_dform_quad (mode)
7362 && !quad_address_offset_p (offset))
7363 return false;
7365 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7366 if (extra < 0)
7367 extra = 0;
7369 if (GET_CODE (addr) == LO_SUM)
7370 /* For lo_sum addresses, we must allow any offset except one that
7371 causes a wrap, so test only the low 16 bits. */
7372 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7374 return offset + 0x8000 < 0x10000u - extra;
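/* Worked examples (illustrative): on powerpc64 a DImode offset of 32766
   is rejected because the DS-form ld/std displacement must be a multiple
   of 4.  For a 32-bit DFmode access, extra = 4, so offsets up to 32763
   are accepted and the second word at offset+4 remains addressable.  For
   a LO_SUM only the low 16 bits matter: an offset of 0x12348000 reduces
   to -32768 and passes.  */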
7377 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7379 static bool
7380 reg_offset_addressing_ok_p (machine_mode mode)
7382 switch (mode)
7384 case V16QImode:
7385 case V8HImode:
7386 case V4SFmode:
7387 case V4SImode:
7388 case V2DFmode:
7389 case V2DImode:
7390 case V1TImode:
7391 case TImode:
7392 case TFmode:
7393 case KFmode:
7394 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7395 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7396 a vector mode, if we want to use the VSX registers to move it around,
7397 we need to restrict ourselves to reg+reg addressing. Similarly for
7398 IEEE 128-bit floating point that is passed in a single vector
7399 register. */
7400 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7401 return mode_supports_vsx_dform_quad (mode);
7402 break;
7404 case V4HImode:
7405 case V2SImode:
7406 case V1DImode:
7407 case V2SFmode:
7408 /* Paired vector modes. Only reg+reg addressing is valid. */
7409 if (TARGET_PAIRED_FLOAT)
7410 return false;
7411 break;
7413 case SDmode:
7414 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7415 addressing for the LFIWZX and STFIWX instructions. */
7416 if (TARGET_NO_SDMODE_STACK)
7417 return false;
7418 break;
7420 default:
7421 break;
7424 return true;
7427 static bool
7428 virtual_stack_registers_memory_p (rtx op)
7430 int regnum;
7432 if (GET_CODE (op) == REG)
7433 regnum = REGNO (op);
7435 else if (GET_CODE (op) == PLUS
7436 && GET_CODE (XEXP (op, 0)) == REG
7437 && GET_CODE (XEXP (op, 1)) == CONST_INT)
7438 regnum = REGNO (XEXP (op, 0));
7440 else
7441 return false;
7443 return (regnum >= FIRST_VIRTUAL_REGISTER
7444 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7447 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7448 is known to not straddle a 32k boundary. This function is used
7449 to determine whether -mcmodel=medium code can use TOC pointer
7450 relative addressing for OP. This means the alignment of the TOC
7451 pointer must also be taken into account, and unfortunately that is
7452 only 8 bytes. */
7454 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7455 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7456 #endif
7458 static bool
7459 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7460 machine_mode mode)
7462 tree decl;
7463 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7465 if (GET_CODE (op) != SYMBOL_REF)
7466 return false;
7468 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7469 SYMBOL_REF. */
7470 if (mode_supports_vsx_dform_quad (mode))
7471 return false;
7473 dsize = GET_MODE_SIZE (mode);
7474 decl = SYMBOL_REF_DECL (op);
7475 if (!decl)
7477 if (dsize == 0)
7478 return false;
7480 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7481 replacing memory addresses with an anchor plus offset. We
7482 could find the decl by rummaging around in the block->objects
7483 VEC for the given offset but that seems like too much work. */
7484 dalign = BITS_PER_UNIT;
7485 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7486 && SYMBOL_REF_ANCHOR_P (op)
7487 && SYMBOL_REF_BLOCK (op) != NULL)
7489 struct object_block *block = SYMBOL_REF_BLOCK (op);
7491 dalign = block->alignment;
7492 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7494 else if (CONSTANT_POOL_ADDRESS_P (op))
7496 /* It would be nice to have a get_pool_align function.  */
7497 machine_mode cmode = get_pool_mode (op);
7499 dalign = GET_MODE_ALIGNMENT (cmode);
7502 else if (DECL_P (decl))
7504 dalign = DECL_ALIGN (decl);
7506 if (dsize == 0)
7508 /* Allow BLKmode when the entire object is known to not
7509 cross a 32k boundary. */
7510 if (!DECL_SIZE_UNIT (decl))
7511 return false;
7513 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7514 return false;
7516 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7517 if (dsize > 32768)
7518 return false;
7520 dalign /= BITS_PER_UNIT;
7521 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7522 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7523 return dalign >= dsize;
7526 else
7527 gcc_unreachable ();
7529 /* Find how many bits of the alignment we know for this access. */
7530 dalign /= BITS_PER_UNIT;
7531 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7532 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7533 mask = dalign - 1;
7534 lsb = offset & -offset;
7535 mask &= lsb - 1;
7536 dalign = mask + 1;
7538 return dalign >= dsize;
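/* Worked example (illustrative) of the bit-twiddling above: with
   dalign = 8 and offset = 4, lsb = 4, so mask = 7 & 3 = 3 and the known
   alignment drops to 4 bytes; a 4-byte access passes while an 8-byte
   access fails.  offset = 0 gives lsb - 1 = all ones, which preserves
   the full 8-byte TOC pointer alignment.  */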
7541 static bool
7542 constant_pool_expr_p (rtx op)
7544 rtx base, offset;
7546 split_const (op, &base, &offset);
7547 return (GET_CODE (base) == SYMBOL_REF
7548 && CONSTANT_POOL_ADDRESS_P (base)
7549 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7552 static const_rtx tocrel_base, tocrel_offset;
7554 /* Return true if OP is a toc pointer relative address (the output
7555 of create_TOC_reference). If STRICT, do not match high part or
7556 non-split -mcmodel=large/medium toc pointer relative addresses. */
7558 bool
7559 toc_relative_expr_p (const_rtx op, bool strict)
7561 if (!TARGET_TOC)
7562 return false;
7564 if (TARGET_CMODEL != CMODEL_SMALL)
7566 /* Only match the low part. */
7567 if (GET_CODE (op) == LO_SUM
7568 && REG_P (XEXP (op, 0))
7569 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7570 op = XEXP (op, 1);
7571 else if (strict)
7572 return false;
7575 tocrel_base = op;
7576 tocrel_offset = const0_rtx;
7577 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7579 tocrel_base = XEXP (op, 0);
7580 tocrel_offset = XEXP (op, 1);
7583 return (GET_CODE (tocrel_base) == UNSPEC
7584 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
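/* Illustrative sketch of the shapes matched above (roughly the output
   of create_TOC_reference):
     (unspec [(symbol_ref "sym") (reg 2)] UNSPEC_TOCREL)
     (plus (unspec [...] UNSPEC_TOCREL) (const_int 8))
   each optionally wrapped in (lo_sum (reg) ...) for -mcmodel=medium or
   -mcmodel=large.  */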
7587 /* Return true if X is a constant pool address, and also for cmodel=medium
7588 if X is a toc-relative address known to be offsettable within MODE. */
7590 bool
7591 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7592 bool strict)
7594 return (toc_relative_expr_p (x, strict)
7595 && (TARGET_CMODEL != CMODEL_MEDIUM
7596 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7597 || mode == QImode
7598 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7599 INTVAL (tocrel_offset), mode)));
7602 static bool
7603 legitimate_small_data_p (machine_mode mode, rtx x)
7605 return (DEFAULT_ABI == ABI_V4
7606 && !flag_pic && !TARGET_TOC
7607 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7608 && small_data_operand (x, mode));
7611 /* SPE offset addressing is limited to 5 bits' worth of doublewords.  */
7612 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
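/* That is, a valid SPE offset is a multiple of 8 in [0, 248]:
   SPE_CONST_OFFSET_OK (0), (8) and (248) hold, while (4) fails (low
   bits set) and (256) fails (bit outside the 5-bit field).  */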
7614 bool
7615 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7616 bool strict, bool worst_case)
7618 unsigned HOST_WIDE_INT offset;
7619 unsigned int extra;
7621 if (GET_CODE (x) != PLUS)
7622 return false;
7623 if (!REG_P (XEXP (x, 0)))
7624 return false;
7625 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7626 return false;
7627 if (mode_supports_vsx_dform_quad (mode))
7628 return quad_address_p (x, mode, strict);
7629 if (!reg_offset_addressing_ok_p (mode))
7630 return virtual_stack_registers_memory_p (x);
7631 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7632 return true;
7633 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7634 return false;
7636 offset = INTVAL (XEXP (x, 1));
7637 extra = 0;
7638 switch (mode)
7640 case V4HImode:
7641 case V2SImode:
7642 case V1DImode:
7643 case V2SFmode:
7644 /* SPE vector modes. */
7645 return SPE_CONST_OFFSET_OK (offset);
7647 case DFmode:
7648 case DDmode:
7649 case DImode:
7650 /* On e500v2, we may have:
7652 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
7654 which gets addressed with evldd instructions. */
7655 if (TARGET_E500_DOUBLE)
7656 return SPE_CONST_OFFSET_OK (offset);
7658 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7659 addressing. */
7660 if (VECTOR_MEM_VSX_P (mode))
7661 return false;
7663 if (!worst_case)
7664 break;
7665 if (!TARGET_POWERPC64)
7666 extra = 4;
7667 else if (offset & 3)
7668 return false;
7669 break;
7671 case TFmode:
7672 case IFmode:
7673 case KFmode:
7674 if (TARGET_E500_DOUBLE)
7675 return (SPE_CONST_OFFSET_OK (offset)
7676 && SPE_CONST_OFFSET_OK (offset + 8));
7677 /* fall through */
7679 case TDmode:
7680 case TImode:
7681 case PTImode:
7682 extra = 8;
7683 if (!worst_case)
7684 break;
7685 if (!TARGET_POWERPC64)
7686 extra = 12;
7687 else if (offset & 3)
7688 return false;
7689 break;
7691 default:
7692 break;
7695 offset += 0x8000;
7696 return offset < 0x10000 - extra;
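/* Worked example (illustrative): for TFmode on 32-bit with WORST_CASE,
   extra = 12, so offsets in [-32768, 32755] are accepted; the last word
   of the 16-byte value, at offset+12, then still fits in the 16-bit
   displacement.  */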
7699 bool
7700 legitimate_indexed_address_p (rtx x, int strict)
7702 rtx op0, op1;
7704 if (GET_CODE (x) != PLUS)
7705 return false;
7707 op0 = XEXP (x, 0);
7708 op1 = XEXP (x, 1);
7710 /* Recognize the rtl generated by reload which we know will later be
7711 replaced with proper base and index regs. */
7712 if (!strict
7713 && reload_in_progress
7714 && (REG_P (op0) || GET_CODE (op0) == PLUS)
7715 && REG_P (op1))
7716 return true;
7718 return (REG_P (op0) && REG_P (op1)
7719 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7720 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7721 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7722 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7725 bool
7726 avoiding_indexed_address_p (machine_mode mode)
7728 /* Avoid indexed addressing for modes that have non-indexed
7729 load/store instruction forms. */
7730 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7733 bool
7734 legitimate_indirect_address_p (rtx x, int strict)
7736 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
7739 bool
7740 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7742 if (!TARGET_MACHO || !flag_pic
7743 || mode != SImode || GET_CODE (x) != MEM)
7744 return false;
7745 x = XEXP (x, 0);
7747 if (GET_CODE (x) != LO_SUM)
7748 return false;
7749 if (GET_CODE (XEXP (x, 0)) != REG)
7750 return false;
7751 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7752 return false;
7753 x = XEXP (x, 1);
7755 return CONSTANT_P (x);
7758 static bool
7759 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7761 if (GET_CODE (x) != LO_SUM)
7762 return false;
7763 if (GET_CODE (XEXP (x, 0)) != REG)
7764 return false;
7765 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7766 return false;
7767 /* Quad-word addresses are restricted; we can't use LO_SUM.  */
7768 if (mode_supports_vsx_dform_quad (mode))
7769 return false;
7770 /* Restrict addressing for DI because of our SUBREG hackery. */
7771 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7772 return false;
7773 x = XEXP (x, 1);
7775 if (TARGET_ELF || TARGET_MACHO)
7777 bool large_toc_ok;
7779 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7780 return false;
7781 /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, which usually calls
7782 push_reload from the reload pass.  LEGITIMIZE_RELOAD_ADDRESS
7783 recognizes some LO_SUM addresses as valid although this function
7784 says the opposite.  In most cases LRA can generate correct code
7785 for such address reloads through its own transformations; only
7786 some LO_SUM cases are beyond it.  So we need code here, analogous
7787 to that in rs6000_legitimize_reload_address for LO_SUM, saying
7788 that some addresses are still valid. */
7789 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7790 && small_toc_ref (x, VOIDmode));
7791 if (TARGET_TOC && ! large_toc_ok)
7792 return false;
7793 if (GET_MODE_NUNITS (mode) != 1)
7794 return false;
7795 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7796 && !(/* ??? Assume floating point reg based on mode? */
7797 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
7798 && (mode == DFmode || mode == DDmode)))
7799 return false;
7801 return CONSTANT_P (x) || large_toc_ok;
7804 return false;
7808 /* Try machine-dependent ways of modifying an illegitimate address
7809 to be legitimate. If we find one, return the new, valid address.
7810 This is used from only one place: `memory_address' in explow.c.
7812 OLDX is the address as it was before break_out_memory_refs was
7813 called. In some cases it is useful to look at this to decide what
7814 needs to be done.
7816 It is always safe for this function to do nothing. It exists to
7817 recognize opportunities to optimize the output.
7819 On RS/6000, first check for the sum of a register with a constant
7820 integer that is out of range. If so, generate code to add the
7821 constant with the low-order 16 bits masked to the register and force
7822 this result into another register (this can be done with `cau').
7823 Then generate an address of REG+(CONST&0xffff), allowing for the
7824 possibility of bit 16 being a one.
7826 Then check for the sum of a register and something not constant;
7827 try to load the non-constant part into a register and return the sum. */
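/* Worked example (illustrative) of the split described above: for
   (plus (reg) (const_int 0x12345)) the low part is 0x2345 and the high
   part 0x10000, i.e. one addis of 1 followed by a d-form offset of
   0x2345.  For 0x18000 the low part sign-extends to -0x8000, so the
   high part becomes 0x20000 (addis of 2) with a d-form offset of
   -32768.  */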
7829 static rtx
7830 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7831 machine_mode mode)
7833 unsigned int extra;
7835 if (!reg_offset_addressing_ok_p (mode)
7836 || mode_supports_vsx_dform_quad (mode))
7838 if (virtual_stack_registers_memory_p (x))
7839 return x;
7841 /* In theory we should not be seeing addresses of the form reg+0,
7842 but just in case it is generated, optimize it away. */
7843 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7844 return force_reg (Pmode, XEXP (x, 0));
7846 /* For TImode with load/store quad, restrict addresses to just a single
7847 pointer, so it works with both GPRs and VSX registers. */
7848 /* Make sure both operands are registers. */
7849 else if (GET_CODE (x) == PLUS
7850 && (mode != TImode || !TARGET_QUAD_MEMORY))
7851 return gen_rtx_PLUS (Pmode,
7852 force_reg (Pmode, XEXP (x, 0)),
7853 force_reg (Pmode, XEXP (x, 1)));
7854 else
7855 return force_reg (Pmode, x);
7857 if (GET_CODE (x) == SYMBOL_REF)
7859 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7860 if (model != 0)
7861 return rs6000_legitimize_tls_address (x, model);
7864 extra = 0;
7865 switch (mode)
7867 case TFmode:
7868 case TDmode:
7869 case TImode:
7870 case PTImode:
7871 case IFmode:
7872 case KFmode:
7873 /* As in legitimate_offset_address_p we do not assume
7874 worst-case. The mode here is just a hint as to the registers
7875 used. A TImode is usually in gprs, but may actually be in
7876 fprs. Leave worst-case scenario for reload to handle via
7877 insn constraints. PTImode is only GPRs. */
7878 extra = 8;
7879 break;
7880 default:
7881 break;
7884 if (GET_CODE (x) == PLUS
7885 && GET_CODE (XEXP (x, 0)) == REG
7886 && GET_CODE (XEXP (x, 1)) == CONST_INT
7887 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7888 >= 0x10000 - extra)
7889 && !(SPE_VECTOR_MODE (mode)
7890 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7892 HOST_WIDE_INT high_int, low_int;
7893 rtx sum;
7894 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7895 if (low_int >= 0x8000 - extra)
7896 low_int = 0;
7897 high_int = INTVAL (XEXP (x, 1)) - low_int;
7898 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7899 GEN_INT (high_int)), 0);
7900 return plus_constant (Pmode, sum, low_int);
7902 else if (GET_CODE (x) == PLUS
7903 && GET_CODE (XEXP (x, 0)) == REG
7904 && GET_CODE (XEXP (x, 1)) != CONST_INT
7905 && GET_MODE_NUNITS (mode) == 1
7906 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7907 || (/* ??? Assume floating point reg based on mode? */
7908 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7909 && (mode == DFmode || mode == DDmode)))
7910 && !avoiding_indexed_address_p (mode))
7912 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7913 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7915 else if (SPE_VECTOR_MODE (mode)
7916 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7918 if (mode == DImode)
7919 return x;
7920 /* We accept [reg + reg] and [reg + OFFSET]. */
7922 if (GET_CODE (x) == PLUS)
7924 rtx op1 = XEXP (x, 0);
7925 rtx op2 = XEXP (x, 1);
7926 rtx y;
7928 op1 = force_reg (Pmode, op1);
7930 if (GET_CODE (op2) != REG
7931 && (GET_CODE (op2) != CONST_INT
7932 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7933 || (GET_MODE_SIZE (mode) > 8
7934 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7935 op2 = force_reg (Pmode, op2);
7937 /* We can't always do [reg + reg] for these, because [reg +
7938 reg + offset] is not a legitimate addressing mode. */
7939 y = gen_rtx_PLUS (Pmode, op1, op2);
7941 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7942 return force_reg (Pmode, y);
7943 else
7944 return y;
7947 return force_reg (Pmode, x);
7949 else if ((TARGET_ELF
7950 #if TARGET_MACHO
7951 || !MACHO_DYNAMIC_NO_PIC_P
7952 #endif
7954 && TARGET_32BIT
7955 && TARGET_NO_TOC
7956 && ! flag_pic
7957 && GET_CODE (x) != CONST_INT
7958 && GET_CODE (x) != CONST_WIDE_INT
7959 && GET_CODE (x) != CONST_DOUBLE
7960 && CONSTANT_P (x)
7961 && GET_MODE_NUNITS (mode) == 1
7962 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7963 || (/* ??? Assume floating point reg based on mode? */
7964 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7965 && (mode == DFmode || mode == DDmode))))
7967 rtx reg = gen_reg_rtx (Pmode);
7968 if (TARGET_ELF)
7969 emit_insn (gen_elf_high (reg, x));
7970 else
7971 emit_insn (gen_macho_high (reg, x));
7972 return gen_rtx_LO_SUM (Pmode, reg, x);
7974 else if (TARGET_TOC
7975 && GET_CODE (x) == SYMBOL_REF
7976 && constant_pool_expr_p (x)
7977 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7978 return create_TOC_reference (x, NULL_RTX);
7979 else
7980 return x;
7983 /* Debug version of rs6000_legitimize_address. */
7984 static rtx
7985 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7987 rtx ret;
7988 rtx_insn *insns;
7990 start_sequence ();
7991 ret = rs6000_legitimize_address (x, oldx, mode);
7992 insns = get_insns ();
7993 end_sequence ();
7995 if (ret != x)
7997 fprintf (stderr,
7998 "\nrs6000_legitimize_address: mode %s, old code %s, "
7999 "new code %s, modified\n",
8000 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8001 GET_RTX_NAME (GET_CODE (ret)));
8003 fprintf (stderr, "Original address:\n");
8004 debug_rtx (x);
8006 fprintf (stderr, "oldx:\n");
8007 debug_rtx (oldx);
8009 fprintf (stderr, "New address:\n");
8010 debug_rtx (ret);
8012 if (insns)
8014 fprintf (stderr, "Insns added:\n");
8015 debug_rtx_list (insns, 20);
8018 else
8020 fprintf (stderr,
8021 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8022 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8024 debug_rtx (x);
8027 if (insns)
8028 emit_insn (insns);
8030 return ret;
8033 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8034 We need to emit DTP-relative relocations. */
8036 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8037 static void
8038 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8040 switch (size)
8042 case 4:
8043 fputs ("\t.long\t", file);
8044 break;
8045 case 8:
8046 fputs (DOUBLE_INT_ASM_OP, file);
8047 break;
8048 default:
8049 gcc_unreachable ();
8051 output_addr_const (file, x);
8052 if (TARGET_ELF)
8053 fputs ("@dtprel+0x8000", file);
8054 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8056 switch (SYMBOL_REF_TLS_MODEL (x))
8058 case 0:
8059 break;
8060 case TLS_MODEL_LOCAL_EXEC:
8061 fputs ("@le", file);
8062 break;
8063 case TLS_MODEL_INITIAL_EXEC:
8064 fputs ("@ie", file);
8065 break;
8066 case TLS_MODEL_GLOBAL_DYNAMIC:
8067 case TLS_MODEL_LOCAL_DYNAMIC:
8068 fputs ("@m", file);
8069 break;
8070 default:
8071 gcc_unreachable ();
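/* For instance (illustrative), a 4-byte DTP-relative reference to a
   symbol "x" is emitted as ".long x@dtprel+0x8000" on ELF targets; on
   XCOFF the TLS model is encoded as a suffix instead, e.g. "x@le" for
   local-exec.  */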
8076 /* Return true if X is a symbol that refers to real (rather than emulated)
8077 TLS. */
8079 static bool
8080 rs6000_real_tls_symbol_ref_p (rtx x)
8082 return (GET_CODE (x) == SYMBOL_REF
8083 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8086 /* In the name of slightly smaller debug output, and to cater to
8087 general assembler lossage, recognize various UNSPEC sequences
8088 and turn them back into a direct symbol reference. */
8090 static rtx
8091 rs6000_delegitimize_address (rtx orig_x)
8093 rtx x, y, offset;
8095 orig_x = delegitimize_mem_from_attrs (orig_x);
8096 x = orig_x;
8097 if (MEM_P (x))
8098 x = XEXP (x, 0);
8100 y = x;
8101 if (TARGET_CMODEL != CMODEL_SMALL
8102 && GET_CODE (y) == LO_SUM)
8103 y = XEXP (y, 1);
8105 offset = NULL_RTX;
8106 if (GET_CODE (y) == PLUS
8107 && GET_MODE (y) == Pmode
8108 && CONST_INT_P (XEXP (y, 1)))
8110 offset = XEXP (y, 1);
8111 y = XEXP (y, 0);
8114 if (GET_CODE (y) == UNSPEC
8115 && XINT (y, 1) == UNSPEC_TOCREL)
8117 y = XVECEXP (y, 0, 0);
8119 #ifdef HAVE_AS_TLS
8120 /* Do not associate thread-local symbols with the original
8121 constant pool symbol. */
8122 if (TARGET_XCOFF
8123 && GET_CODE (y) == SYMBOL_REF
8124 && CONSTANT_POOL_ADDRESS_P (y)
8125 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8126 return orig_x;
8127 #endif
8129 if (offset != NULL_RTX)
8130 y = gen_rtx_PLUS (Pmode, y, offset);
8131 if (!MEM_P (orig_x))
8132 return y;
8133 else
8134 return replace_equiv_address_nv (orig_x, y);
8137 if (TARGET_MACHO
8138 && GET_CODE (orig_x) == LO_SUM
8139 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8141 y = XEXP (XEXP (orig_x, 1), 0);
8142 if (GET_CODE (y) == UNSPEC
8143 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8144 return XVECEXP (y, 0, 0);
8147 return orig_x;
8150 /* Return true if X shouldn't be emitted into the debug info.
8151 The linker doesn't like .toc section references from
8152 .debug_* sections, so reject .toc section symbols. */
8154 static bool
8155 rs6000_const_not_ok_for_debug_p (rtx x)
8157 if (GET_CODE (x) == SYMBOL_REF
8158 && CONSTANT_POOL_ADDRESS_P (x))
8160 rtx c = get_pool_constant (x);
8161 machine_mode cmode = get_pool_mode (x);
8162 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8163 return true;
8166 return false;
8169 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8171 static GTY(()) rtx rs6000_tls_symbol;
8172 static rtx
8173 rs6000_tls_get_addr (void)
8175 if (!rs6000_tls_symbol)
8176 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8178 return rs6000_tls_symbol;
8181 /* Construct the SYMBOL_REF for TLS GOT references. */
8183 static GTY(()) rtx rs6000_got_symbol;
8184 static rtx
8185 rs6000_got_sym (void)
8187 if (!rs6000_got_symbol)
8189 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8190 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8191 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8194 return rs6000_got_symbol;
8197 /* AIX Thread-Local Address support. */
8199 static rtx
8200 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8202 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8203 const char *name;
8204 char *tlsname;
8206 name = XSTR (addr, 0);
8207 /* Append the TLS CSECT qualifier, unless the symbol is already
8208 qualified or will be placed in the TLS private data section. */
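/* E.g. (illustrative) a hypothetical public symbol "foo" becomes
   "foo[TL]", or "foo[UL]" when it lives in an uninitialized (bss-style)
   TLS section; a name already ending in ']' is left untouched.  */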
8209 if (name[strlen (name) - 1] != ']'
8210 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8211 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8213 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* 4-char suffix + NUL.  */
8214 strcpy (tlsname, name);
8215 strcat (tlsname,
8216 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8217 tlsaddr = copy_rtx (addr);
8218 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8220 else
8221 tlsaddr = addr;
8223 /* Place addr into TOC constant pool. */
8224 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8226 /* Output the TOC entry and create the MEM referencing the value. */
8227 if (constant_pool_expr_p (XEXP (sym, 0))
8228 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8230 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8231 mem = gen_const_mem (Pmode, tocref);
8232 set_mem_alias_set (mem, get_TOC_alias_set ());
8234 else
8235 return sym;
8237 /* Use global-dynamic for local-dynamic. */
8238 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8239 || model == TLS_MODEL_LOCAL_DYNAMIC)
8241 /* Create new TOC reference for @m symbol. */
8242 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8243 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" for "*LC" + NUL.  */
8244 strcpy (tlsname, "*LCM");
8245 strcat (tlsname, name + 3);
8246 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8247 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8248 tocref = create_TOC_reference (modaddr, NULL_RTX);
8249 rtx modmem = gen_const_mem (Pmode, tocref);
8250 set_mem_alias_set (modmem, get_TOC_alias_set ());
8252 rtx modreg = gen_reg_rtx (Pmode);
8253 emit_insn (gen_rtx_SET (modreg, modmem));
8255 tmpreg = gen_reg_rtx (Pmode);
8256 emit_insn (gen_rtx_SET (tmpreg, mem));
8258 dest = gen_reg_rtx (Pmode);
8259 if (TARGET_32BIT)
8260 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8261 else
8262 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8263 return dest;
8265 /* Obtain the TLS pointer: a call on 32-bit, GPR 13 on 64-bit. */
8266 else if (TARGET_32BIT)
8268 tlsreg = gen_reg_rtx (SImode);
8269 emit_insn (gen_tls_get_tpointer (tlsreg));
8271 else
8272 tlsreg = gen_rtx_REG (DImode, 13);
8274 /* Load the TOC value into temporary register. */
8275 tmpreg = gen_reg_rtx (Pmode);
8276 emit_insn (gen_rtx_SET (tmpreg, mem));
8277 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8278 gen_rtx_MINUS (Pmode, addr, tlsreg));
8280 /* Add TOC symbol value to TLS pointer. */
8281 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8283 return dest;
8286 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8287 this (thread-local) address. */
8289 static rtx
8290 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8292 rtx dest, insn;
8294 if (TARGET_XCOFF)
8295 return rs6000_legitimize_tls_address_aix (addr, model);
8297 dest = gen_reg_rtx (Pmode);
8298 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8300 rtx tlsreg;
8302 if (TARGET_64BIT)
8304 tlsreg = gen_rtx_REG (Pmode, 13);
8305 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8307 else
8309 tlsreg = gen_rtx_REG (Pmode, 2);
8310 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8312 emit_insn (insn);
8314 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8316 rtx tlsreg, tmp;
8318 tmp = gen_reg_rtx (Pmode);
8319 if (TARGET_64BIT)
8321 tlsreg = gen_rtx_REG (Pmode, 13);
8322 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8324 else
8326 tlsreg = gen_rtx_REG (Pmode, 2);
8327 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8329 emit_insn (insn);
8330 if (TARGET_64BIT)
8331 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8332 else
8333 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8334 emit_insn (insn);
8336 else
8338 rtx r3, got, tga, tmp1, tmp2, call_insn;
8340 /* We currently use relocations like @got@tlsgd for tls, which
8341 means the linker will handle allocation of tls entries, placing
8342 them in the .got section. So use a pointer to the .got section,
8343 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8344 or to secondary GOT sections used by 32-bit -fPIC. */
8345 if (TARGET_64BIT)
8346 got = gen_rtx_REG (Pmode, 2);
8347 else
8349 if (flag_pic == 1)
8350 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8351 else
8353 rtx gsym = rs6000_got_sym ();
8354 got = gen_reg_rtx (Pmode);
8355 if (flag_pic == 0)
8356 rs6000_emit_move (got, gsym, Pmode);
8357 else
8359 rtx mem, lab, last;
8361 tmp1 = gen_reg_rtx (Pmode);
8362 tmp2 = gen_reg_rtx (Pmode);
8363 mem = gen_const_mem (Pmode, tmp1);
8364 lab = gen_label_rtx ();
8365 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8366 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8367 if (TARGET_LINK_STACK)
8368 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8369 emit_move_insn (tmp2, mem);
8370 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8371 set_unique_reg_note (last, REG_EQUAL, gsym);
8376 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8378 tga = rs6000_tls_get_addr ();
8379 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8380 1, const0_rtx, Pmode);
8382 r3 = gen_rtx_REG (Pmode, 3);
8383 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8385 if (TARGET_64BIT)
8386 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8387 else
8388 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8390 else if (DEFAULT_ABI == ABI_V4)
8391 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8392 else
8393 gcc_unreachable ();
8394 call_insn = last_call_insn ();
8395 PATTERN (call_insn) = insn;
8396 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8398 pic_offset_table_rtx);
8400 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8402 tga = rs6000_tls_get_addr ();
8403 tmp1 = gen_reg_rtx (Pmode);
8404 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8405 1, const0_rtx, Pmode);
8407 r3 = gen_rtx_REG (Pmode, 3);
8408 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8410 if (TARGET_64BIT)
8411 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8412 else
8413 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8415 else if (DEFAULT_ABI == ABI_V4)
8416 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8417 else
8418 gcc_unreachable ();
8419 call_insn = last_call_insn ();
8420 PATTERN (call_insn) = insn;
8421 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8422 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8423 pic_offset_table_rtx);
8425 if (rs6000_tls_size == 16)
8427 if (TARGET_64BIT)
8428 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8429 else
8430 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8432 else if (rs6000_tls_size == 32)
8434 tmp2 = gen_reg_rtx (Pmode);
8435 if (TARGET_64BIT)
8436 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8437 else
8438 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8439 emit_insn (insn);
8440 if (TARGET_64BIT)
8441 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8442 else
8443 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8445 else
8447 tmp2 = gen_reg_rtx (Pmode);
8448 if (TARGET_64BIT)
8449 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8450 else
8451 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8452 emit_insn (insn);
8453 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8455 emit_insn (insn);
8457 else
8459 /* IE, or 64-bit offset LE. */
8460 tmp2 = gen_reg_rtx (Pmode);
8461 if (TARGET_64BIT)
8462 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8463 else
8464 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8465 emit_insn (insn);
8466 if (TARGET_64BIT)
8467 insn = gen_tls_tls_64 (dest, tmp2, addr);
8468 else
8469 insn = gen_tls_tls_32 (dest, tmp2, addr);
8470 emit_insn (insn);
8474 return dest;
8477 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8479 static bool
8480 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8482 if (GET_CODE (x) == HIGH
8483 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8484 return true;
8486 /* A TLS symbol in the TOC cannot contain a sum. */
8487 if (GET_CODE (x) == CONST
8488 && GET_CODE (XEXP (x, 0)) == PLUS
8489 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8490 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8491 return true;
8493 /* Do not place an ELF TLS symbol in the constant pool. */
8494 return TARGET_ELF && tls_referenced_p (x);
8497 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8498 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8499 can be addressed relative to the toc pointer. */
8501 static bool
8502 use_toc_relative_ref (rtx sym, machine_mode mode)
8504 return ((constant_pool_expr_p (sym)
8505 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8506 get_pool_mode (sym)))
8507 || (TARGET_CMODEL == CMODEL_MEDIUM
8508 && SYMBOL_REF_LOCAL_P (sym)
8509 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8512 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8513 replace the input X, or the original X if no replacement is called for.
8514 The output parameter *WIN is 1 if the calling macro should goto WIN,
8515 0 if it should not.
8517 For RS/6000, we wish to handle large displacements off a base
8518 register by splitting the addend across an addis and the mem insn.
8519 This cuts the number of extra insns needed from 3 to 1.
8521 On Darwin, we use this to generate code for floating point constants.
8522 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8523 The Darwin code is inside #if TARGET_MACHO because only then are the
8524 machopic_* functions defined. */
8525 static rtx
8526 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8527 int opnum, int type,
8528 int ind_levels ATTRIBUTE_UNUSED, int *win)
8530 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8531 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
8533 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
8534 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
8535 if (reg_offset_p
8536 && opnum == 1
8537 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8538 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
8539 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
8540 && TARGET_P9_VECTOR)
8541 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
8542 && TARGET_P9_VECTOR)))
8543 reg_offset_p = false;
8545 /* We must recognize output that we have already generated ourselves. */
8546 if (GET_CODE (x) == PLUS
8547 && GET_CODE (XEXP (x, 0)) == PLUS
8548 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8549 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8550 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8552 if (TARGET_DEBUG_ADDR)
8554 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
8555 debug_rtx (x);
8557 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8558 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8559 opnum, (enum reload_type) type);
8560 *win = 1;
8561 return x;
8564 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8565 if (GET_CODE (x) == LO_SUM
8566 && GET_CODE (XEXP (x, 0)) == HIGH)
8568 if (TARGET_DEBUG_ADDR)
8570 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
8571 debug_rtx (x);
8573 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8574 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8575 opnum, (enum reload_type) type);
8576 *win = 1;
8577 return x;
8580 #if TARGET_MACHO
8581 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8582 && GET_CODE (x) == LO_SUM
8583 && GET_CODE (XEXP (x, 0)) == PLUS
8584 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8585 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8586 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8587 && machopic_operand_p (XEXP (x, 1)))
8589 /* Result of previous invocation of this function on Darwin
8590 floating point constant. */
8591 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8592 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8593 opnum, (enum reload_type) type);
8594 *win = 1;
8595 return x;
8597 #endif
8599 if (TARGET_CMODEL != CMODEL_SMALL
8600 && reg_offset_p
8601 && !quad_offset_p
8602 && small_toc_ref (x, VOIDmode))
8604 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8605 x = gen_rtx_LO_SUM (Pmode, hi, x);
8606 if (TARGET_DEBUG_ADDR)
8608 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
8609 debug_rtx (x);
8611 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8612 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8613 opnum, (enum reload_type) type);
8614 *win = 1;
8615 return x;
8618 if (GET_CODE (x) == PLUS
8619 && REG_P (XEXP (x, 0))
8620 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8621 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8622 && CONST_INT_P (XEXP (x, 1))
8623 && reg_offset_p
8624 && !SPE_VECTOR_MODE (mode)
8625 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8626 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8628 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8629 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8630 HOST_WIDE_INT high
8631 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8633 /* Check for 32-bit overflow or quad addresses with one of the
8634 four least significant bits set. */
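/* Worked example (illustrative): val = 0x7fffffff gives low = -1 and
   high = -0x80000000, so high + low != val and we refuse the reload
   rather than let the 32-bit addition wrap.  */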
8635 if (high + low != val
8636 || (quad_offset_p && (low & 0xf)))
8638 *win = 0;
8639 return x;
8642 /* Reload the high part into a base reg; leave the low part
8643 in the mem directly. */
8645 x = gen_rtx_PLUS (GET_MODE (x),
8646 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8647 GEN_INT (high)),
8648 GEN_INT (low));
8650 if (TARGET_DEBUG_ADDR)
8652 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
8653 debug_rtx (x);
8655 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8656 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8657 opnum, (enum reload_type) type);
8658 *win = 1;
8659 return x;
8662 if (GET_CODE (x) == SYMBOL_REF
8663 && reg_offset_p
8664 && !quad_offset_p
8665 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
8666 && !SPE_VECTOR_MODE (mode)
8667 #if TARGET_MACHO
8668 && DEFAULT_ABI == ABI_DARWIN
8669 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
8670 && machopic_symbol_defined_p (x)
8671 #else
8672 && DEFAULT_ABI == ABI_V4
8673 && !flag_pic
8674 #endif
8675 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
8676 The same goes for DImode without 64-bit gprs and DFmode and DDmode
8677 without fprs.
8678 ??? Assume floating point reg based on mode? This assumption is
8679 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
8680 where reload ends up doing a DFmode load of a constant from
8681 mem using two gprs. Unfortunately, at this point reload
8682 hasn't yet selected regs so poking around in reload data
8683 won't help and even if we could figure out the regs reliably,
8684 we'd still want to allow this transformation when the mem is
8685 naturally aligned. Since we say the address is good here, we
8686 can't disable offsets from LO_SUMs in mem_operand_gpr.
8687 FIXME: Allow offset from lo_sum for other modes too, when
8688 mem is sufficiently aligned.
8690 Also disallow this if the type can go in VMX/Altivec registers, since
8691 those registers do not have d-form (reg+offset) address modes. */
8692 && !reg_addr[mode].scalar_in_vmx_p
8693 && mode != TFmode
8694 && mode != TDmode
8695 && mode != IFmode
8696 && mode != KFmode
8697 && (mode != TImode || !TARGET_VSX_TIMODE)
8698 && mode != PTImode
8699 && (mode != DImode || TARGET_POWERPC64)
8700 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
8701 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
8703 #if TARGET_MACHO
8704 if (flag_pic)
8706 rtx offset = machopic_gen_offset (x);
8707 x = gen_rtx_LO_SUM (GET_MODE (x),
8708 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
8709 gen_rtx_HIGH (Pmode, offset)), offset);
8711 else
8712 #endif
8713 x = gen_rtx_LO_SUM (GET_MODE (x),
8714 gen_rtx_HIGH (Pmode, x), x);
8716 if (TARGET_DEBUG_ADDR)
8718 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
8719 debug_rtx (x);
8721 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8722 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8723 opnum, (enum reload_type) type);
8724 *win = 1;
8725 return x;
8728 /* Reload an offset address wrapped by an AND that represents the
8729 masking of the lower bits. Strip the outer AND and let reload
8730 convert the offset address into an indirect address. For VSX,
8731 force reload to create the address with an AND in a separate
8732 register, because we can't guarantee an altivec register will
8733 be used. */
8734 if (VECTOR_MEM_ALTIVEC_P (mode)
8735 && GET_CODE (x) == AND
8736 && GET_CODE (XEXP (x, 0)) == PLUS
8737 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8738 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8739 && GET_CODE (XEXP (x, 1)) == CONST_INT
8740 && INTVAL (XEXP (x, 1)) == -16)
8742 x = XEXP (x, 0);
8743 *win = 1;
8744 return x;
8747 if (TARGET_TOC
8748 && reg_offset_p
8749 && !quad_offset_p
8750 && GET_CODE (x) == SYMBOL_REF
8751 && use_toc_relative_ref (x, mode))
8753 x = create_TOC_reference (x, NULL_RTX);
8754 if (TARGET_CMODEL != CMODEL_SMALL)
8756 if (TARGET_DEBUG_ADDR)
8758 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
8759 debug_rtx (x);
8761 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8762 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8763 opnum, (enum reload_type) type);
8765 *win = 1;
8766 return x;
8768 *win = 0;
8769 return x;
8772 /* Debug version of rs6000_legitimize_reload_address. */
8773 static rtx
8774 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
8775 int opnum, int type,
8776 int ind_levels, int *win)
8778 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
8779 ind_levels, win);
8780 fprintf (stderr,
8781 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
8782 "type = %d, ind_levels = %d, win = %d, original addr:\n",
8783 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
8784 debug_rtx (x);
8786 if (x == ret)
8787 fprintf (stderr, "Same address returned\n");
8788 else if (!ret)
8789 fprintf (stderr, "NULL returned\n");
8790 else
8792 fprintf (stderr, "New address:\n");
8793 debug_rtx (ret);
8796 return ret;
8799 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8800 that is a valid memory address for an instruction.
8801 The MODE argument is the machine mode for the MEM expression
8802 that wants to use this address.
8804 On the RS/6000, there are four kinds of valid address: a SYMBOL_REF that
8805 refers to a constant pool entry of an address (or the sum of it
8806 plus a constant), a short (16-bit signed) constant plus a register,
8807 the sum of two registers, or a register indirect, possibly with an
8808 auto-increment. For DFmode, DDmode and DImode with a constant plus
8809 register, we must ensure that both words are addressable, or on
8810 PowerPC64 that the offset is word aligned.
8812 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8813 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8814 because adjacent memory cells are accessed by adding word-sized offsets
8815 during assembly output. */
8816 static bool
8817 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8819 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8820 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
8822 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8823 if (VECTOR_MEM_ALTIVEC_P (mode)
8824 && GET_CODE (x) == AND
8825 && GET_CODE (XEXP (x, 1)) == CONST_INT
8826 && INTVAL (XEXP (x, 1)) == -16)
8827 x = XEXP (x, 0);
8829 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8830 return 0;
8831 if (legitimate_indirect_address_p (x, reg_ok_strict))
8832 return 1;
8833 if (TARGET_UPDATE
8834 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8835 && mode_supports_pre_incdec_p (mode)
8836 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8837 return 1;
8838 /* Handle restricted vector d-form offsets in ISA 3.0. */
8839 if (quad_offset_p)
8841 if (quad_address_p (x, mode, reg_ok_strict))
8842 return 1;
8844 else if (virtual_stack_registers_memory_p (x))
8845 return 1;
8847 else if (reg_offset_p)
8849 if (legitimate_small_data_p (mode, x))
8850 return 1;
8851 if (legitimate_constant_pool_address_p (x, mode,
8852 reg_ok_strict || lra_in_progress))
8853 return 1;
8854 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
8855 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
8856 return 1;
8859 /* For TImode, if we have load/store quad and TImode in VSX registers, only
8860 allow register indirect addresses. This will allow the values to go in
8861 either GPRs or VSX registers without reloading. The vector types would
8862 tend to go into VSX registers, so we allow REG+REG, while TImode seems
8863 somewhat split, in that some uses are GPR based, and some VSX based. */
8864 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
8865 return 0;
8866 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8867 if (! reg_ok_strict
8868 && reg_offset_p
8869 && GET_CODE (x) == PLUS
8870 && GET_CODE (XEXP (x, 0)) == REG
8871 && (XEXP (x, 0) == virtual_stack_vars_rtx
8872 || XEXP (x, 0) == arg_pointer_rtx)
8873 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8874 return 1;
8875 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8876 return 1;
8877 if (!FLOAT128_2REG_P (mode)
8878 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8879 || TARGET_POWERPC64
8880 || (mode != DFmode && mode != DDmode)
8881 || (TARGET_E500_DOUBLE && mode != DDmode))
8882 && (TARGET_POWERPC64 || mode != DImode)
8883 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8884 && mode != PTImode
8885 && !avoiding_indexed_address_p (mode)
8886 && legitimate_indexed_address_p (x, reg_ok_strict))
8887 return 1;
8888 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8889 && mode_supports_pre_modify_p (mode)
8890 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8891 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8892 reg_ok_strict, false)
8893 || (!avoiding_indexed_address_p (mode)
8894 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8895 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8896 return 1;
8897 if (reg_offset_p && !quad_offset_p
8898 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8899 return 1;
8900 return 0;
8903 /* Debug version of rs6000_legitimate_address_p. */
8904 static bool
8905 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8906 bool reg_ok_strict)
8908 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8909 fprintf (stderr,
8910 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8911 "strict = %d, reload = %s, code = %s\n",
8912 ret ? "true" : "false",
8913 GET_MODE_NAME (mode),
8914 reg_ok_strict,
8915 (reload_completed
8916 ? "after"
8917 : (reload_in_progress ? "progress" : "before")),
8918 GET_RTX_NAME (GET_CODE (x)));
8919 debug_rtx (x);
8921 return ret;
8924 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8926 static bool
8927 rs6000_mode_dependent_address_p (const_rtx addr,
8928 addr_space_t as ATTRIBUTE_UNUSED)
8930 return rs6000_mode_dependent_address_ptr (addr);
8933 /* Go to LABEL if ADDR (a legitimate address expression)
8934 has an effect that depends on the machine mode it is used for.
8936 On the RS/6000 this is true of all integral offsets (since AltiVec
8937 and VSX modes don't allow them) and of any pre-increment or decrement.
8939 ??? Except that due to conceptual problems in offsettable_address_p
8940 we can't really report the problems of integral offsets. So leave
8941 this assuming that the adjustable offset must be valid for the
8942 sub-words of a TFmode operand, which is what we had before. */
8944 static bool
8945 rs6000_mode_dependent_address (const_rtx addr)
8947 switch (GET_CODE (addr))
8949 case PLUS:
8950 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8951 is considered a legitimate address before reload, so there
8952 are no offset restrictions in that case. Note that this
8953 condition is safe in strict mode because any address involving
8954 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8955 been rejected as illegitimate. */
8956 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8957 && XEXP (addr, 0) != arg_pointer_rtx
8958 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8960 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8961 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
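/* E.g. (illustrative) on 32-bit an offset of 32756 is mode dependent:
   32756 + 0x8000 == 65524 >= 0x10000 - 12, so the last word of a
   TFmode access at offset+12 would be out of reach, while 32755 is
   still fine.  */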
8963 break;
8965 case LO_SUM:
8966 /* Anything in the constant pool is sufficiently aligned that
8967 all bytes have the same high part address. */
8968 return !legitimate_constant_pool_address_p (addr, QImode, false);
8970 /* Auto-increment cases are now treated generically in recog.c. */
8971 case PRE_MODIFY:
8972 return TARGET_UPDATE;
8974 /* AND is only allowed in Altivec loads. */
8975 case AND:
8976 return true;
8978 default:
8979 break;
8982 return false;
8985 /* Debug version of rs6000_mode_dependent_address. */
8986 static bool
8987 rs6000_debug_mode_dependent_address (const_rtx addr)
8989 bool ret = rs6000_mode_dependent_address (addr);
8991 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8992 ret ? "true" : "false");
8993 debug_rtx (addr);
8995 return ret;
8998 /* Implement FIND_BASE_TERM. */
9000 rtx
9001 rs6000_find_base_term (rtx op)
9003 rtx base;
9005 base = op;
9006 if (GET_CODE (base) == CONST)
9007 base = XEXP (base, 0);
9008 if (GET_CODE (base) == PLUS)
9009 base = XEXP (base, 0);
9010 if (GET_CODE (base) == UNSPEC)
9011 switch (XINT (base, 1))
9013 case UNSPEC_TOCREL:
9014 case UNSPEC_MACHOPIC_OFFSET:
9015 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9016 for aliasing purposes. */
9017 return XVECEXP (base, 0, 0);
9020 return op;
9023 /* More elaborate version of recog's offsettable_memref_p predicate
9024 that works around the ??? note of rs6000_mode_dependent_address.
9025 In particular it accepts
9027 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9029 in 32-bit mode, that the recog predicate rejects. */
9031 static bool
9032 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9034 bool worst_case;
9036 if (!MEM_P (op))
9037 return false;
9039 /* First mimic offsettable_memref_p. */
9040 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9041 return true;
9043 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9044 the latter predicate knows nothing about the mode of the memory
9045 reference and, therefore, assumes that it is the largest supported
9046 mode (TFmode). As a consequence, legitimate offsettable memory
9047 references are rejected. rs6000_legitimate_offset_address_p contains
9048 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9049 at least with a little bit of help here given that we know the
9050 actual registers used. */
9051 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9052 || GET_MODE_SIZE (reg_mode) == 4);
9053 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9054 true, worst_case);
9057 /* Determine the reassociation width to be used in reassociate_bb.
9058 This takes into account how many parallel operations we
9059 can actually do of a given type, and also the latency.
9061 int add/sub 6/cycle
9062 mul 2/cycle
9063 vect add/sub/mul 2/cycle
9064 fp add/sub/mul 2/cycle
9065 dfp 1/cycle
9068 static int
9069 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9070 enum machine_mode mode)
9072 switch (rs6000_cpu)
9074 case PROCESSOR_POWER8:
9075 case PROCESSOR_POWER9:
9076 if (DECIMAL_FLOAT_MODE_P (mode))
9077 return 1;
9078 if (VECTOR_MODE_P (mode))
9079 return 4;
9080 if (INTEGRAL_MODE_P (mode))
9081 return opc == MULT_EXPR ? 4 : 6;
9082 if (FLOAT_MODE_P (mode))
9083 return 4;
9084 break;
9085 default:
9086 break;
9088 return 1;
9091 /* Change register usage conditional on target flags. */
9092 static void
9093 rs6000_conditional_register_usage (void)
9095 int i;
9097 if (TARGET_DEBUG_TARGET)
9098 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9100 /* Set MQ register fixed (already call_used) so that it will not be
9101 allocated. */
9102 fixed_regs[64] = 1;
9104 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9105 if (TARGET_64BIT)
9106 fixed_regs[13] = call_used_regs[13]
9107 = call_really_used_regs[13] = 1;
9109 /* Conditionally disable FPRs. */
9110 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9111 for (i = 32; i < 64; i++)
9112 fixed_regs[i] = call_used_regs[i]
9113 = call_really_used_regs[i] = 1;
9115 /* The TOC register is not killed across calls in a way that is
9116 visible to the compiler. */
9117 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9118 call_really_used_regs[2] = 0;
9120 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9121 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9123 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9124 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9125 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9126 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9128 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9129 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9130 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9131 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9133 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9134 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9135 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9137 if (TARGET_SPE)
9139 global_regs[SPEFSCR_REGNO] = 1;
9140 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9141 registers in prologues and epilogues. We no longer use r14
9142 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9143 pool for link-compatibility with older versions of GCC. Once
9144 "old" code has died out, we can return r14 to the allocation
9145 pool. */
9146 fixed_regs[14]
9147 = call_used_regs[14]
9148 = call_really_used_regs[14] = 1;
9151 if (!TARGET_ALTIVEC && !TARGET_VSX)
9153 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9154 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9155 call_really_used_regs[VRSAVE_REGNO] = 1;
9158 if (TARGET_ALTIVEC || TARGET_VSX)
9159 global_regs[VSCR_REGNO] = 1;
9161 if (TARGET_ALTIVEC_ABI)
9163 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9164 call_used_regs[i] = call_really_used_regs[i] = 1;
9166 /* AIX reserves VR20:31 in non-extended ABI mode. */
9167 if (TARGET_XCOFF)
9168 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9169 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9174 /* Output insns to set DEST equal to the constant SOURCE as a series of
9175 lis, ori and shl instructions and return TRUE. */
9177 bool
9178 rs6000_emit_set_const (rtx dest, rtx source)
9180 machine_mode mode = GET_MODE (dest);
9181 rtx temp, set;
9182 rtx_insn *insn;
9183 HOST_WIDE_INT c;
9185 gcc_checking_assert (CONST_INT_P (source));
9186 c = INTVAL (source);
9187 switch (mode)
9189 case QImode:
9190 case HImode:
9191 emit_insn (gen_rtx_SET (dest, source));
9192 return true;
9194 case SImode:
9195 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9197 emit_insn (gen_rtx_SET (copy_rtx (temp),
9198 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9199 emit_insn (gen_rtx_SET (dest,
9200 gen_rtx_IOR (SImode, copy_rtx (temp),
9201 GEN_INT (c & 0xffff))));
9202 break;
9204 case DImode:
9205 if (!TARGET_POWERPC64)
9207 rtx hi, lo;
9209 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9210 DImode);
9211 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9212 DImode);
9213 emit_move_insn (hi, GEN_INT (c >> 32));
9214 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9215 emit_move_insn (lo, GEN_INT (c));
9217 else
9218 rs6000_emit_set_long_const (dest, c);
9219 break;
9221 default:
9222 gcc_unreachable ();
9225 insn = get_last_insn ();
9226 set = single_set (insn);
9227 if (! CONSTANT_P (SET_SRC (set)))
9228 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9230 return true;
9233 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9234 Output insns to set DEST equal to the constant C as a series of
9235 lis, ori and shl instructions. */
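/* As a worked example of the splitting below, the constant
   0x123456789abcdef0 decomposes into ud4..ud1 = 0x1234, 0x5678,
   0x9abc, 0xdef0; none of the special cases apply, and the emitted
   RTL corresponds to roughly this five-instruction sequence:

     lis   rD,0x1234        # ud4, sign-adjusted
     ori   rD,rD,0x5678     # ud3
     sldi  rD,rD,32
     oris  rD,rD,0x9abc     # ud2
     ori   rD,rD,0xdef0     # ud1  */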
9237 static void
9238 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9240 rtx temp;
9241 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9243 ud1 = c & 0xffff;
9244 c = c >> 16;
9245 ud2 = c & 0xffff;
9246 c = c >> 16;
9247 ud3 = c & 0xffff;
9248 c = c >> 16;
9249 ud4 = c & 0xffff;
9251 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9252 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9253 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9255 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9256 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9258 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9260 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9261 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9262 if (ud1 != 0)
9263 emit_move_insn (dest,
9264 gen_rtx_IOR (DImode, copy_rtx (temp),
9265 GEN_INT (ud1)));
9267 else if (ud3 == 0 && ud4 == 0)
9269 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9271 gcc_assert (ud2 & 0x8000);
9272 emit_move_insn (copy_rtx (temp),
9273 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9274 if (ud1 != 0)
9275 emit_move_insn (copy_rtx (temp),
9276 gen_rtx_IOR (DImode, copy_rtx (temp),
9277 GEN_INT (ud1)));
9278 emit_move_insn (dest,
9279 gen_rtx_ZERO_EXTEND (DImode,
9280 gen_lowpart (SImode,
9281 copy_rtx (temp))));
9283 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9284 || (ud4 == 0 && ! (ud3 & 0x8000)))
9286 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9288 emit_move_insn (copy_rtx (temp),
9289 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9290 if (ud2 != 0)
9291 emit_move_insn (copy_rtx (temp),
9292 gen_rtx_IOR (DImode, copy_rtx (temp),
9293 GEN_INT (ud2)));
9294 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9295 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9296 GEN_INT (16)));
9297 if (ud1 != 0)
9298 emit_move_insn (dest,
9299 gen_rtx_IOR (DImode, copy_rtx (temp),
9300 GEN_INT (ud1)));
9302 else
9304 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9306 emit_move_insn (copy_rtx (temp),
9307 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9308 if (ud3 != 0)
9309 emit_move_insn (copy_rtx (temp),
9310 gen_rtx_IOR (DImode, copy_rtx (temp),
9311 GEN_INT (ud3)));
9313 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9314 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9315 GEN_INT (32)));
9316 if (ud2 != 0)
9317 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9318 gen_rtx_IOR (DImode, copy_rtx (temp),
9319 GEN_INT (ud2 << 16)));
9320 if (ud1 != 0)
9321 emit_move_insn (dest,
9322 gen_rtx_IOR (DImode, copy_rtx (temp),
9323 GEN_INT (ud1)));
9327 /* Helper for the following. Get rid of [r+r] memory refs
9328 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9330 static void
9331 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9333 if (reload_in_progress)
9334 return;
9336 if (GET_CODE (operands[0]) == MEM
9337 && GET_CODE (XEXP (operands[0], 0)) != REG
9338 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9339 GET_MODE (operands[0]), false))
9340 operands[0]
9341 = replace_equiv_address (operands[0],
9342 copy_addr_to_reg (XEXP (operands[0], 0)));
9344 if (GET_CODE (operands[1]) == MEM
9345 && GET_CODE (XEXP (operands[1], 0)) != REG
9346 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9347 GET_MODE (operands[1]), false))
9348 operands[1]
9349 = replace_equiv_address (operands[1],
9350 copy_addr_to_reg (XEXP (operands[1], 0)));
9353 /* Generate a vector of constants to permute MODE for a little-endian
9354 storage operation by swapping the two halves of a vector. */
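/* For V4SImode, for instance, the permute vector built below is
   {2, 3, 0, 1}: element i of the result selects element
   (i +/- subparts/2) of the source, so the two 64-bit halves of the
   vector swap places. */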
9355 static rtvec
9356 rs6000_const_vec (machine_mode mode)
9358 int i, subparts;
9359 rtvec v;
9361 switch (mode)
9363 case V1TImode:
9364 subparts = 1;
9365 break;
9366 case V2DFmode:
9367 case V2DImode:
9368 subparts = 2;
9369 break;
9370 case V4SFmode:
9371 case V4SImode:
9372 subparts = 4;
9373 break;
9374 case V8HImode:
9375 subparts = 8;
9376 break;
9377 case V16QImode:
9378 subparts = 16;
9379 break;
9380 default:
9381 gcc_unreachable ();
9384 v = rtvec_alloc (subparts);
9386 for (i = 0; i < subparts / 2; ++i)
9387 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9388 for (i = subparts / 2; i < subparts; ++i)
9389 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9391 return v;
9394 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
9395 for a VSX load or store operation. */
9396 rtx
9397 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
9399 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
9400 128-bit integers if they are allowed in VSX registers. */
9401 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
9402 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
9403 else
9405 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9406 return gen_rtx_VEC_SELECT (mode, source, par);
9410 /* Emit a little-endian load from vector memory location SOURCE to VSX
9411 register DEST in mode MODE. The load is done with two permuting
9412 insns that represent an lxvd2x and xxpermdi. */
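/* On a little-endian target, lxvd2x loads the two 64-bit halves of
   the vector in doubleword-swapped order; the second permute (an
   xxpermdi that swaps the halves back) restores element order. For
   example, a V4SI value laid out as {0,1,2,3} in memory would
   otherwise arrive in the register as {2,3,0,1}. */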
9413 void
9414 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9416 rtx tmp, permute_mem, permute_reg;
9418 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9419 V1TImode). */
9420 if (mode == TImode || mode == V1TImode)
9422 mode = V2DImode;
9423 dest = gen_lowpart (V2DImode, dest);
9424 source = adjust_address (source, V2DImode, 0);
9427 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9428 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
9429 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
9430 emit_insn (gen_rtx_SET (tmp, permute_mem));
9431 emit_insn (gen_rtx_SET (dest, permute_reg));
9434 /* Emit a little-endian store to vector memory location DEST from VSX
9435 register SOURCE in mode MODE. The store is done with two permuting
9436 insns that represent an xxpermdi and an stxvd2x. */
9437 void
9438 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9440 rtx tmp, permute_src, permute_tmp;
9442 /* This should never be called during or after reload, because it does
9443 not re-permute the source register. It is intended only for use
9444 during expand. */
9445 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
9447 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9448 V1TImode). */
9449 if (mode == TImode || mode == V1TImode)
9451 mode = V2DImode;
9452 dest = adjust_address (dest, V2DImode, 0);
9453 source = gen_lowpart (V2DImode, source);
9456 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9457 permute_src = rs6000_gen_le_vsx_permute (source, mode);
9458 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
9459 emit_insn (gen_rtx_SET (tmp, permute_src));
9460 emit_insn (gen_rtx_SET (dest, permute_tmp));
9463 /* Emit a sequence representing a little-endian VSX load or store,
9464 moving data from SOURCE to DEST in mode MODE. This is done
9465 separately from rs6000_emit_move to ensure it is called only
9466 during expand. LE VSX loads and stores introduced later are
9467 handled with a split. The expand-time RTL generation allows
9468 us to optimize away redundant pairs of register-permutes. */
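/* For example, a vector copy expanded as a load immediately followed
   by a store produces two back-to-back half-swaps on the intermediate
   register; because this RTL exists at expand time, later passes can
   recognize the pair as a no-op and delete it. */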
9469 void
9470 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9472 gcc_assert (!BYTES_BIG_ENDIAN
9473 && VECTOR_MEM_VSX_P (mode)
9474 && !TARGET_P9_VECTOR
9475 && !gpr_or_gpr_p (dest, source)
9476 && (MEM_P (source) ^ MEM_P (dest)));
9478 if (MEM_P (source))
9480 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
9481 rs6000_emit_le_vsx_load (dest, source, mode);
9483 else
9485 if (!REG_P (source))
9486 source = force_reg (mode, source);
9487 rs6000_emit_le_vsx_store (dest, source, mode);
9491 /* Emit a move from SOURCE to DEST in mode MODE. */
9492 void
9493 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9495 rtx operands[2];
9496 operands[0] = dest;
9497 operands[1] = source;
9499 if (TARGET_DEBUG_ADDR)
9501 fprintf (stderr,
9502 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
9503 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9504 GET_MODE_NAME (mode),
9505 reload_in_progress,
9506 reload_completed,
9507 can_create_pseudo_p ());
9508 debug_rtx (dest);
9509 fprintf (stderr, "source:\n");
9510 debug_rtx (source);
9513 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
9514 if (CONST_WIDE_INT_P (operands[1])
9515 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9517 /* This should be fixed with the introduction of CONST_WIDE_INT. */
9518 gcc_unreachable ();
9521 /* Check if GCC is setting up a block move that will end up using FP
9522 registers as temporaries. We must make sure this is acceptable. */
9523 if (GET_CODE (operands[0]) == MEM
9524 && GET_CODE (operands[1]) == MEM
9525 && mode == DImode
9526 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
9527 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
9528 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
9529 ? 32 : MEM_ALIGN (operands[0])))
9530 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
9531 ? 32
9532 : MEM_ALIGN (operands[1]))))
9533 && ! MEM_VOLATILE_P (operands [0])
9534 && ! MEM_VOLATILE_P (operands [1]))
9536 emit_move_insn (adjust_address (operands[0], SImode, 0),
9537 adjust_address (operands[1], SImode, 0));
9538 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9539 adjust_address (copy_rtx (operands[1]), SImode, 4));
9540 return;
9543 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9544 && !gpc_reg_operand (operands[1], mode))
9545 operands[1] = force_reg (mode, operands[1]);
9547 /* Recognize the case where operand[1] is a reference to thread-local
9548 data and load its address to a register. */
9549 if (tls_referenced_p (operands[1]))
9551 enum tls_model model;
9552 rtx tmp = operands[1];
9553 rtx addend = NULL;
9555 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9557 addend = XEXP (XEXP (tmp, 0), 1);
9558 tmp = XEXP (XEXP (tmp, 0), 0);
9561 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9562 model = SYMBOL_REF_TLS_MODEL (tmp);
9563 gcc_assert (model != 0);
9565 tmp = rs6000_legitimize_tls_address (tmp, model);
9566 if (addend)
9568 tmp = gen_rtx_PLUS (mode, tmp, addend);
9569 tmp = force_operand (tmp, operands[0]);
9571 operands[1] = tmp;
9574 /* Handle the case where reload calls us with an invalid address. */
9575 if (reload_in_progress && mode == Pmode
9576 && (! general_operand (operands[1], mode)
9577 || ! nonimmediate_operand (operands[0], mode)))
9578 goto emit_set;
9580 /* 128-bit constant floating-point values on Darwin should really be loaded
9581 as two parts. However, this premature splitting is a problem when DFmode
9582 values can go into Altivec registers. */
9583 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9584 && GET_CODE (operands[1]) == CONST_DOUBLE)
9586 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9587 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9588 DFmode);
9589 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9590 GET_MODE_SIZE (DFmode)),
9591 simplify_gen_subreg (DFmode, operands[1], mode,
9592 GET_MODE_SIZE (DFmode)),
9593 DFmode);
9594 return;
9597 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9598 cfun->machine->sdmode_stack_slot =
9599 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9602 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9603 p1:SD) if p1 is not of floating point class and p0 is spilled as
9604 we can have no analogous movsd_store for this. */
9605 if (lra_in_progress && mode == DDmode
9606 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9607 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9608 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9609 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9611 enum reg_class cl;
9612 int regno = REGNO (SUBREG_REG (operands[1]));
9614 if (regno >= FIRST_PSEUDO_REGISTER)
9616 cl = reg_preferred_class (regno);
9617 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9619 if (regno >= 0 && ! FP_REGNO_P (regno))
9621 mode = SDmode;
9622 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9623 operands[1] = SUBREG_REG (operands[1]);
9626 if (lra_in_progress
9627 && mode == SDmode
9628 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9629 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9630 && (REG_P (operands[1])
9631 || (GET_CODE (operands[1]) == SUBREG
9632 && REG_P (SUBREG_REG (operands[1])))))
9634 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9635 ? SUBREG_REG (operands[1]) : operands[1]);
9636 enum reg_class cl;
9638 if (regno >= FIRST_PSEUDO_REGISTER)
9640 cl = reg_preferred_class (regno);
9641 gcc_assert (cl != NO_REGS);
9642 regno = ira_class_hard_regs[cl][0];
9644 if (FP_REGNO_P (regno))
9646 if (GET_MODE (operands[0]) != DDmode)
9647 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9648 emit_insn (gen_movsd_store (operands[0], operands[1]));
9650 else if (INT_REGNO_P (regno))
9651 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9652 else
9653 gcc_unreachable ();
9654 return;
9656 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9657 p:DD)) if p0 is not of floating point class and p1 is spilled as
9658 we can have no analogous movsd_load for this. */
9659 if (lra_in_progress && mode == DDmode
9660 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
9661 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9662 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9663 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9665 enum reg_class cl;
9666 int regno = REGNO (SUBREG_REG (operands[0]));
9668 if (regno >= FIRST_PSEUDO_REGISTER)
9670 cl = reg_preferred_class (regno);
9671 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9673 if (regno >= 0 && ! FP_REGNO_P (regno))
9675 mode = SDmode;
9676 operands[0] = SUBREG_REG (operands[0]);
9677 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9680 if (lra_in_progress
9681 && mode == SDmode
9682 && (REG_P (operands[0])
9683 || (GET_CODE (operands[0]) == SUBREG
9684 && REG_P (SUBREG_REG (operands[0]))))
9685 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9686 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9688 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
9689 ? SUBREG_REG (operands[0]) : operands[0]);
9690 enum reg_class cl;
9692 if (regno >= FIRST_PSEUDO_REGISTER)
9694 cl = reg_preferred_class (regno);
9695 gcc_assert (cl != NO_REGS);
9696 regno = ira_class_hard_regs[cl][0];
9698 if (FP_REGNO_P (regno))
9700 if (GET_MODE (operands[1]) != DDmode)
9701 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9702 emit_insn (gen_movsd_load (operands[0], operands[1]));
9704 else if (INT_REGNO_P (regno))
9705 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9706 else
9707 gcc_unreachable ();
9708 return;
9711 if (reload_in_progress
9712 && mode == SDmode
9713 && cfun->machine->sdmode_stack_slot != NULL_RTX
9714 && MEM_P (operands[0])
9715 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
9716 && REG_P (operands[1]))
9718 if (FP_REGNO_P (REGNO (operands[1])))
9720 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
9721 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9722 emit_insn (gen_movsd_store (mem, operands[1]));
9724 else if (INT_REGNO_P (REGNO (operands[1])))
9726 rtx mem = operands[0];
9727 if (BYTES_BIG_ENDIAN)
9728 mem = adjust_address_nv (mem, mode, 4);
9729 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9730 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
9732 else
9733 gcc_unreachable ();
9734 return;
9736 if (reload_in_progress
9737 && mode == SDmode
9738 && REG_P (operands[0])
9739 && MEM_P (operands[1])
9740 && cfun->machine->sdmode_stack_slot != NULL_RTX
9741 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
9743 if (FP_REGNO_P (REGNO (operands[0])))
9745 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
9746 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9747 emit_insn (gen_movsd_load (operands[0], mem));
9749 else if (INT_REGNO_P (REGNO (operands[0])))
9751 rtx mem = operands[1];
9752 if (BYTES_BIG_ENDIAN)
9753 mem = adjust_address_nv (mem, mode, 4);
9754 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9755 emit_insn (gen_movsd_hardfloat (operands[0], mem));
9757 else
9758 gcc_unreachable ();
9759 return;
9762 /* FIXME: In the long term, this switch statement should go away
9763 and be replaced by a sequence of tests based on things like
9764 mode == Pmode. */
9765 switch (mode)
9767 case HImode:
9768 case QImode:
9769 if (CONSTANT_P (operands[1])
9770 && GET_CODE (operands[1]) != CONST_INT)
9771 operands[1] = force_const_mem (mode, operands[1]);
9772 break;
9774 case TFmode:
9775 case TDmode:
9776 case IFmode:
9777 case KFmode:
9778 if (FLOAT128_2REG_P (mode))
9779 rs6000_eliminate_indexed_memrefs (operands);
9780 /* fall through */
9782 case DFmode:
9783 case DDmode:
9784 case SFmode:
9785 case SDmode:
9786 if (CONSTANT_P (operands[1])
9787 && ! easy_fp_constant (operands[1], mode))
9788 operands[1] = force_const_mem (mode, operands[1]);
9789 break;
9791 case V16QImode:
9792 case V8HImode:
9793 case V4SFmode:
9794 case V4SImode:
9795 case V4HImode:
9796 case V2SFmode:
9797 case V2SImode:
9798 case V1DImode:
9799 case V2DFmode:
9800 case V2DImode:
9801 case V1TImode:
9802 if (CONSTANT_P (operands[1])
9803 && !easy_vector_constant (operands[1], mode))
9804 operands[1] = force_const_mem (mode, operands[1]);
9805 break;
9807 case SImode:
9808 case DImode:
9809 /* Use default pattern for address of ELF small data */
9810 if (TARGET_ELF
9811 && mode == Pmode
9812 && DEFAULT_ABI == ABI_V4
9813 && (GET_CODE (operands[1]) == SYMBOL_REF
9814 || GET_CODE (operands[1]) == CONST)
9815 && small_data_operand (operands[1], mode))
9817 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9818 return;
9821 if (DEFAULT_ABI == ABI_V4
9822 && mode == Pmode && mode == SImode
9823 && flag_pic == 1 && got_operand (operands[1], mode))
9825 emit_insn (gen_movsi_got (operands[0], operands[1]));
9826 return;
9829 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9830 && TARGET_NO_TOC
9831 && ! flag_pic
9832 && mode == Pmode
9833 && CONSTANT_P (operands[1])
9834 && GET_CODE (operands[1]) != HIGH
9835 && GET_CODE (operands[1]) != CONST_INT)
9837 rtx target = (!can_create_pseudo_p ()
9838 ? operands[0]
9839 : gen_reg_rtx (mode));
9841 /* If this is a function address on -mcall-aixdesc,
9842 convert it to the address of the descriptor. */
9843 if (DEFAULT_ABI == ABI_AIX
9844 && GET_CODE (operands[1]) == SYMBOL_REF
9845 && XSTR (operands[1], 0)[0] == '.')
9847 const char *name = XSTR (operands[1], 0);
9848 rtx new_ref;
9849 while (*name == '.')
9850 name++;
9851 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9852 CONSTANT_POOL_ADDRESS_P (new_ref)
9853 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9854 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9855 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9856 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9857 operands[1] = new_ref;
9860 if (DEFAULT_ABI == ABI_DARWIN)
9862 #if TARGET_MACHO
9863 if (MACHO_DYNAMIC_NO_PIC_P)
9865 /* Take care of any required data indirection. */
9866 operands[1] = rs6000_machopic_legitimize_pic_address (
9867 operands[1], mode, operands[0]);
9868 if (operands[0] != operands[1])
9869 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9870 return;
9872 #endif
9873 emit_insn (gen_macho_high (target, operands[1]));
9874 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9875 return;
9878 emit_insn (gen_elf_high (target, operands[1]));
9879 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9880 return;
9883 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9884 and we have put it in the TOC, we just need to make a TOC-relative
9885 reference to it. */
9886 if (TARGET_TOC
9887 && GET_CODE (operands[1]) == SYMBOL_REF
9888 && use_toc_relative_ref (operands[1], mode))
9889 operands[1] = create_TOC_reference (operands[1], operands[0]);
9890 else if (mode == Pmode
9891 && CONSTANT_P (operands[1])
9892 && GET_CODE (operands[1]) != HIGH
9893 && ((GET_CODE (operands[1]) != CONST_INT
9894 && ! easy_fp_constant (operands[1], mode))
9895 || (GET_CODE (operands[1]) == CONST_INT
9896 && (num_insns_constant (operands[1], mode)
9897 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9898 || (GET_CODE (operands[0]) == REG
9899 && FP_REGNO_P (REGNO (operands[0]))))
9900 && !toc_relative_expr_p (operands[1], false)
9901 && (TARGET_CMODEL == CMODEL_SMALL
9902 || can_create_pseudo_p ()
9903 || (REG_P (operands[0])
9904 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9907 #if TARGET_MACHO
9908 /* Darwin uses a special PIC legitimizer. */
9909 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9911 operands[1] =
9912 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9913 operands[0]);
9914 if (operands[0] != operands[1])
9915 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9916 return;
9918 #endif
9920 /* If we are to limit the number of things we put in the TOC and
9921 this is a symbol plus a constant we can add in one insn,
9922 just put the symbol in the TOC and add the constant. Don't do
9923 this if reload is in progress. */
9924 if (GET_CODE (operands[1]) == CONST
9925 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
9926 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9927 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9928 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9929 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
9930 && ! side_effects_p (operands[0]))
9932 rtx sym =
9933 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9934 rtx other = XEXP (XEXP (operands[1], 0), 1);
9936 sym = force_reg (mode, sym);
9937 emit_insn (gen_add3_insn (operands[0], sym, other));
9938 return;
9941 operands[1] = force_const_mem (mode, operands[1]);
9943 if (TARGET_TOC
9944 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9945 && constant_pool_expr_p (XEXP (operands[1], 0))
9946 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
9947 get_pool_constant (XEXP (operands[1], 0)),
9948 get_pool_mode (XEXP (operands[1], 0))))
9950 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9951 operands[0]);
9952 operands[1] = gen_const_mem (mode, tocref);
9953 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9956 break;
9958 case TImode:
9959 if (!VECTOR_MEM_VSX_P (TImode))
9960 rs6000_eliminate_indexed_memrefs (operands);
9961 break;
9963 case PTImode:
9964 rs6000_eliminate_indexed_memrefs (operands);
9965 break;
9967 default:
9968 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9971 /* Above, we may have called force_const_mem which may have returned
9972 an invalid address. If we can, fix this up; otherwise, reload will
9973 have to deal with it. */
9974 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9975 operands[1] = validize_mem (operands[1]);
9977 emit_set:
9978 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9981 /* Return true if a structure, union or array containing FIELD should be
9982 accessed using `BLKmode'.
9984 For the SPE, SIMD types are V2SI, and GCC can be tempted to put the
9985 entire thing in a DI and use subregs to access the internals.
9986 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9987 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9988 best thing to do is set structs to BLKmode and avoid Severe Tire
9989 Damage.
9991 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9992 fit into 1, whereas DI still needs two. */
9994 static bool
9995 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9997 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9998 || (TARGET_E500_DOUBLE && mode == DFmode));
10001 /* Nonzero if we can use a floating-point register to pass this arg. */
10002 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10003 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10004 && (CUM)->fregno <= FP_ARG_MAX_REG \
10005 && TARGET_HARD_FLOAT && TARGET_FPRS)
10007 /* Nonzero if we can use an AltiVec register to pass this arg. */
10008 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10009 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10010 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10011 && TARGET_ALTIVEC_ABI \
10012 && (NAMED))
10014 /* Walk down the type tree of TYPE counting consecutive base elements.
10015 If *MODEP is VOIDmode, then set it to the first valid floating point
10016 or vector type. If a non-floating point or vector type is found, or
10017 if a floating point or vector type that doesn't match a non-VOIDmode
10018 *MODEP is found, then return -1, otherwise return the count in the
10019 sub-tree. */
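/* Examples, using hypothetical types purely for illustration:

     struct pt  { double x, y; };       counts as 2 x DFmode
     struct box { struct pt lo, hi; };  counts as 4 x DFmode
     struct bad { double d; int i; };   returns -1 (mixed base types) */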
10021 static int
10022 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10024 machine_mode mode;
10025 HOST_WIDE_INT size;
10027 switch (TREE_CODE (type))
10029 case REAL_TYPE:
10030 mode = TYPE_MODE (type);
10031 if (!SCALAR_FLOAT_MODE_P (mode))
10032 return -1;
10034 if (*modep == VOIDmode)
10035 *modep = mode;
10037 if (*modep == mode)
10038 return 1;
10040 break;
10042 case COMPLEX_TYPE:
10043 mode = TYPE_MODE (TREE_TYPE (type));
10044 if (!SCALAR_FLOAT_MODE_P (mode))
10045 return -1;
10047 if (*modep == VOIDmode)
10048 *modep = mode;
10050 if (*modep == mode)
10051 return 2;
10053 break;
10055 case VECTOR_TYPE:
10056 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10057 return -1;
10059 /* Use V4SImode as representative of all 128-bit vector types. */
10060 size = int_size_in_bytes (type);
10061 switch (size)
10063 case 16:
10064 mode = V4SImode;
10065 break;
10066 default:
10067 return -1;
10070 if (*modep == VOIDmode)
10071 *modep = mode;
10073 /* Vector modes are considered to be opaque: two vectors are
10074 equivalent for the purposes of being homogeneous aggregates
10075 if they are the same size. */
10076 if (*modep == mode)
10077 return 1;
10079 break;
10081 case ARRAY_TYPE:
10083 int count;
10084 tree index = TYPE_DOMAIN (type);
10086 /* Can't handle incomplete types nor sizes that are not
10087 fixed. */
10088 if (!COMPLETE_TYPE_P (type)
10089 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10090 return -1;
10092 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10093 if (count == -1
10094 || !index
10095 || !TYPE_MAX_VALUE (index)
10096 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10097 || !TYPE_MIN_VALUE (index)
10098 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10099 || count < 0)
10100 return -1;
10102 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10103 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10105 /* There must be no padding. */
10106 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10107 return -1;
10109 return count;
10112 case RECORD_TYPE:
10114 int count = 0;
10115 int sub_count;
10116 tree field;
10118 /* Can't handle incomplete types nor sizes that are not
10119 fixed. */
10120 if (!COMPLETE_TYPE_P (type)
10121 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10122 return -1;
10124 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10126 if (TREE_CODE (field) != FIELD_DECL)
10127 continue;
10129 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10130 if (sub_count < 0)
10131 return -1;
10132 count += sub_count;
10135 /* There must be no padding. */
10136 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10137 return -1;
10139 return count;
10142 case UNION_TYPE:
10143 case QUAL_UNION_TYPE:
10145 /* These aren't very interesting except in a degenerate case. */
10146 int count = 0;
10147 int sub_count;
10148 tree field;
10150 /* Can't handle incomplete types nor sizes that are not
10151 fixed. */
10152 if (!COMPLETE_TYPE_P (type)
10153 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10154 return -1;
10156 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10158 if (TREE_CODE (field) != FIELD_DECL)
10159 continue;
10161 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10162 if (sub_count < 0)
10163 return -1;
10164 count = count > sub_count ? count : sub_count;
10167 /* There must be no padding. */
10168 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10169 return -1;
10171 return count;
10174 default:
10175 break;
10178 return -1;
10181 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10182 float or vector aggregate that shall be passed in FP/vector registers
10183 according to the ELFv2 ABI, return the homogeneous element mode in
10184 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10186 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
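/* Continuing the illustration above: under ELFv2 a struct of four
   doubles is homogeneous, so *ELT_MODE becomes DFmode and *N_ELTS 4,
   since four one-register elements fit within AGGR_ARG_NUM_REG (eight
   registers). A struct of nine doubles would exceed that limit and
   fall back to the ordinary aggregate rules. */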
10188 static bool
10189 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10190 machine_mode *elt_mode,
10191 int *n_elts)
10193 /* Note that we do not accept complex types at the top level as
10194 homogeneous aggregates; these types are handled via the
10195 targetm.calls.split_complex_arg mechanism. Complex types
10196 can be elements of homogeneous aggregates, however. */
10197 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10199 machine_mode field_mode = VOIDmode;
10200 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10202 if (field_count > 0)
10204 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10205 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10207 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10208 up to AGGR_ARG_NUM_REG registers. */
10209 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10211 if (elt_mode)
10212 *elt_mode = field_mode;
10213 if (n_elts)
10214 *n_elts = field_count;
10215 return true;
10220 if (elt_mode)
10221 *elt_mode = mode;
10222 if (n_elts)
10223 *n_elts = 1;
10224 return false;
10227 /* Return a nonzero value to say to return the function value in
10228 memory, just as large structures are always returned. TYPE will be
10229 the data type of the value, and FNTYPE will be the type of the
10230 function doing the returning, or @code{NULL} for libcalls.
10232 The AIX ABI for the RS/6000 specifies that all structures are
10233 returned in memory. The Darwin ABI does the same.
10235 For the Darwin 64 Bit ABI, a function result can be returned in
10236 registers or in memory, depending on the size of the return data
10237 type. If it is returned in registers, the value occupies the same
10238 registers as it would if it were the first and only function
10239 argument. Otherwise, the function places its result in memory at
10240 the location pointed to by GPR3.
10242 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10243 but a draft put them in memory, and GCC used to implement the draft
10244 instead of the final standard. Therefore, aix_struct_return
10245 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10246 compatibility can change DRAFT_V4_STRUCT_RET to override the
10247 default, and -m switches get the final word. See
10248 rs6000_option_override_internal for more details.
10250 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10251 long double support is enabled. These values are returned in memory.
10253 int_size_in_bytes returns -1 for variable size objects, which go in
10254 memory always. The cast to unsigned makes -1 > 8. */
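/* Concretely: on AIX (aix_struct_return) every aggregate return goes
   through memory; under ELFv2 a struct of two doubles comes back in
   FPRs as a homogeneous aggregate and other aggregates of at most 16
   bytes come back in GPRs; on V.4 an aggregate larger than 8 bytes
   goes to memory even when aix_struct_return is off. */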
10256 static bool
10257 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10259 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10260 if (TARGET_MACHO
10261 && rs6000_darwin64_abi
10262 && TREE_CODE (type) == RECORD_TYPE
10263 && int_size_in_bytes (type) > 0)
10265 CUMULATIVE_ARGS valcum;
10266 rtx valret;
10268 valcum.words = 0;
10269 valcum.fregno = FP_ARG_MIN_REG;
10270 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10271 /* Do a trial code generation as if this were going to be passed
10272 as an argument; if any part goes in memory, we return NULL. */
10273 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10274 if (valret)
10275 return false;
10276 /* Otherwise fall through to more conventional ABI rules. */
10279 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10280 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10281 NULL, NULL))
10282 return false;
10284 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10285 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10286 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10287 return false;
10289 if (AGGREGATE_TYPE_P (type)
10290 && (aix_struct_return
10291 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10292 return true;
10294 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10295 modes only exist for GCC vector types if -maltivec. */
10296 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10297 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10298 return false;
10300 /* Return synthetic vectors in memory. */
10301 if (TREE_CODE (type) == VECTOR_TYPE
10302 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10304 static bool warned_for_return_big_vectors = false;
10305 if (!warned_for_return_big_vectors)
10307 warning (0, "GCC vector returned by reference: "
10308 "non-standard ABI extension with no compatibility guarantee");
10309 warned_for_return_big_vectors = true;
10311 return true;
10314 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10315 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10316 return true;
10318 return false;
10321 /* Specify whether values returned in registers should be at the most
10322 significant end of a register. We want aggregates returned by
10323 value to match the way aggregates are passed to functions. */
10325 static bool
10326 rs6000_return_in_msb (const_tree valtype)
10328 return (DEFAULT_ABI == ABI_ELFv2
10329 && BYTES_BIG_ENDIAN
10330 && AGGREGATE_TYPE_P (valtype)
10331 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
10334 #ifdef HAVE_AS_GNU_ATTRIBUTE
10335 /* Return TRUE if a call to function FNDECL may be one that
10336 potentially affects the function calling ABI of the object file. */
10338 static bool
10339 call_ABI_of_interest (tree fndecl)
10341 if (symtab->state == EXPANSION)
10343 struct cgraph_node *c_node;
10345 /* Libcalls are always interesting. */
10346 if (fndecl == NULL_TREE)
10347 return true;
10349 /* Any call to an external function is interesting. */
10350 if (DECL_EXTERNAL (fndecl))
10351 return true;
10353 /* Interesting functions that we are emitting in this object file. */
10354 c_node = cgraph_node::get (fndecl);
10355 c_node = c_node->ultimate_alias_target ();
10356 return !c_node->only_called_directly_p ();
10358 return false;
10360 #endif
10362 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10363 for a call to a function whose data type is FNTYPE.
10364 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10366 For incoming args we set the number of arguments in the prototype large
10367 so we never return a PARALLEL. */
10369 void
10370 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10371 rtx libname ATTRIBUTE_UNUSED, int incoming,
10372 int libcall, int n_named_args,
10373 tree fndecl ATTRIBUTE_UNUSED,
10374 machine_mode return_mode ATTRIBUTE_UNUSED)
10376 static CUMULATIVE_ARGS zero_cumulative;
10378 *cum = zero_cumulative;
10379 cum->words = 0;
10380 cum->fregno = FP_ARG_MIN_REG;
10381 cum->vregno = ALTIVEC_ARG_MIN_REG;
10382 cum->prototype = (fntype && prototype_p (fntype));
10383 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10384 ? CALL_LIBCALL : CALL_NORMAL);
10385 cum->sysv_gregno = GP_ARG_MIN_REG;
10386 cum->stdarg = stdarg_p (fntype);
10387 cum->libcall = libcall;
10389 cum->nargs_prototype = 0;
10390 if (incoming || cum->prototype)
10391 cum->nargs_prototype = n_named_args;
10393 /* Check for a longcall attribute. */
10394 if ((!fntype && rs6000_default_long_calls)
10395 || (fntype
10396 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10397 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10398 cum->call_cookie |= CALL_LONG;
10400 if (TARGET_DEBUG_ARG)
10402 fprintf (stderr, "\ninit_cumulative_args:");
10403 if (fntype)
10405 tree ret_type = TREE_TYPE (fntype);
10406 fprintf (stderr, " ret code = %s,",
10407 get_tree_code_name (TREE_CODE (ret_type)));
10410 if (cum->call_cookie & CALL_LONG)
10411 fprintf (stderr, " longcall,");
10413 fprintf (stderr, " proto = %d, nargs = %d\n",
10414 cum->prototype, cum->nargs_prototype);
10417 #ifdef HAVE_AS_GNU_ATTRIBUTE
10418 if (DEFAULT_ABI == ABI_V4)
10420 cum->escapes = call_ABI_of_interest (fndecl);
10421 if (cum->escapes)
10423 tree return_type;
10425 if (fntype)
10427 return_type = TREE_TYPE (fntype);
10428 return_mode = TYPE_MODE (return_type);
10430 else
10431 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10433 if (return_type != NULL)
10435 if (TREE_CODE (return_type) == RECORD_TYPE
10436 && TYPE_TRANSPARENT_AGGR (return_type))
10438 return_type = TREE_TYPE (first_field (return_type));
10439 return_mode = TYPE_MODE (return_type);
10441 if (AGGREGATE_TYPE_P (return_type)
10442 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10443 <= 8))
10444 rs6000_returns_struct = true;
10446 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
10447 rs6000_passes_float = true;
10448 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
10449 || SPE_VECTOR_MODE (return_mode))
10450 rs6000_passes_vector = true;
10453 #endif
10455 if (fntype
10456 && !TARGET_ALTIVEC
10457 && TARGET_ALTIVEC_ABI
10458 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10460 error ("cannot return value in vector register because"
10461 " altivec instructions are disabled, use -maltivec"
10462 " to enable them");
10466 /* The mode the ABI uses for a word. This is not the same as word_mode
10467 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10469 static machine_mode
10470 rs6000_abi_word_mode (void)
10472 return TARGET_32BIT ? SImode : DImode;
10475 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10476 static char *
10477 rs6000_offload_options (void)
10479 if (TARGET_64BIT)
10480 return xstrdup ("-foffload-abi=lp64");
10481 else
10482 return xstrdup ("-foffload-abi=ilp32");
10485 /* On rs6000, function arguments are promoted, as are function return
10486 values. */
10488 static machine_mode
10489 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10490 machine_mode mode,
10491 int *punsignedp ATTRIBUTE_UNUSED,
10492 const_tree, int)
10494 PROMOTE_MODE (mode, *punsignedp, type);
10496 return mode;
10499 /* Return true if TYPE must be passed on the stack and not in registers. */
10501 static bool
10502 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10504 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10505 return must_pass_in_stack_var_size (mode, type);
10506 else
10507 return must_pass_in_stack_var_size_or_pad (mode, type);
10510 static inline bool
10511 is_complex_IBM_long_double (machine_mode mode)
10513 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
10516 /* Whether ABI_V4 passes MODE args to a function in floating point
10517 registers. */
10519 static bool
10520 abi_v4_pass_in_fpr (machine_mode mode)
10522 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
10523 return false;
10524 if (TARGET_SINGLE_FLOAT && mode == SFmode)
10525 return true;
10526 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
10527 return true;
10528 /* ABI_V4 passes complex IBM long double in 8 gprs.
10529 Stupid, but we can't change the ABI now. */
10530 if (is_complex_IBM_long_double (mode))
10531 return false;
10532 if (FLOAT128_2REG_P (mode))
10533 return true;
10534 if (DECIMAL_FLOAT_MODE_P (mode))
10535 return true;
10536 return false;
10539 /* If defined, a C expression which determines whether, and in which
10540 direction, to pad out an argument with extra space. The value
10541 should be of type `enum direction': either `upward' to pad above
10542 the argument, `downward' to pad below, or `none' to inhibit
10543 padding.
10545 For the AIX ABI structs are always stored left shifted in their
10546 argument slot. */
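/* For example, with BYTES_BIG_ENDIAN a 3-byte BLKmode struct is padded
   upward (left-justified in its slot, matching AIX aggregate passing),
   while 1-, 2- and 4-byte values keep the historical integer-like
   downward padding. */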
10548 enum direction
10549 function_arg_padding (machine_mode mode, const_tree type)
10551 #ifndef AGGREGATE_PADDING_FIXED
10552 #define AGGREGATE_PADDING_FIXED 0
10553 #endif
10554 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10555 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10556 #endif
10558 if (!AGGREGATE_PADDING_FIXED)
10560 /* GCC used to pass structures of the same size as integer types as
10561 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
10562 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10563 passed padded downward, except that -mstrict-align further
10564 muddied the water in that multi-component structures of 2 and 4
10565 bytes in size were passed padded upward.
10567 The following arranges for best compatibility with previous
10568 versions of gcc, but removes the -mstrict-align dependency. */
10569 if (BYTES_BIG_ENDIAN)
10571 HOST_WIDE_INT size = 0;
10573 if (mode == BLKmode)
10575 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10576 size = int_size_in_bytes (type);
10578 else
10579 size = GET_MODE_SIZE (mode);
10581 if (size == 1 || size == 2 || size == 4)
10582 return downward;
10584 return upward;
10587 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10589 if (type != 0 && AGGREGATE_TYPE_P (type))
10590 return upward;
10593 /* Fall back to the default. */
10594 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10597 /* If defined, a C expression that gives the alignment boundary, in bits,
10598 of an argument with the specified mode and type. If it is not defined,
10599 PARM_BOUNDARY is used for all arguments.
10601 V.4 wants long longs and doubles to be double word aligned. Just
10602 testing the mode size is a boneheaded way to do this as it means
10603 that other types such as complex int are also double word aligned.
10604 However, we're stuck with this because changing the ABI might break
10605 existing library interfaces.
10607 Doubleword align SPE vectors.
10608 Quadword align Altivec/VSX vectors.
10609 Quadword align large synthetic vector types. */
10611 static unsigned int
10612 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10614 machine_mode elt_mode;
10615 int n_elts;
10617 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10619 if (DEFAULT_ABI == ABI_V4
10620 && (GET_MODE_SIZE (mode) == 8
10621 || (TARGET_HARD_FLOAT
10622 && TARGET_FPRS
10623 && !is_complex_IBM_long_double (mode)
10624 && FLOAT128_2REG_P (mode))))
10625 return 64;
10626 else if (FLOAT128_VECTOR_P (mode))
10627 return 128;
10628 else if (SPE_VECTOR_MODE (mode)
10629 || (type && TREE_CODE (type) == VECTOR_TYPE
10630 && int_size_in_bytes (type) >= 8
10631 && int_size_in_bytes (type) < 16))
10632 return 64;
10633 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10634 || (type && TREE_CODE (type) == VECTOR_TYPE
10635 && int_size_in_bytes (type) >= 16))
10636 return 128;
10638 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10639 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10640 -mcompat-align-parm is used. */
10641 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10642 || DEFAULT_ABI == ABI_ELFv2)
10643 && type && TYPE_ALIGN (type) > 64)
10645 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10646 or homogeneous float/vector aggregates here. We already handled
10647 vector aggregates above, but still need to check for float here. */
10648 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10649 && !SCALAR_FLOAT_MODE_P (elt_mode));
10651 /* We used to check for BLKmode instead of the above aggregate type
10652 check. Warn when this results in any difference to the ABI. */
10653 if (aggregate_p != (mode == BLKmode))
10655 static bool warned;
10656 if (!warned && warn_psabi)
10658 warned = true;
10659 inform (input_location,
10660 "the ABI of passing aggregates with %d-byte alignment"
10661 " has changed in GCC 5",
10662 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10666 if (aggregate_p)
10667 return 128;
10670 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10671 implement the "aggregate type" check as a BLKmode check here; this
10672 means certain aggregate types are in fact not aligned. */
10673 if (TARGET_MACHO && rs6000_darwin64_abi
10674 && mode == BLKmode
10675 && type && TYPE_ALIGN (type) > 64)
10676 return 128;
10678 return PARM_BOUNDARY;
10681 /* The offset in words to the start of the parameter save area. */
10683 static unsigned int
10684 rs6000_parm_offset (void)
10686 return (DEFAULT_ABI == ABI_V4 ? 2
10687 : DEFAULT_ABI == ABI_ELFv2 ? 4
10688 : 6);
10691 /* For a function parm of MODE and TYPE, return the starting word in
10692 the parameter area. NWORDS of the parameter area are already used. */
10694 static unsigned int
10695 rs6000_parm_start (machine_mode mode, const_tree type,
10696 unsigned int nwords)
10698 unsigned int align;
10700 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10701 return nwords + (-(rs6000_parm_offset () + nwords) & align);
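/* A worked example, assuming the 64-bit AIX ABI (parameter offset of 6
   words) and a quadword-aligned argument, so that ALIGN == 128/64 - 1
   == 1: with NWORDS == 1 the result is 1 + (-(6 + 1) & 1) == 2, i.e.
   one word of padding is skipped so the argument starts on an even
   doubleword boundary. */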
10704 /* Compute the size (in words) of a function argument. */
10706 static unsigned long
10707 rs6000_arg_size (machine_mode mode, const_tree type)
10709 unsigned long size;
10711 if (mode != BLKmode)
10712 size = GET_MODE_SIZE (mode);
10713 else
10714 size = int_size_in_bytes (type);
10716 if (TARGET_32BIT)
10717 return (size + 3) >> 2;
10718 else
10719 return (size + 7) >> 3;
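/* E.g. a 9-byte BLKmode struct occupies (9 + 7) >> 3 == 2 doublewords
   in 64-bit mode and (9 + 3) >> 2 == 3 words in 32-bit mode. */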
10722 /* Use this to flush pending int fields. */
10724 static void
10725 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10726 HOST_WIDE_INT bitpos, int final)
10728 unsigned int startbit, endbit;
10729 int intregs, intoffset;
10730 machine_mode mode;
10732 /* Handle the situations where a float is taking up the first half
10733 of the GPR, and the other half is empty (typically due to
10734 alignment restrictions). We can detect this by a 8-byte-aligned
10735 int field, or by seeing that this is the final flush for this
10736 argument. Count the word and continue on. */
10737 if (cum->floats_in_gpr == 1
10738 && (cum->intoffset % 64 == 0
10739 || (cum->intoffset == -1 && final)))
10741 cum->words++;
10742 cum->floats_in_gpr = 0;
10745 if (cum->intoffset == -1)
10746 return;
10748 intoffset = cum->intoffset;
10749 cum->intoffset = -1;
10750 cum->floats_in_gpr = 0;
10752 if (intoffset % BITS_PER_WORD != 0)
10754 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10755 MODE_INT, 0);
10756 if (mode == BLKmode)
10758 /* We couldn't find an appropriate mode, which happens,
10759 e.g., in packed structs when there are 3 bytes to load.
10760 Back intoffset back to the beginning of the word in this
10761 case. */
10762 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10766 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10767 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10768 intregs = (endbit - startbit) / BITS_PER_WORD;
10769 cum->words += intregs;
10770 /* words should be unsigned. */
10771 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10773 int pad = (endbit/BITS_PER_WORD) - cum->words;
10774 cum->words += pad;
10778 /* The darwin64 ABI calls for us to recurse down through structs,
10779 looking for elements passed in registers. Unfortunately, we have
10780 to track int register count here also because of misalignments
10781 in powerpc alignment mode. */
10783 static void
10784 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10785 const_tree type,
10786 HOST_WIDE_INT startbitpos)
10788 tree f;
10790 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10791 if (TREE_CODE (f) == FIELD_DECL)
10793 HOST_WIDE_INT bitpos = startbitpos;
10794 tree ftype = TREE_TYPE (f);
10795 machine_mode mode;
10796 if (ftype == error_mark_node)
10797 continue;
10798 mode = TYPE_MODE (ftype);
10800 if (DECL_SIZE (f) != 0
10801 && tree_fits_uhwi_p (bit_position (f)))
10802 bitpos += int_bit_position (f);
10804 /* ??? FIXME: else assume zero offset. */
10806 if (TREE_CODE (ftype) == RECORD_TYPE)
10807 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10808 else if (USE_FP_FOR_ARG_P (cum, mode))
10810 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10811 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10812 cum->fregno += n_fpregs;
10813 /* Single-precision floats present a special problem for
10814 us, because they are smaller than an 8-byte GPR, and so
10815 the structure-packing rules combined with the standard
10816 varargs behavior mean that we want to pack float/float
10817 and float/int combinations into a single register's
10818 space. This is complicated by the arg advance flushing,
10819 which works on arbitrarily large groups of int-type
10820 fields. */
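/* As an illustration, struct { float f; float g; } passes both fields
   in one GPR: the first float starts an 8-byte word (floats_in_gpr
   becomes 1) and the second completes it, bumping cum->words once. */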
10821 if (mode == SFmode)
10823 if (cum->floats_in_gpr == 1)
10825 /* Two floats in a word; count the word and reset
10826 the float count. */
10827 cum->words++;
10828 cum->floats_in_gpr = 0;
10830 else if (bitpos % 64 == 0)
10832 /* A float at the beginning of an 8-byte word;
10833 count it and put off adjusting cum->words until
10834 we see if an arg advance flush is going to do it
10835 for us. */
10836 cum->floats_in_gpr++;
10838 else
10840 /* The float is at the end of a word, preceded
10841 by integer fields, so the arg advance flush
10842 just above has already set cum->words and
10843 everything is taken care of. */
10846 else
10847 cum->words += n_fpregs;
10849 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10851 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10852 cum->vregno++;
10853 cum->words += 2;
10855 else if (cum->intoffset == -1)
10856 cum->intoffset = bitpos;
10860 /* Check for an item that needs to be considered specially under the Darwin
10861 64-bit ABI. These are record types where the mode is BLKmode or the
10862 structure is 8 bytes in size. */
10863 static int
10864 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10866 return rs6000_darwin64_abi
10867 && ((mode == BLKmode
10868 && TREE_CODE (type) == RECORD_TYPE
10869 && int_size_in_bytes (type) > 0)
10870 || (type && TREE_CODE (type) == RECORD_TYPE
10871 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10874 /* Update the data in CUM to advance over an argument
10875 of mode MODE and data type TYPE.
10876 (TYPE is null for libcalls where that information may not be available.)
10878 Note that for args passed by reference, function_arg will be called
10879 with MODE and TYPE set to that of the pointer to the arg, not the arg
10880 itself. */
10882 static void
10883 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10884 const_tree type, bool named, int depth)
10886 machine_mode elt_mode;
10887 int n_elts;
10889 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10891 /* Only tick off an argument if we're not recursing. */
10892 if (depth == 0)
10893 cum->nargs_prototype--;
10895 #ifdef HAVE_AS_GNU_ATTRIBUTE
10896 if (DEFAULT_ABI == ABI_V4
10897 && cum->escapes)
10899 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
10900 rs6000_passes_float = true;
10901 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10902 rs6000_passes_vector = true;
10903 else if (SPE_VECTOR_MODE (mode)
10904 && !cum->stdarg
10905 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10906 rs6000_passes_vector = true;
10908 #endif
10910 if (TARGET_ALTIVEC_ABI
10911 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10912 || (type && TREE_CODE (type) == VECTOR_TYPE
10913 && int_size_in_bytes (type) == 16)))
10915 bool stack = false;
10917 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10919 cum->vregno += n_elts;
10921 if (!TARGET_ALTIVEC)
10922 error ("cannot pass argument in vector register because"
10923 " altivec instructions are disabled, use -maltivec"
10924 " to enable them");
10926 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10927 even if it is going to be passed in a vector register.
10928 Darwin does the same for variable-argument functions. */
10929 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10930 && TARGET_64BIT)
10931 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10932 stack = true;
10934 else
10935 stack = true;
10937 if (stack)
10939 int align;
10941 /* Vector parameters must be 16-byte aligned. In 32-bit
10942 mode this means we need to take into account the offset
10943 to the parameter save area. In 64-bit mode, they just
10944 have to start on an even word, since the parameter save
10945 area is 16-byte aligned. */
10946 if (TARGET_32BIT)
10947 align = -(rs6000_parm_offset () + cum->words) & 3;
10948 else
10949 align = cum->words & 1;
10950 cum->words += align + rs6000_arg_size (mode, type);
10952 if (TARGET_DEBUG_ARG)
10954 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10955 cum->words, align);
10956 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10957 cum->nargs_prototype, cum->prototype,
10958 GET_MODE_NAME (mode));
10962 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
10963 && !cum->stdarg
10964 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10965 cum->sysv_gregno++;
10967 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10969 int size = int_size_in_bytes (type);
10970 /* Variable sized types have size == -1 and are
10971 treated as if consisting entirely of ints.
10972 Pad to 16 byte boundary if needed. */
10973 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10974 && (cum->words % 2) != 0)
10975 cum->words++;
10976 /* For varargs, we can just go up by the size of the struct. */
10977 if (!named)
10978 cum->words += (size + 7) / 8;
10979 else
10981 /* It is tempting to say int register count just goes up by
10982 sizeof(type)/8, but this is wrong in a case such as
10983 { int; double; int; } [powerpc alignment]. We have to
10984 grovel through the fields for these too. */
10985 cum->intoffset = 0;
10986 cum->floats_in_gpr = 0;
10987 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10988 rs6000_darwin64_record_arg_advance_flush (cum,
10989 size * BITS_PER_UNIT, 1);
10991 if (TARGET_DEBUG_ARG)
10993 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10994 cum->words, TYPE_ALIGN (type), size);
10995 fprintf (stderr,
10996 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10997 cum->nargs_prototype, cum->prototype,
10998 GET_MODE_NAME (mode));
11001 else if (DEFAULT_ABI == ABI_V4)
11003 if (abi_v4_pass_in_fpr (mode))
11005 /* _Decimal128 must use an even/odd register pair. This assumes
11006 that the register number is odd when fregno is odd. */
11007 if (mode == TDmode && (cum->fregno % 2) == 1)
11008 cum->fregno++;
11010 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11011 <= FP_ARG_V4_MAX_REG)
11012 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11013 else
11015 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11016 if (mode == DFmode || FLOAT128_IBM_P (mode)
11017 || mode == DDmode || mode == TDmode)
11018 cum->words += cum->words & 1;
11019 cum->words += rs6000_arg_size (mode, type);
11022 else
11024 int n_words = rs6000_arg_size (mode, type);
11025 int gregno = cum->sysv_gregno;
11027 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11028 (r7,r8) or (r9,r10), as is any other 2-word item such
11029 as complex int, due to a historical mistake. */
11030 if (n_words == 2)
11031 gregno += (1 - gregno) & 1;
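/* A worked example (ours, not from the source): with GP_ARG_MIN_REG
   == 3, "gregno += (1 - gregno) & 1" leaves an odd gregno alone and
   bumps an even one, so a 2-word arg with gregno == 4 moves to r5
   and occupies (r5,r6), while gregno == 5 stays put; the only pair
   starts are (r3,r4), (r5,r6), (r7,r8) and (r9,r10), as the comment
   above says.  */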
11033 /* Multi-reg args are not split between registers and stack. */
11034 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11036 /* Long long and SPE vectors are aligned on the stack.
11037 So are other 2 word items such as complex int due to
11038 a historical mistake. */
11039 if (n_words == 2)
11040 cum->words += cum->words & 1;
11041 cum->words += n_words;
11044 /* Note: we continue to accumulate gregno even after we have
11045 started spilling to the stack; this tells
11046 expand_builtin_saveregs that spilling has begun. */
11047 cum->sysv_gregno = gregno + n_words;
11050 if (TARGET_DEBUG_ARG)
11052 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11053 cum->words, cum->fregno);
11054 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11055 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11056 fprintf (stderr, "mode = %4s, named = %d\n",
11057 GET_MODE_NAME (mode), named);
11060 else
11062 int n_words = rs6000_arg_size (mode, type);
11063 int start_words = cum->words;
11064 int align_words = rs6000_parm_start (mode, type, start_words);
11066 cum->words = align_words + n_words;
11068 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11070 /* _Decimal128 must be passed in an even/odd float register pair.
11071 This assumes that the register number is odd when fregno is
11072 odd. */
11073 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11074 cum->fregno++;
11075 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11078 if (TARGET_DEBUG_ARG)
11080 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11081 cum->words, cum->fregno);
11082 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11083 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11084 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11085 named, align_words - start_words, depth);
11090 static void
11091 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11092 const_tree type, bool named)
11094 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11098 static rtx
11099 spe_build_register_parallel (machine_mode mode, int gregno)
11101 rtx r1, r3, r5, r7;
11103 switch (mode)
11105 case DFmode:
11106 r1 = gen_rtx_REG (DImode, gregno);
11107 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11108 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11110 case DCmode:
11111 case TFmode:
11112 r1 = gen_rtx_REG (DImode, gregno);
11113 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11114 r3 = gen_rtx_REG (DImode, gregno + 2);
11115 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11116 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11118 case TCmode:
11119 r1 = gen_rtx_REG (DImode, gregno);
11120 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11121 r3 = gen_rtx_REG (DImode, gregno + 2);
11122 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11123 r5 = gen_rtx_REG (DImode, gregno + 4);
11124 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11125 r7 = gen_rtx_REG (DImode, gregno + 6);
11126 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11127 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11129 default:
11130 gcc_unreachable ();
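/* A sketch of the RTL built above (hand-written, not a compiler
   dump): for a DCmode argument starting at r5 the result is

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   i.e. the real part lives in the 64-bit r5 at byte offset 0 of the
   value and the imaginary part in r7 at byte offset 8.  */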
11134 /* Determine where to put a SIMD argument on the SPE. */
11135 static rtx
11136 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11137 const_tree type)
11139 int gregno = cum->sysv_gregno;
11141 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11142 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11143 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11144 || mode == DCmode || mode == TCmode))
11146 int n_words = rs6000_arg_size (mode, type);
11148 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11149 if (mode == DFmode)
11150 gregno += (1 - gregno) & 1;
11152 /* Multi-reg args are not split between registers and stack. */
11153 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11154 return NULL_RTX;
11156 return spe_build_register_parallel (mode, gregno);
11158 if (cum->stdarg)
11160 int n_words = rs6000_arg_size (mode, type);
11162 /* SPE vectors are put in odd registers. */
11163 if (n_words == 2 && (gregno & 1) == 0)
11164 gregno += 1;
11166 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11168 rtx r1, r2;
11169 machine_mode m = SImode;
11171 r1 = gen_rtx_REG (m, gregno);
11172 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11173 r2 = gen_rtx_REG (m, gregno + 1);
11174 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11175 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11177 else
11178 return NULL_RTX;
11180 else
11182 if (gregno <= GP_ARG_MAX_REG)
11183 return gen_rtx_REG (mode, gregno);
11184 else
11185 return NULL_RTX;
11189 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11190 structure between cum->intoffset and bitpos to integer registers. */
11192 static void
11193 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11194 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11196 machine_mode mode;
11197 unsigned int regno;
11198 unsigned int startbit, endbit;
11199 int this_regno, intregs, intoffset;
11200 rtx reg;
11202 if (cum->intoffset == -1)
11203 return;
11205 intoffset = cum->intoffset;
11206 cum->intoffset = -1;
11208 /* If this is the trailing part of a word, try to only load that
11209 much into the register. Otherwise load the whole register. Note
11210 that in the latter case we may pick up unwanted bits. It's not a
11211 problem at the moment but we may wish to revisit this. */
11213 if (intoffset % BITS_PER_WORD != 0)
11215 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11216 MODE_INT, 0);
11217 if (mode == BLKmode)
11219 /* We couldn't find an appropriate mode, which happens,
11220 e.g., in packed structs when there are 3 bytes to load.
11221 Move intoffset back to the beginning of the word in this
11222 case. */
11223 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11224 mode = word_mode;
11227 else
11228 mode = word_mode;
11230 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11231 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11232 intregs = (endbit - startbit) / BITS_PER_WORD;
11233 this_regno = cum->words + intoffset / BITS_PER_WORD;
11235 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11236 cum->use_stack = 1;
11238 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11239 if (intregs <= 0)
11240 return;
11242 intoffset /= BITS_PER_UNIT;
11245 regno = GP_ARG_MIN_REG + this_regno;
11246 reg = gen_rtx_REG (mode, regno);
11247 rvec[(*k)++] =
11248 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11250 this_regno += 1;
11251 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11252 mode = word_mode;
11253 intregs -= 1;
11255 while (intregs > 0);
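/* A worked example (ours): with 64-bit words, cum->intoffset == 32
   and bitpos == 128, the first iteration loads the trailing 32 bits
   of word 0 in SImode at byte offset 4, and the second loads all of
   word 1 in DImode at byte offset 8; startbit = 0, endbit = 128, so
   intregs == 2 as expected.  */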
11258 /* Recursive workhorse for the following. */
11260 static void
11261 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11262 HOST_WIDE_INT startbitpos, rtx rvec[],
11263 int *k)
11265 tree f;
11267 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11268 if (TREE_CODE (f) == FIELD_DECL)
11270 HOST_WIDE_INT bitpos = startbitpos;
11271 tree ftype = TREE_TYPE (f);
11272 machine_mode mode;
11273 if (ftype == error_mark_node)
11274 continue;
11275 mode = TYPE_MODE (ftype);
11277 if (DECL_SIZE (f) != 0
11278 && tree_fits_uhwi_p (bit_position (f)))
11279 bitpos += int_bit_position (f);
11281 /* ??? FIXME: else assume zero offset. */
11283 if (TREE_CODE (ftype) == RECORD_TYPE)
11284 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11285 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11287 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11288 #if 0
11289 switch (mode)
11291 case SCmode: mode = SFmode; break;
11292 case DCmode: mode = DFmode; break;
11293 case TCmode: mode = TFmode; break;
11294 default: break;
11296 #endif
11297 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11298 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11300 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11301 && (mode == TFmode || mode == TDmode));
11302 /* Long double or _Decimal128 split over regs and memory. */
11303 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11304 cum->use_stack = 1;
11306 rvec[(*k)++]
11307 = gen_rtx_EXPR_LIST (VOIDmode,
11308 gen_rtx_REG (mode, cum->fregno++),
11309 GEN_INT (bitpos / BITS_PER_UNIT));
11310 if (FLOAT128_2REG_P (mode))
11311 cum->fregno++;
11313 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11315 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11316 rvec[(*k)++]
11317 = gen_rtx_EXPR_LIST (VOIDmode,
11318 gen_rtx_REG (mode, cum->vregno++),
11319 GEN_INT (bitpos / BITS_PER_UNIT));
11321 else if (cum->intoffset == -1)
11322 cum->intoffset = bitpos;
11326 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11327 the register(s) to be used for each field and subfield of a struct
11328 being passed by value, along with the offset of where the
11329 register's value may be found in the block. FP fields go in FP
11330 registers, vector fields go in vector registers, and everything
11331 else goes in int registers, packed as in memory.
11333 This code is also used for function return values. RETVAL indicates
11334 whether this is the case.
11336 Much of this is taken from the SPARC V9 port, which has a similar
11337 calling convention. */
11339 static rtx
11340 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11341 bool named, bool retval)
11343 rtx rvec[FIRST_PSEUDO_REGISTER];
11344 int k = 1, kbase = 1;
11345 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11346 /* This is a copy; modifications are not visible to our caller. */
11347 CUMULATIVE_ARGS copy_cum = *orig_cum;
11348 CUMULATIVE_ARGS *cum = &copy_cum;
11350 /* Pad to a 16-byte boundary if needed. */
11351 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11352 && (cum->words % 2) != 0)
11353 cum->words++;
11355 cum->intoffset = 0;
11356 cum->use_stack = 0;
11357 cum->named = named;
11359 /* Put entries into rvec[] for individual FP and vector fields, and
11360 for the chunks of memory that go in int regs. Note we start at
11361 element 1; 0 is reserved for an indication of using memory, and
11362 may or may not be filled in below. */
11363 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11364 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11366 /* If any part of the struct went on the stack put all of it there.
11367 This hack is because the generic code for
11368 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11369 parts of the struct are not at the beginning. */
11370 if (cum->use_stack)
11372 if (retval)
11373 return NULL_RTX; /* doesn't go in registers at all */
11374 kbase = 0;
11375 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11377 if (k > 1 || cum->use_stack)
11378 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11379 else
11380 return NULL_RTX;
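/* The kbase trick above, spelled out (our gloss): rvec[0] is
   pre-reserved for the magic (expr_list (nil) (const_int 0)) memory
   marker.  If nothing spilled, kbase == 1 and the marker slot is
   simply skipped when the PARALLEL is built; if use_stack was set,
   kbase drops to 0 so the marker becomes element 0 of the PARALLEL.  */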
11383 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11385 static rtx
11386 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11387 int align_words)
11389 int n_units;
11390 int i, k;
11391 rtx rvec[GP_ARG_NUM_REG + 1];
11393 if (align_words >= GP_ARG_NUM_REG)
11394 return NULL_RTX;
11396 n_units = rs6000_arg_size (mode, type);
11398 /* Optimize the simple case where the arg fits in one gpr, except in
11399 the case of BLKmode due to assign_parms assuming that registers are
11400 BITS_PER_WORD wide. */
11401 if (n_units == 0
11402 || (n_units == 1 && mode != BLKmode))
11403 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11405 k = 0;
11406 if (align_words + n_units > GP_ARG_NUM_REG)
11407 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11408 using a magic NULL_RTX component.
11409 This is not strictly correct. Only some of the arg belongs in
11410 memory, not all of it. However, the normal scheme using
11411 function_arg_partial_nregs can result in unusual subregs, eg.
11412 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11413 store the whole arg to memory is often more efficient than code
11414 to store pieces, and we know that space is available in the right
11415 place for the whole arg. */
11416 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11418 i = 0;
11421 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11422 rtx off = GEN_INT (i++ * 4);
11423 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11425 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11427 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
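/* A worked example (ours): with the 32-bit ABI on a 64-bit chip, a
   DFmode arg at align_words == 7 has n_units == 2, so 7 + 2 exceeds
   GP_ARG_NUM_REG (8) and we emit

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   i.e. the first half goes in r10 and the rest is flagged as going
   to memory via the NULL_RTX marker.  */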
11430 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11431 but must also be copied into the parameter save area starting at
11432 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11433 to the GPRs and/or memory. Return the number of elements used. */
11435 static int
11436 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11437 int align_words, rtx *rvec)
11439 int k = 0;
11441 if (align_words < GP_ARG_NUM_REG)
11443 int n_words = rs6000_arg_size (mode, type);
11445 if (align_words + n_words > GP_ARG_NUM_REG
11446 || mode == BLKmode
11447 || (TARGET_32BIT && TARGET_POWERPC64))
11449 /* If this is partially on the stack, then we only
11450 include the portion actually in registers here. */
11451 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11452 int i = 0;
11454 if (align_words + n_words > GP_ARG_NUM_REG)
11456 /* Not all of the arg fits in gprs. Say that it goes in memory
11457 too, using a magic NULL_RTX component. Also see comment in
11458 rs6000_mixed_function_arg for why the normal
11459 function_arg_partial_nregs scheme doesn't work in this case. */
11460 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11465 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11466 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11467 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11469 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11471 else
11473 /* The whole arg fits in gprs. */
11474 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11475 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11478 else
11480 /* It's entirely in memory. */
11481 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11484 return k;
11487 /* RVEC is a vector of K components of an argument of mode MODE.
11488 Construct the final function_arg return value from it. */
11490 static rtx
11491 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11493 gcc_assert (k >= 1);
11495 /* Avoid returning a PARALLEL in the trivial cases. */
11496 if (k == 1)
11498 if (XEXP (rvec[0], 0) == NULL_RTX)
11499 return NULL_RTX;
11501 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11502 return XEXP (rvec[0], 0);
11505 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11508 /* Determine where to put an argument to a function.
11509 Value is zero to push the argument on the stack,
11510 or a hard register in which to store the argument.
11512 MODE is the argument's machine mode.
11513 TYPE is the data type of the argument (as a tree).
11514 This is null for libcalls where that information may
11515 not be available.
11516 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11517 the preceding args and about the function being called. It is
11518 not modified in this routine.
11519 NAMED is nonzero if this argument is a named parameter
11520 (otherwise it is an extra parameter matching an ellipsis).
11522 On RS/6000 the first eight words of non-FP args are normally in
11523 registers and the rest are pushed. Under AIX, the first 13 FP args
11524 are in registers. Under V.4, the first 8 FP args are in registers.
11526 If this is floating-point and no prototype is specified, we use
11527 both an FP and integer register (or possibly FP reg and stack). Library
11528 functions (when CALL_LIBCALL is set) always have the proper types for args,
11529 so we can pass the FP value just in one register. emit_library_function
11530 doesn't support PARALLEL anyway.
11532 Note that for args passed by reference, function_arg will be called
11533 with MODE and TYPE set to that of the pointer to the arg, not the arg
11534 itself. */
11536 static rtx
11537 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11538 const_tree type, bool named)
11540 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11541 enum rs6000_abi abi = DEFAULT_ABI;
11542 machine_mode elt_mode;
11543 int n_elts;
11545 /* Return a marker to indicate whether CR1 needs to be set or
11546 cleared for the bit that V.4 uses to say fp args were passed
11547 in registers. Assume that we don't need the marker for software
11548 floating point, or for compiler generated library calls. */
11549 if (mode == VOIDmode)
11551 if (abi == ABI_V4
11552 && (cum->call_cookie & CALL_LIBCALL) == 0
11553 && (cum->stdarg
11554 || (cum->nargs_prototype < 0
11555 && (cum->prototype || TARGET_NO_PROTOTYPE))))
11557 /* For the SPE, we need to crxor CR6 always. */
11558 if (TARGET_SPE_ABI)
11559 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
11560 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
11561 return GEN_INT (cum->call_cookie
11562 | ((cum->fregno == FP_ARG_MIN_REG)
11563 ? CALL_V4_SET_FP_ARGS
11564 : CALL_V4_CLEAR_FP_ARGS));
11567 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11570 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11572 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11574 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11575 if (rslt != NULL_RTX)
11576 return rslt;
11577 /* Else fall through to usual handling. */
11580 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11582 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11583 rtx r, off;
11584 int i, k = 0;
11586 /* Do we also need to pass this argument in the parameter save area?
11587 Library support functions for IEEE 128-bit are assumed to not need the
11588 value passed both in GPRs and in vector registers. */
11589 if (TARGET_64BIT && !cum->prototype
11590 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11592 int align_words = ROUND_UP (cum->words, 2);
11593 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11596 /* Describe where this argument goes in the vector registers. */
11597 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11599 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11600 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11601 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11604 return rs6000_finish_function_arg (mode, rvec, k);
11606 else if (TARGET_ALTIVEC_ABI
11607 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11608 || (type && TREE_CODE (type) == VECTOR_TYPE
11609 && int_size_in_bytes (type) == 16)))
11611 if (named || abi == ABI_V4)
11612 return NULL_RTX;
11613 else
11615 /* Vector parameters to varargs functions under AIX or Darwin
11616 get passed in memory and possibly also in GPRs. */
11617 int align, align_words, n_words;
11618 machine_mode part_mode;
11620 /* Vector parameters must be 16-byte aligned. In 32-bit
11621 mode this means we need to take into account the offset
11622 to the parameter save area. In 64-bit mode, they just
11623 have to start on an even word, since the parameter save
11624 area is 16-byte aligned. */
11625 if (TARGET_32BIT)
11626 align = -(rs6000_parm_offset () + cum->words) & 3;
11627 else
11628 align = cum->words & 1;
11629 align_words = cum->words + align;
11631 /* Out of registers? Memory, then. */
11632 if (align_words >= GP_ARG_NUM_REG)
11633 return NULL_RTX;
11635 if (TARGET_32BIT && TARGET_POWERPC64)
11636 return rs6000_mixed_function_arg (mode, type, align_words);
11638 /* The vector value goes in GPRs. Only the part of the
11639 value in GPRs is reported here. */
11640 part_mode = mode;
11641 n_words = rs6000_arg_size (mode, type);
11642 if (align_words + n_words > GP_ARG_NUM_REG)
11643 /* Fortunately, there are only two possibilities, the value
11644 is either wholly in GPRs or half in GPRs and half not. */
11645 part_mode = DImode;
11647 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
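/* Example of the part_mode trick above (ours): a 16-byte V4SImode
   vararg under 64-bit AIX with align_words == 7 needs n_words == 2
   but only r10 is left, so part_mode becomes DImode and we return
   (reg:DI 10); only the first half is reported here, the rest goes
   to memory.  */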
11650 else if (TARGET_SPE_ABI && TARGET_SPE
11651 && (SPE_VECTOR_MODE (mode)
11652 || (TARGET_E500_DOUBLE && (mode == DFmode
11653 || mode == DCmode
11654 || mode == TFmode
11655 || mode == TCmode))))
11656 return rs6000_spe_function_arg (cum, mode, type);
11658 else if (abi == ABI_V4)
11660 if (abi_v4_pass_in_fpr (mode))
11662 /* _Decimal128 must use an even/odd register pair. This assumes
11663 that the register number is odd when fregno is odd. */
11664 if (mode == TDmode && (cum->fregno % 2) == 1)
11665 cum->fregno++;
11667 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11668 <= FP_ARG_V4_MAX_REG)
11669 return gen_rtx_REG (mode, cum->fregno);
11670 else
11671 return NULL_RTX;
11673 else
11675 int n_words = rs6000_arg_size (mode, type);
11676 int gregno = cum->sysv_gregno;
11678 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11679 (r7,r8) or (r9,r10), as is any other 2-word item such
11680 as complex int, due to a historical mistake. */
11681 if (n_words == 2)
11682 gregno += (1 - gregno) & 1;
11684 /* Multi-reg args are not split between registers and stack. */
11685 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11686 return NULL_RTX;
11688 if (TARGET_32BIT && TARGET_POWERPC64)
11689 return rs6000_mixed_function_arg (mode, type,
11690 gregno - GP_ARG_MIN_REG);
11691 return gen_rtx_REG (mode, gregno);
11694 else
11696 int align_words = rs6000_parm_start (mode, type, cum->words);
11698 /* _Decimal128 must be passed in an even/odd float register pair.
11699 This assumes that the register number is odd when fregno is odd. */
11700 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11701 cum->fregno++;
11703 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11705 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11706 rtx r, off;
11707 int i, k = 0;
11708 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11709 int fpr_words;
11711 /* Do we also need to pass this argument in the parameter
11712 save area? */
11713 if (type && (cum->nargs_prototype <= 0
11714 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11715 && TARGET_XL_COMPAT
11716 && align_words >= GP_ARG_NUM_REG)))
11717 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11719 /* Describe where this argument goes in the fprs. */
11720 for (i = 0; i < n_elts
11721 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11723 /* Check if the argument is split over registers and memory.
11724 This can only ever happen for long double or _Decimal128;
11725 complex types are handled via split_complex_arg. */
11726 machine_mode fmode = elt_mode;
11727 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11729 gcc_assert (FLOAT128_2REG_P (fmode));
11730 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11733 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11734 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11735 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11738 /* If there were not enough FPRs to hold the argument, the rest
11739 usually goes into memory. However, if the current position
11740 is still within the register parameter area, a portion may
11741 actually have to go into GPRs.
11743 Note that it may happen that the portion of the argument
11744 passed in the first "half" of the first GPR was already
11745 passed in the last FPR as well.
11747 For unnamed arguments, we already set up GPRs to cover the
11748 whole argument in rs6000_psave_function_arg, so there is
11749 nothing further to do at this point. */
11750 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11751 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11752 && cum->nargs_prototype > 0)
11754 static bool warned;
11756 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11757 int n_words = rs6000_arg_size (mode, type);
11759 align_words += fpr_words;
11760 n_words -= fpr_words;
11764 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11765 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11766 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11768 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11770 if (!warned && warn_psabi)
11772 warned = true;
11773 inform (input_location,
11774 "the ABI of passing homogeneous float aggregates"
11775 " has changed in GCC 5");
11779 return rs6000_finish_function_arg (mode, rvec, k);
11781 else if (align_words < GP_ARG_NUM_REG)
11783 if (TARGET_32BIT && TARGET_POWERPC64)
11784 return rs6000_mixed_function_arg (mode, type, align_words);
11786 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11788 else
11789 return NULL_RTX;
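/* A worked example of the FPR/GPR overlap handled above (ours): a
   named aggregate of four doubles arriving when only two FPRs remain
   gets its first two elements in those FPRs; fpr_words then counts
   how many parameter-save-area doublewords those FPRs covered, the
   remaining doublewords are described in GPRs (or via the memory
   marker) starting right after them, and this is also the situation
   in which the GCC 5 -Wpsabi note above fires.  */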
11793 /* For an arg passed partly in registers and partly in memory, this is
11794 the number of bytes passed in registers. For args passed entirely in
11795 registers or entirely in memory, zero. When an arg is described by a
11796 PARALLEL, perhaps using more than one register type, this function
11797 returns the number of bytes used by the first element of the PARALLEL. */
11799 static int
11800 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11801 tree type, bool named)
11803 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11804 bool passed_in_gprs = true;
11805 int ret = 0;
11806 int align_words;
11807 machine_mode elt_mode;
11808 int n_elts;
11810 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11812 if (DEFAULT_ABI == ABI_V4)
11813 return 0;
11815 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11817 /* If we are passing this arg in the fixed parameter save area (gprs or
11818 memory) as well as VRs, we do not use the partial bytes mechanism;
11819 instead, rs6000_function_arg will return a PARALLEL including a memory
11820 element as necessary. Library support functions for IEEE 128-bit are
11821 assumed to not need the value passed both in GPRs and in vector
11822 registers. */
11823 if (TARGET_64BIT && !cum->prototype
11824 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11825 return 0;
11827 /* Otherwise, we pass in VRs only. Check for partial copies. */
11828 passed_in_gprs = false;
11829 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11830 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11833 /* In this complicated case we just disable the partial_nregs code. */
11834 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11835 return 0;
11837 align_words = rs6000_parm_start (mode, type, cum->words);
11839 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11841 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11843 /* If we are passing this arg in the fixed parameter save area
11844 (gprs or memory) as well as FPRs, we do not use the partial
11845 bytes mechanism; instead, rs6000_function_arg will return a
11846 PARALLEL including a memory element as necessary. */
11847 if (type
11848 && (cum->nargs_prototype <= 0
11849 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11850 && TARGET_XL_COMPAT
11851 && align_words >= GP_ARG_NUM_REG)))
11852 return 0;
11854 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11855 passed_in_gprs = false;
11856 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11858 /* Compute number of bytes / words passed in FPRs. If there
11859 is still space available in the register parameter area
11860 *after* that amount, a part of the argument will be passed
11861 in GPRs. In that case, the total amount passed in any
11862 registers is equal to the amount that would have been passed
11863 in GPRs if everything were passed there, so we fall back to
11864 the GPR code below to compute the appropriate value. */
11865 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11866 * MIN (8, GET_MODE_SIZE (elt_mode)));
11867 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11869 if (align_words + fpr_words < GP_ARG_NUM_REG)
11870 passed_in_gprs = true;
11871 else
11872 ret = fpr;
11876 if (passed_in_gprs
11877 && align_words < GP_ARG_NUM_REG
11878 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11879 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11881 if (ret != 0 && TARGET_DEBUG_ARG)
11882 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11884 return ret;
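/* A worked example (ours): on 64-bit AIX, a 32-byte BLKmode struct
   arriving at align_words == 6 needs 4 doublewords but only r9 and
   r10 remain, so ret = (8 - 6) * 8 = 16 bytes in registers, with the
   other 16 bytes on the stack.  */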
11887 /* A C expression that indicates when an argument must be passed by
11888 reference. If nonzero for an argument, a copy of that argument is
11889 made in memory and a pointer to the argument is passed instead of
11890 the argument itself. The pointer is passed in whatever way is
11891 appropriate for passing a pointer to that type.
11893 Under V.4, aggregates and long double are passed by reference.
11895 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11896 reference unless the AltiVec vector extension ABI is in force.
11898 As an extension to all ABIs, variable sized types are passed by
11899 reference. */
11901 static bool
11902 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11903 machine_mode mode, const_tree type,
11904 bool named ATTRIBUTE_UNUSED)
11906 if (!type)
11907 return 0;
11909 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11910 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11912 if (TARGET_DEBUG_ARG)
11913 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11914 return 1;
11917 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11919 if (TARGET_DEBUG_ARG)
11920 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11921 return 1;
11924 if (int_size_in_bytes (type) < 0)
11926 if (TARGET_DEBUG_ARG)
11927 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11928 return 1;
11931 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11932 modes only exist for GCC vector types if -maltivec. */
11933 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11935 if (TARGET_DEBUG_ARG)
11936 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11937 return 1;
11940 /* Pass synthetic vectors in memory. */
11941 if (TREE_CODE (type) == VECTOR_TYPE
11942 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11944 static bool warned_for_pass_big_vectors = false;
11945 if (TARGET_DEBUG_ARG)
11946 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11947 if (!warned_for_pass_big_vectors)
11949 warning (0, "GCC vector passed by reference: "
11950 "non-standard ABI extension with no compatibility guarantee");
11951 warned_for_pass_big_vectors = true;
11953 return 1;
11956 return 0;
11959 /* Process a parameter of type TYPE after ARGS_SO_FAR parameters
11960 have already been processed. Return true if the parameter must be
11961 passed (fully or partially) on the stack. */
11963 static bool
11964 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11966 machine_mode mode;
11967 int unsignedp;
11968 rtx entry_parm;
11970 /* Catch errors. */
11971 if (type == NULL || type == error_mark_node)
11972 return true;
11974 /* Handle types with no storage requirement. */
11975 if (TYPE_MODE (type) == VOIDmode)
11976 return false;
11978 /* Handle complex types. */
11979 if (TREE_CODE (type) == COMPLEX_TYPE)
11980 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11981 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
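/* Note (our gloss): the double call above is deliberate.  Each call
   advances args_so_far over one component, so the real and imaginary
   parts are both accounted for; short-circuiting on the first "true"
   is fine because the caller stops caring about register accounting
   as soon as anything must go on the stack.  */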
11983 /* Handle transparent aggregates. */
11984 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11985 && TYPE_TRANSPARENT_AGGR (type))
11986 type = TREE_TYPE (first_field (type));
11988 /* See if this arg was passed by invisible reference. */
11989 if (pass_by_reference (get_cumulative_args (args_so_far),
11990 TYPE_MODE (type), type, true))
11991 type = build_pointer_type (type);
11993 /* Find mode as it is passed by the ABI. */
11994 unsignedp = TYPE_UNSIGNED (type);
11995 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11997 /* If we must pass in stack, we need a stack. */
11998 if (rs6000_must_pass_in_stack (mode, type))
11999 return true;
12001 /* If there is no incoming register, we need a stack. */
12002 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12003 if (entry_parm == NULL)
12004 return true;
12006 /* Likewise if we need to pass both in registers and on the stack. */
12007 if (GET_CODE (entry_parm) == PARALLEL
12008 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12009 return true;
12011 /* Also true if we're partially in registers and partially not. */
12012 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12013 return true;
12015 /* Update info on where next arg arrives in registers. */
12016 rs6000_function_arg_advance (args_so_far, mode, type, true);
12017 return false;
12020 /* Return true if FUN has no prototype, has a variable argument
12021 list, or passes any parameter in memory. */
12023 static bool
12024 rs6000_function_parms_need_stack (tree fun, bool incoming)
12026 tree fntype, result;
12027 CUMULATIVE_ARGS args_so_far_v;
12028 cumulative_args_t args_so_far;
12030 if (!fun)
12031 /* Must be a libcall, all of which only use reg parms. */
12032 return false;
12034 fntype = fun;
12035 if (!TYPE_P (fun))
12036 fntype = TREE_TYPE (fun);
12038 /* Varargs functions need the parameter save area. */
12039 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12040 return true;
12042 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12043 args_so_far = pack_cumulative_args (&args_so_far_v);
12045 /* When incoming, we will have been passed the function decl.
12046 It is necessary to use the decl to handle K&R style functions,
12047 where TYPE_ARG_TYPES may not be available. */
12048 if (incoming)
12050 gcc_assert (DECL_P (fun));
12051 result = DECL_RESULT (fun);
12053 else
12054 result = TREE_TYPE (fntype);
12056 if (result && aggregate_value_p (result, fntype))
12058 if (!TYPE_P (result))
12059 result = TREE_TYPE (result);
12060 result = build_pointer_type (result);
12061 rs6000_parm_needs_stack (args_so_far, result);
12064 if (incoming)
12066 tree parm;
12068 for (parm = DECL_ARGUMENTS (fun);
12069 parm && parm != void_list_node;
12070 parm = TREE_CHAIN (parm))
12071 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12072 return true;
12074 else
12076 function_args_iterator args_iter;
12077 tree arg_type;
12079 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12080 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12081 return true;
12084 return false;
12087 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12088 usually a constant depending on the ABI. However, in the ELFv2 ABI
12089 the register parameter area is optional when calling a function that
12090 has a prototype in scope, has no variable argument list, and passes
12091 all parameters in registers. */
12093 static int
12094 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12096 int reg_parm_stack_space;
12098 switch (DEFAULT_ABI)
12100 default:
12101 reg_parm_stack_space = 0;
12102 break;
12104 case ABI_AIX:
12105 case ABI_DARWIN:
12106 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12107 break;
12109 case ABI_ELFv2:
12110 /* ??? Recomputing this every time is a bit expensive. Is there
12111 a place to cache this information? */
12112 if (rs6000_function_parms_need_stack (fun, incoming))
12113 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12114 else
12115 reg_parm_stack_space = 0;
12116 break;
12119 return reg_parm_stack_space;
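/* Quick reference (our summary of the switch above): AIX and Darwin
   always reserve 64 bytes (64-bit) or 32 bytes (32-bit); System V,
   hitting the default case, reserves none; ELFv2 reserves the bytes
   only when the callee might actually need the parameter save area,
   e.g. for varargs or when some parameter spills to the stack.  */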
12122 static void
12123 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12125 int i;
12126 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12128 if (nregs == 0)
12129 return;
12131 for (i = 0; i < nregs; i++)
12133 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12134 if (reload_completed)
12136 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12137 tem = NULL_RTX;
12138 else
12139 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12140 i * GET_MODE_SIZE (reg_mode));
12142 else
12143 tem = replace_equiv_address (tem, XEXP (tem, 0));
12145 gcc_assert (tem);
12147 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12151 /* Perform any actions needed for a function that is receiving a
12152 variable number of arguments.
12154 CUM is as above.
12156 MODE and TYPE are the mode and type of the current parameter.
12158 PRETEND_SIZE is a variable that should be set to the amount of stack
12159 that must be pushed by the prolog to pretend that our caller pushed it.
12162 Normally, this macro will push all remaining incoming registers on the
12163 stack and set PRETEND_SIZE to the length of the registers pushed. */
12165 static void
12166 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12167 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12168 int no_rtl)
12170 CUMULATIVE_ARGS next_cum;
12171 int reg_size = TARGET_32BIT ? 4 : 8;
12172 rtx save_area = NULL_RTX, mem;
12173 int first_reg_offset;
12174 alias_set_type set;
12176 /* Skip the last named argument. */
12177 next_cum = *get_cumulative_args (cum);
12178 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12180 if (DEFAULT_ABI == ABI_V4)
12182 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12184 if (! no_rtl)
12186 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12187 HOST_WIDE_INT offset = 0;
12189 /* Try to optimize the size of the varargs save area.
12190 The ABI requires that ap.reg_save_area is doubleword
12191 aligned, but we don't need to allocate space for all
12192 the bytes, only those to which we actually will save
12193 anything. */
12194 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12195 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12196 if (TARGET_HARD_FLOAT && TARGET_FPRS
12197 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12198 && cfun->va_list_fpr_size)
12200 if (gpr_reg_num)
12201 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12202 * UNITS_PER_FP_WORD;
12203 if (cfun->va_list_fpr_size
12204 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12205 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12206 else
12207 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12208 * UNITS_PER_FP_WORD;
12210 if (gpr_reg_num)
12212 offset = -((first_reg_offset * reg_size) & ~7);
12213 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12215 gpr_reg_num = cfun->va_list_gpr_size;
12216 if (reg_size == 4 && (first_reg_offset & 1))
12217 gpr_reg_num++;
12219 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12221 else if (fpr_size)
12222 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12223 * UNITS_PER_FP_WORD
12224 - (int) (GP_ARG_NUM_REG * reg_size);
12226 if (gpr_size + fpr_size)
12228 rtx reg_save_area
12229 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12230 gcc_assert (GET_CODE (reg_save_area) == MEM);
12231 reg_save_area = XEXP (reg_save_area, 0);
12232 if (GET_CODE (reg_save_area) == PLUS)
12234 gcc_assert (XEXP (reg_save_area, 0)
12235 == virtual_stack_vars_rtx);
12236 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12237 offset += INTVAL (XEXP (reg_save_area, 1));
12239 else
12240 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12243 cfun->machine->varargs_save_offset = offset;
12244 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12247 else
12249 first_reg_offset = next_cum.words;
12250 save_area = crtl->args.internal_arg_pointer;
12252 if (targetm.calls.must_pass_in_stack (mode, type))
12253 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12256 set = get_varargs_alias_set ();
12257 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12258 && cfun->va_list_gpr_size)
12260 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12262 if (va_list_gpr_counter_field)
12263 /* V4 va_list_gpr_size counts number of registers needed. */
12264 n_gpr = cfun->va_list_gpr_size;
12265 else
12266 /* char * va_list instead counts number of bytes needed. */
12267 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12269 if (nregs > n_gpr)
12270 nregs = n_gpr;
12272 mem = gen_rtx_MEM (BLKmode,
12273 plus_constant (Pmode, save_area,
12274 first_reg_offset * reg_size));
12275 MEM_NOTRAP_P (mem) = 1;
12276 set_mem_alias_set (mem, set);
12277 set_mem_align (mem, BITS_PER_WORD);
12279 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12280 nregs);
12283 /* Save FP registers if needed. */
12284 if (DEFAULT_ABI == ABI_V4
12285 && TARGET_HARD_FLOAT && TARGET_FPRS
12286 && ! no_rtl
12287 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12288 && cfun->va_list_fpr_size)
12290 int fregno = next_cum.fregno, nregs;
12291 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12292 rtx lab = gen_label_rtx ();
12293 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12294 * UNITS_PER_FP_WORD);
12296 emit_jump_insn
12297 (gen_rtx_SET (pc_rtx,
12298 gen_rtx_IF_THEN_ELSE (VOIDmode,
12299 gen_rtx_NE (VOIDmode, cr1,
12300 const0_rtx),
12301 gen_rtx_LABEL_REF (VOIDmode, lab),
12302 pc_rtx)));
12304 for (nregs = 0;
12305 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12306 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12308 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12309 ? DFmode : SFmode,
12310 plus_constant (Pmode, save_area, off));
12311 MEM_NOTRAP_P (mem) = 1;
12312 set_mem_alias_set (mem, set);
12313 set_mem_align (mem, GET_MODE_ALIGNMENT (
12314 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12315 ? DFmode : SFmode));
12316 emit_move_insn (mem, gen_rtx_REG (
12317 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12318 ? DFmode : SFmode, fregno));
12321 emit_label (lab);
12325 /* Create the va_list data type. */
12327 static tree
12328 rs6000_build_builtin_va_list (void)
12330 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12332 /* For AIX, prefer 'char *' because that's what the system
12333 header files like. */
12334 if (DEFAULT_ABI != ABI_V4)
12335 return build_pointer_type (char_type_node);
12337 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12338 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12339 get_identifier ("__va_list_tag"), record);
12341 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12342 unsigned_char_type_node);
12343 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12344 unsigned_char_type_node);
12345 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12346 every user file. */
12347 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12348 get_identifier ("reserved"), short_unsigned_type_node);
12349 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12350 get_identifier ("overflow_arg_area"),
12351 ptr_type_node);
12352 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12353 get_identifier ("reg_save_area"),
12354 ptr_type_node);
12356 va_list_gpr_counter_field = f_gpr;
12357 va_list_fpr_counter_field = f_fpr;
12359 DECL_FIELD_CONTEXT (f_gpr) = record;
12360 DECL_FIELD_CONTEXT (f_fpr) = record;
12361 DECL_FIELD_CONTEXT (f_res) = record;
12362 DECL_FIELD_CONTEXT (f_ovf) = record;
12363 DECL_FIELD_CONTEXT (f_sav) = record;
12365 TYPE_STUB_DECL (record) = type_decl;
12366 TYPE_NAME (record) = type_decl;
12367 TYPE_FIELDS (record) = f_gpr;
12368 DECL_CHAIN (f_gpr) = f_fpr;
12369 DECL_CHAIN (f_fpr) = f_res;
12370 DECL_CHAIN (f_res) = f_ovf;
12371 DECL_CHAIN (f_ovf) = f_sav;
12373 layout_type (record);
12375 /* The correct type is an array type of one element. */
12376 return build_array_type (record, build_index_type (size_zero_node));
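/* For reference, a C-level sketch of the V4 va_list built above (our
   gloss; field order and widths as laid out by the code):

     typedef struct __va_list_tag {
       unsigned char gpr;             count of GPRs consumed, 0..8
       unsigned char fpr;             count of FPRs consumed, 0..8
       unsigned short reserved;       the named padding field
       void *overflow_arg_area;       next stack argument
       void *reg_save_area;           where r3..r10 / f1..f8 were saved
     } __va_list_tag;

   and the va_list type itself is __va_list_tag[1].  */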
12379 /* Implement va_start. */
12381 static void
12382 rs6000_va_start (tree valist, rtx nextarg)
12384 HOST_WIDE_INT words, n_gpr, n_fpr;
12385 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12386 tree gpr, fpr, ovf, sav, t;
12388 /* Only SVR4 needs something special. */
12389 if (DEFAULT_ABI != ABI_V4)
12391 std_expand_builtin_va_start (valist, nextarg);
12392 return;
12395 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12396 f_fpr = DECL_CHAIN (f_gpr);
12397 f_res = DECL_CHAIN (f_fpr);
12398 f_ovf = DECL_CHAIN (f_res);
12399 f_sav = DECL_CHAIN (f_ovf);
12401 valist = build_simple_mem_ref (valist);
12402 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12403 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12404 f_fpr, NULL_TREE);
12405 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12406 f_ovf, NULL_TREE);
12407 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12408 f_sav, NULL_TREE);
12410 /* Count number of gp and fp argument registers used. */
12411 words = crtl->args.info.words;
12412 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12413 GP_ARG_NUM_REG);
12414 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12415 FP_ARG_NUM_REG);
12417 if (TARGET_DEBUG_ARG)
12418 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12419 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12420 words, n_gpr, n_fpr);
12422 if (cfun->va_list_gpr_size)
12424 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12425 build_int_cst (NULL_TREE, n_gpr));
12426 TREE_SIDE_EFFECTS (t) = 1;
12427 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12430 if (cfun->va_list_fpr_size)
12432 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12433 build_int_cst (NULL_TREE, n_fpr));
12434 TREE_SIDE_EFFECTS (t) = 1;
12435 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12437 #ifdef HAVE_AS_GNU_ATTRIBUTE
12438 if (call_ABI_of_interest (cfun->decl))
12439 rs6000_passes_float = true;
12440 #endif
12443 /* Find the overflow area. */
12444 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12445 if (words != 0)
12446 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12447 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12448 TREE_SIDE_EFFECTS (t) = 1;
12449 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12451 /* If there were no va_arg invocations, don't set up the register
12452 save area. */
12453 if (!cfun->va_list_gpr_size
12454 && !cfun->va_list_fpr_size
12455 && n_gpr < GP_ARG_NUM_REG
12456 && n_fpr < FP_ARG_V4_MAX_REG)
12457 return;
12459 /* Find the register save area. */
12460 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12461 if (cfun->machine->varargs_save_offset)
12462 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12463 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12464 TREE_SIDE_EFFECTS (t) = 1;
12465 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12468 /* Implement va_arg. */
12470 static tree
12471 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12472 gimple_seq *post_p)
12474 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12475 tree gpr, fpr, ovf, sav, reg, t, u;
12476 int size, rsize, n_reg, sav_ofs, sav_scale;
12477 tree lab_false, lab_over, addr;
12478 int align;
12479 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12480 int regalign = 0;
12481 gimple *stmt;
12483 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12485 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12486 return build_va_arg_indirect_ref (t);
12489 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12490 earlier version of gcc, with the property that it always applied alignment
12491 adjustments to the va-args (even for zero-sized types). The cheapest way
12492 to deal with this is to replicate the effect of the part of
12493 std_gimplify_va_arg_expr that carries out the align adjust, for the
12494 relevant cases.
12495 We don't need to check for pass-by-reference because of the test above.
12496 We can return a simplified answer, since we know there's no offset to add. */
12498 if (((TARGET_MACHO
12499 && rs6000_darwin64_abi)
12500 || DEFAULT_ABI == ABI_ELFv2
12501 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12502 && integer_zerop (TYPE_SIZE (type)))
12504 unsigned HOST_WIDE_INT align, boundary;
12505 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12506 align = PARM_BOUNDARY / BITS_PER_UNIT;
12507 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12508 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12509 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12510 boundary /= BITS_PER_UNIT;
12511 if (boundary > align)
12513 tree t;
12514 /* This updates arg ptr by the amount that would be necessary
12515 to align the zero-sized (but not zero-alignment) item. */
12516 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12517 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12518 gimplify_and_add (t, pre_p);
12520 t = fold_convert (sizetype, valist_tmp);
12521 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12522 fold_convert (TREE_TYPE (valist),
12523 fold_build2 (BIT_AND_EXPR, sizetype, t,
12524 size_int (-boundary))));
12525 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12526 gimplify_and_add (t, pre_p);
12528 /* Since it is zero-sized there's no increment for the item itself. */
12529 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12530 return build_va_arg_indirect_ref (valist_tmp);
12533 if (DEFAULT_ABI != ABI_V4)
12535 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12537 tree elem_type = TREE_TYPE (type);
12538 machine_mode elem_mode = TYPE_MODE (elem_type);
12539 int elem_size = GET_MODE_SIZE (elem_mode);
12541 if (elem_size < UNITS_PER_WORD)
12543 tree real_part, imag_part;
12544 gimple_seq post = NULL;
12546 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12547 &post);
12548 /* Copy the value into a temporary, lest the formal temporary
12549 be reused out from under us. */
12550 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12551 gimple_seq_add_seq (pre_p, post);
12553 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12554 post_p);
12556 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12560 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12563 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12564 f_fpr = DECL_CHAIN (f_gpr);
12565 f_res = DECL_CHAIN (f_fpr);
12566 f_ovf = DECL_CHAIN (f_res);
12567 f_sav = DECL_CHAIN (f_ovf);
12569 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12570 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12571 f_fpr, NULL_TREE);
12572 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12573 f_ovf, NULL_TREE);
12574 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12575 f_sav, NULL_TREE);
12577 size = int_size_in_bytes (type);
12578 rsize = (size + 3) / 4;
12579 align = 1;
12581 machine_mode mode = TYPE_MODE (type);
12582 if (abi_v4_pass_in_fpr (mode))
12584 /* FP args go in FP registers, if present. */
12585 reg = fpr;
12586 n_reg = (size + 7) / 8;
12587 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12588 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12589 if (mode != SFmode && mode != SDmode)
12590 align = 8;
12592 else
12594 /* Otherwise into GP registers. */
12595 reg = gpr;
12596 n_reg = rsize;
12597 sav_ofs = 0;
12598 sav_scale = 4;
12599 if (n_reg == 2)
12600 align = 8;
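/* Examples of the selection above (ours), for 32-bit V4 hard-float:
   "double" gives reg = fpr, n_reg = 1, sav_ofs = 32 (skipping the
   eight 4-byte GPR slots), sav_scale = 8 and align = 8; "long long"
   gives reg = gpr, n_reg = 2, sav_scale = 4 and align = 8; a plain
   "int" gives reg = gpr, n_reg = 1 and align = 1.  */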
12603 /* Pull the value out of the saved registers.... */
12605 lab_over = NULL;
12606 addr = create_tmp_var (ptr_type_node, "addr");
12608 /* AltiVec vectors never go in registers when -mabi=altivec. */
12609 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12610 align = 16;
12611 else
12613 lab_false = create_artificial_label (input_location);
12614 lab_over = create_artificial_label (input_location);
12616 /* Long long and SPE vectors are aligned in the registers.
12617 So is any other 2-GPR item, such as complex int, due to a
12618 historical mistake. */
12619 u = reg;
12620 if (n_reg == 2 && reg == gpr)
12622 regalign = 1;
12623 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12624 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12625 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12626 unshare_expr (reg), u);
12628 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12629 reg number is 0 for f1, so we want to make it odd. */
12630 else if (reg == fpr && mode == TDmode)
12632 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12633 build_int_cst (TREE_TYPE (reg), 1));
12634 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12637 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12638 t = build2 (GE_EXPR, boolean_type_node, u, t);
12639 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12640 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12641 gimplify_and_add (t, pre_p);
12643 t = sav;
12644 if (sav_ofs)
12645 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12647 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12648 build_int_cst (TREE_TYPE (reg), n_reg));
12649 u = fold_convert (sizetype, u);
12650 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12651 t = fold_build_pointer_plus (t, u);
12653 /* _Decimal32 varargs are located in the second word of the 64-bit
12654 FP register for 32-bit binaries. */
12655 if (TARGET_32BIT
12656 && TARGET_HARD_FLOAT && TARGET_FPRS
12657 && mode == SDmode)
12658 t = fold_build_pointer_plus_hwi (t, size);
12660 gimplify_assign (addr, t, pre_p);
12662 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12664 stmt = gimple_build_label (lab_false);
12665 gimple_seq_add_stmt (pre_p, stmt);
12667 if ((n_reg == 2 && !regalign) || n_reg > 2)
12669 /* Ensure that we don't find any more args in regs.
12670 Alignment has already taken care of the special cases. */
12671 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12675 /* ... otherwise out of the overflow area. */
12677 /* Care for on-stack alignment if needed. */
12678 t = ovf;
12679 if (align != 1)
12681 t = fold_build_pointer_plus_hwi (t, align - 1);
12682 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12683 build_int_cst (TREE_TYPE (t), -align));
12685 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12687 gimplify_assign (unshare_expr (addr), t, pre_p);
12689 t = fold_build_pointer_plus_hwi (t, size);
12690 gimplify_assign (unshare_expr (ovf), t, pre_p);
12692 if (lab_over)
12694 stmt = gimple_build_label (lab_over);
12695 gimple_seq_add_stmt (pre_p, stmt);
12698 if (STRICT_ALIGNMENT
12699 && (TYPE_ALIGN (type)
12700 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12702 /* The value (of type complex double, for example) may not be
12703 aligned in memory in the saved registers, so copy via a
12704 temporary. (This is the same code as used for SPARC.) */
12705 tree tmp = create_tmp_var (type, "va_arg_tmp");
12706 tree dest_addr = build_fold_addr_expr (tmp);
12708 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12709 3, dest_addr, addr, size_int (rsize * 4));
12711 gimplify_and_add (copy, pre_p);
12712 addr = dest_addr;
12715 addr = fold_convert (ptrtype, addr);
12716 return build_va_arg_indirect_ref (addr);
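/* A sketch of what the V4 gimplification above amounts to for a
   plain "int" (our gloss, in C-like pseudocode rather than GIMPLE):

     if (ap->gpr >= 8)
       goto overflow;
     addr = ap->reg_save_area + ap->gpr++ * 4;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 4;
   done:
     result = *(int *) addr;

   with the overflow path rounding addr up first when align > 1.  */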
12719 /* Builtins. */
12721 static void
12722 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12724 tree t;
12725 unsigned classify = rs6000_builtin_info[(int)code].attr;
12726 const char *attr_string = "";
12728 gcc_assert (name != NULL);
12729 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
12731 if (rs6000_builtin_decls[(int)code])
12732 fatal_error (input_location,
12733 "internal error: builtin function %s already processed", name);
12735 rs6000_builtin_decls[(int)code] = t =
12736 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12738 /* Set any special attributes. */
12739 if ((classify & RS6000_BTC_CONST) != 0)
12741 /* const function, function only depends on the inputs. */
12742 TREE_READONLY (t) = 1;
12743 TREE_NOTHROW (t) = 1;
12744 attr_string = ", const";
12746 else if ((classify & RS6000_BTC_PURE) != 0)
12748 /* pure function, function can read global memory, but does not set any
12749 external state. */
12750 DECL_PURE_P (t) = 1;
12751 TREE_NOTHROW (t) = 1;
12752 attr_string = ", pure";
12754 else if ((classify & RS6000_BTC_FP) != 0)
12756 /* Function is a math function. If rounding mode is on, then treat the
12757 function as not reading global memory, but it can have arbitrary side
12758 effects. If it is off, then assume the function is a const function.
12759 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12760 builtin-attrs.def that is used for the math functions. */
12761 TREE_NOTHROW (t) = 1;
12762 if (flag_rounding_math)
12764 DECL_PURE_P (t) = 1;
12765 DECL_IS_NOVOPS (t) = 1;
12766 attr_string = ", fp, pure";
12768 else
12770 TREE_READONLY (t) = 1;
12771 attr_string = ", fp, const";
12774 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12775 gcc_unreachable ();
12777 if (TARGET_DEBUG_BUILTIN)
12778 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12779 (int)code, name, attr_string);
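/* A minimal usage sketch of def_builtin, with a hypothetical builtin:
   the function type is built with the usual tree helpers, and the enum
   value (EXAMPLE_BUILTIN_FOO here is made up) would come from
   rs6000-builtin.def, carrying e.g. RS6000_BTC_CONST in its attr
   field.  */
#if 0
static void
example_register_builtin (void)
{
  tree v4si = build_vector_type (intSI_type_node, 4);
  /* Type: v4si __builtin_example_foo (v4si, v4si).  */
  tree ftype = build_function_type_list (v4si, v4si, v4si, NULL_TREE);

  def_builtin ("__builtin_example_foo", ftype, EXAMPLE_BUILTIN_FOO);
}
#endif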
12782 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12784 #undef RS6000_BUILTIN_0
12785 #undef RS6000_BUILTIN_1
12786 #undef RS6000_BUILTIN_2
12787 #undef RS6000_BUILTIN_3
12788 #undef RS6000_BUILTIN_A
12789 #undef RS6000_BUILTIN_D
12790 #undef RS6000_BUILTIN_E
12791 #undef RS6000_BUILTIN_H
12792 #undef RS6000_BUILTIN_P
12793 #undef RS6000_BUILTIN_Q
12794 #undef RS6000_BUILTIN_S
12795 #undef RS6000_BUILTIN_X
12797 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12798 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12799 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12800 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12801 { MASK, ICODE, NAME, ENUM },
12803 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12804 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12805 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12806 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12807 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12808 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12809 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12810 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12812 static const struct builtin_description bdesc_3arg[] =
12814 #include "rs6000-builtin.def"
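/* This table, and every bdesc_* table that follows, is built with the
   same X-macro technique: each entry in rs6000-builtin.def is written
   as a call to one of the RS6000_BUILTIN_* macros, and before each
   table exactly one of those macros is redefined to emit an
   initializer while the others expand to nothing.  A self-contained
   sketch of the pattern, with hypothetical names:  */
#if 0
struct ex_desc { int code; const char *name; };

#define EX_BUILTIN_1(ENUM, NAME)                  /* filtered out */
#define EX_BUILTIN_2(ENUM, NAME) { ENUM, NAME },  /* kept */

static const struct ex_desc ex_2arg[] =
{
  /* An "ex-builtin.def" would contain both kinds of lines; only the
     EX_BUILTIN_2 entries survive into this particular table.  */
  EX_BUILTIN_1 (0, "ex_neg")
  EX_BUILTIN_2 (1, "ex_add")
};
#endif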
12817 /* DST operations: void foo (void *, const int, const char). */
12819 #undef RS6000_BUILTIN_0
12820 #undef RS6000_BUILTIN_1
12821 #undef RS6000_BUILTIN_2
12822 #undef RS6000_BUILTIN_3
12823 #undef RS6000_BUILTIN_A
12824 #undef RS6000_BUILTIN_D
12825 #undef RS6000_BUILTIN_E
12826 #undef RS6000_BUILTIN_H
12827 #undef RS6000_BUILTIN_P
12828 #undef RS6000_BUILTIN_Q
12829 #undef RS6000_BUILTIN_S
12830 #undef RS6000_BUILTIN_X
12832 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12833 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12834 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12835 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12836 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12837 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12838 { MASK, ICODE, NAME, ENUM },
12840 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12841 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12842 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12843 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12844 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12845 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12847 static const struct builtin_description bdesc_dst[] =
12849 #include "rs6000-builtin.def"
12852 /* Simple binary operations: VECc = foo (VECa, VECb). */
12854 #undef RS6000_BUILTIN_0
12855 #undef RS6000_BUILTIN_1
12856 #undef RS6000_BUILTIN_2
12857 #undef RS6000_BUILTIN_3
12858 #undef RS6000_BUILTIN_A
12859 #undef RS6000_BUILTIN_D
12860 #undef RS6000_BUILTIN_E
12861 #undef RS6000_BUILTIN_H
12862 #undef RS6000_BUILTIN_P
12863 #undef RS6000_BUILTIN_Q
12864 #undef RS6000_BUILTIN_S
12865 #undef RS6000_BUILTIN_X
12867 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12868 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12869 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12870 { MASK, ICODE, NAME, ENUM },
12872 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12873 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12874 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12875 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12876 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12877 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12878 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12879 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12880 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12882 static const struct builtin_description bdesc_2arg[] =
12884 #include "rs6000-builtin.def"
12887 #undef RS6000_BUILTIN_0
12888 #undef RS6000_BUILTIN_1
12889 #undef RS6000_BUILTIN_2
12890 #undef RS6000_BUILTIN_3
12891 #undef RS6000_BUILTIN_A
12892 #undef RS6000_BUILTIN_D
12893 #undef RS6000_BUILTIN_E
12894 #undef RS6000_BUILTIN_H
12895 #undef RS6000_BUILTIN_P
12896 #undef RS6000_BUILTIN_Q
12897 #undef RS6000_BUILTIN_S
12898 #undef RS6000_BUILTIN_X
12900 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12901 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12902 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12903 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12904 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12905 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12906 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12907 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12908 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12909 { MASK, ICODE, NAME, ENUM },
12911 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12912 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12913 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12915 /* AltiVec predicates. */
12917 static const struct builtin_description bdesc_altivec_preds[] =
12919 #include "rs6000-builtin.def"
12922 /* SPE predicates. */
12923 #undef RS6000_BUILTIN_0
12924 #undef RS6000_BUILTIN_1
12925 #undef RS6000_BUILTIN_2
12926 #undef RS6000_BUILTIN_3
12927 #undef RS6000_BUILTIN_A
12928 #undef RS6000_BUILTIN_D
12929 #undef RS6000_BUILTIN_E
12930 #undef RS6000_BUILTIN_H
12931 #undef RS6000_BUILTIN_P
12932 #undef RS6000_BUILTIN_Q
12933 #undef RS6000_BUILTIN_S
12934 #undef RS6000_BUILTIN_X
12936 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12937 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12938 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12939 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12940 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12941 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12942 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12943 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12944 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12945 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12946 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
12947 { MASK, ICODE, NAME, ENUM },
12949 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12951 static const struct builtin_description bdesc_spe_predicates[] =
12953 #include "rs6000-builtin.def"
12956 /* SPE evsel predicates. */
12957 #undef RS6000_BUILTIN_0
12958 #undef RS6000_BUILTIN_1
12959 #undef RS6000_BUILTIN_2
12960 #undef RS6000_BUILTIN_3
12961 #undef RS6000_BUILTIN_A
12962 #undef RS6000_BUILTIN_D
12963 #undef RS6000_BUILTIN_E
12964 #undef RS6000_BUILTIN_H
12965 #undef RS6000_BUILTIN_P
12966 #undef RS6000_BUILTIN_Q
12967 #undef RS6000_BUILTIN_S
12968 #undef RS6000_BUILTIN_X
12970 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12971 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12972 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12973 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12974 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12975 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12976 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12977 { MASK, ICODE, NAME, ENUM },
12979 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12980 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12981 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12982 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12983 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12985 static const struct builtin_description bdesc_spe_evsel[] =
12987 #include "rs6000-builtin.def"
12990 /* PAIRED predicates. */
12991 #undef RS6000_BUILTIN_0
12992 #undef RS6000_BUILTIN_1
12993 #undef RS6000_BUILTIN_2
12994 #undef RS6000_BUILTIN_3
12995 #undef RS6000_BUILTIN_A
12996 #undef RS6000_BUILTIN_D
12997 #undef RS6000_BUILTIN_E
12998 #undef RS6000_BUILTIN_H
12999 #undef RS6000_BUILTIN_P
13000 #undef RS6000_BUILTIN_Q
13001 #undef RS6000_BUILTIN_S
13002 #undef RS6000_BUILTIN_X
13004 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13005 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13006 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13007 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13008 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13009 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13010 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13011 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13012 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13013 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13014 { MASK, ICODE, NAME, ENUM },
13016 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13017 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13019 static const struct builtin_description bdesc_paired_preds[] =
13021 #include "rs6000-builtin.def"
13024 /* ABS* operations. */
13026 #undef RS6000_BUILTIN_0
13027 #undef RS6000_BUILTIN_1
13028 #undef RS6000_BUILTIN_2
13029 #undef RS6000_BUILTIN_3
13030 #undef RS6000_BUILTIN_A
13031 #undef RS6000_BUILTIN_D
13032 #undef RS6000_BUILTIN_E
13033 #undef RS6000_BUILTIN_H
13034 #undef RS6000_BUILTIN_P
13035 #undef RS6000_BUILTIN_Q
13036 #undef RS6000_BUILTIN_S
13037 #undef RS6000_BUILTIN_X
13039 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13040 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13041 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13042 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13043 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13044 { MASK, ICODE, NAME, ENUM },
13046 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13047 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13048 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13049 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13050 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13051 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13052 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13054 static const struct builtin_description bdesc_abs[] =
13056 #include "rs6000-builtin.def"
13059 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13060 foo (VECa). */
13062 #undef RS6000_BUILTIN_0
13063 #undef RS6000_BUILTIN_1
13064 #undef RS6000_BUILTIN_2
13065 #undef RS6000_BUILTIN_3
13066 #undef RS6000_BUILTIN_A
13067 #undef RS6000_BUILTIN_D
13068 #undef RS6000_BUILTIN_E
13069 #undef RS6000_BUILTIN_H
13070 #undef RS6000_BUILTIN_P
13071 #undef RS6000_BUILTIN_Q
13072 #undef RS6000_BUILTIN_S
13073 #undef RS6000_BUILTIN_X
13075 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13076 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13077 { MASK, ICODE, NAME, ENUM },
13079 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13080 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13081 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13082 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13083 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13084 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13085 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13086 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13087 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13088 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13090 static const struct builtin_description bdesc_1arg[] =
13092 #include "rs6000-builtin.def"
13095 /* Simple no-argument operations: result = __builtin_darn_32 (). */
13097 #undef RS6000_BUILTIN_0
13098 #undef RS6000_BUILTIN_1
13099 #undef RS6000_BUILTIN_2
13100 #undef RS6000_BUILTIN_3
13101 #undef RS6000_BUILTIN_A
13102 #undef RS6000_BUILTIN_D
13103 #undef RS6000_BUILTIN_E
13104 #undef RS6000_BUILTIN_H
13105 #undef RS6000_BUILTIN_P
13106 #undef RS6000_BUILTIN_Q
13107 #undef RS6000_BUILTIN_S
13108 #undef RS6000_BUILTIN_X
13110 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13111 { MASK, ICODE, NAME, ENUM },
13113 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13114 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13115 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13116 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13117 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13118 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13119 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13120 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13121 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13122 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13123 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13125 static const struct builtin_description bdesc_0arg[] =
13127 #include "rs6000-builtin.def"
13130 /* HTM builtins. */
13131 #undef RS6000_BUILTIN_0
13132 #undef RS6000_BUILTIN_1
13133 #undef RS6000_BUILTIN_2
13134 #undef RS6000_BUILTIN_3
13135 #undef RS6000_BUILTIN_A
13136 #undef RS6000_BUILTIN_D
13137 #undef RS6000_BUILTIN_E
13138 #undef RS6000_BUILTIN_H
13139 #undef RS6000_BUILTIN_P
13140 #undef RS6000_BUILTIN_Q
13141 #undef RS6000_BUILTIN_S
13142 #undef RS6000_BUILTIN_X
13144 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13145 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13146 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13147 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13148 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13149 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13150 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13151 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13152 { MASK, ICODE, NAME, ENUM },
13154 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13155 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13156 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13157 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13159 static const struct builtin_description bdesc_htm[] =
13161 #include "rs6000-builtin.def"
13164 #undef RS6000_BUILTIN_0
13165 #undef RS6000_BUILTIN_1
13166 #undef RS6000_BUILTIN_2
13167 #undef RS6000_BUILTIN_3
13168 #undef RS6000_BUILTIN_A
13169 #undef RS6000_BUILTIN_D
13170 #undef RS6000_BUILTIN_E
13171 #undef RS6000_BUILTIN_H
13172 #undef RS6000_BUILTIN_P
13173 #undef RS6000_BUILTIN_Q
13174 #undef RS6000_BUILTIN_S
13176 /* Return true if a builtin function is overloaded. */
13177 bool
13178 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13180 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13183 /* Expand an expression EXP that calls a builtin without arguments. */
13184 static rtx
13185 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13187 rtx pat;
13188 machine_mode tmode = insn_data[icode].operand[0].mode;
13190 if (icode == CODE_FOR_nothing)
13191 /* Builtin not supported on this processor. */
13192 return 0;
13194 if (target == 0
13195 || GET_MODE (target) != tmode
13196 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13197 target = gen_reg_rtx (tmode);
13199 pat = GEN_FCN (icode) (target);
13200 if (! pat)
13201 return 0;
13202 emit_insn (pat);
13204 return target;
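/* A user-level sketch of a no-argument builtin that reaches this
   expander, assuming a POWER9 target (-mcpu=power9) where the darn
   instruction exists:  */
#if 0
int
get_random_32 (void)
{
  /* Expands via rs6000_expand_zeroop_builtin; a fresh result register
     is created above when the caller's TARGET is unusable.  */
  return __builtin_darn_32 ();
}
#endif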
13208 static rtx
13209 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13211 rtx pat;
13212 tree arg0 = CALL_EXPR_ARG (exp, 0);
13213 tree arg1 = CALL_EXPR_ARG (exp, 1);
13214 rtx op0 = expand_normal (arg0);
13215 rtx op1 = expand_normal (arg1);
13216 machine_mode mode0 = insn_data[icode].operand[0].mode;
13217 machine_mode mode1 = insn_data[icode].operand[1].mode;
13219 if (icode == CODE_FOR_nothing)
13220 /* Builtin not supported on this processor. */
13221 return 0;
13223 /* If we got invalid arguments, bail out before generating bad rtl. */
13224 if (arg0 == error_mark_node || arg1 == error_mark_node)
13225 return const0_rtx;
13227 if (GET_CODE (op0) != CONST_INT
13228 || INTVAL (op0) > 255
13229 || INTVAL (op0) < 0)
13231 error ("argument 1 must be an 8-bit field value");
13232 return const0_rtx;
13235 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13236 op0 = copy_to_mode_reg (mode0, op0);
13238 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13239 op1 = copy_to_mode_reg (mode1, op1);
13241 pat = GEN_FCN (icode) (op0, op1);
13242 if (! pat)
13243 return const0_rtx;
13244 emit_insn (pat);
13246 return NULL_RTX;
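/* A user-level sketch of the builtin handled above; the first argument
   is the 8-bit FPSCR field mask that the range check enforces:  */
#if 0
void
set_fpscr (double new_bits)
{
  /* Copy all eight 4-bit FPSCR fields from NEW_BITS; 0xff selects
     every field and passes the 0..255 check above.  */
  __builtin_mtfsf (0xff, new_bits);
}
#endif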
13249 static rtx
13250 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13252 rtx pat;
13253 tree arg0 = CALL_EXPR_ARG (exp, 0);
13254 rtx op0 = expand_normal (arg0);
13255 machine_mode tmode = insn_data[icode].operand[0].mode;
13256 machine_mode mode0 = insn_data[icode].operand[1].mode;
13258 if (icode == CODE_FOR_nothing)
13259 /* Builtin not supported on this processor. */
13260 return 0;
13262 /* If we got invalid arguments, bail out before generating bad rtl. */
13263 if (arg0 == error_mark_node)
13264 return const0_rtx;
13266 if (icode == CODE_FOR_altivec_vspltisb
13267 || icode == CODE_FOR_altivec_vspltish
13268 || icode == CODE_FOR_altivec_vspltisw
13269 || icode == CODE_FOR_spe_evsplatfi
13270 || icode == CODE_FOR_spe_evsplati)
13272 /* Only allow 5-bit *signed* literals. */
13273 if (GET_CODE (op0) != CONST_INT
13274 || INTVAL (op0) > 15
13275 || INTVAL (op0) < -16)
13277 error ("argument 1 must be a 5-bit signed literal");
13278 return const0_rtx;
13282 if (target == 0
13283 || GET_MODE (target) != tmode
13284 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13285 target = gen_reg_rtx (tmode);
13287 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13288 op0 = copy_to_mode_reg (mode0, op0);
13290 pat = GEN_FCN (icode) (target, op0);
13291 if (! pat)
13292 return 0;
13293 emit_insn (pat);
13295 return target;
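/* The 5-bit literal check above is what rejects out-of-range or
   non-constant arguments to the splat-immediate intrinsics.  A sketch,
   assuming <altivec.h>:  */
#if 0
#include <altivec.h>

vector signed int
splat_five (void)
{
  return vec_splat_s32 (5);	/* OK: signed literal in -16..15 */
}

/* vec_splat_s32 with a variable, or a literal outside -16..15, is
   diagnosed above: "argument 1 must be a 5-bit signed literal".  */
#endif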
13298 static rtx
13299 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13301 rtx pat, scratch1, scratch2;
13302 tree arg0 = CALL_EXPR_ARG (exp, 0);
13303 rtx op0 = expand_normal (arg0);
13304 machine_mode tmode = insn_data[icode].operand[0].mode;
13305 machine_mode mode0 = insn_data[icode].operand[1].mode;
13307 /* If we have invalid arguments, bail out before generating bad rtl. */
13308 if (arg0 == error_mark_node)
13309 return const0_rtx;
13311 if (target == 0
13312 || GET_MODE (target) != tmode
13313 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13314 target = gen_reg_rtx (tmode);
13316 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13317 op0 = copy_to_mode_reg (mode0, op0);
13319 scratch1 = gen_reg_rtx (mode0);
13320 scratch2 = gen_reg_rtx (mode0);
13322 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13323 if (! pat)
13324 return 0;
13325 emit_insn (pat);
13327 return target;
13330 static rtx
13331 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13333 rtx pat;
13334 tree arg0 = CALL_EXPR_ARG (exp, 0);
13335 tree arg1 = CALL_EXPR_ARG (exp, 1);
13336 rtx op0 = expand_normal (arg0);
13337 rtx op1 = expand_normal (arg1);
13338 machine_mode tmode = insn_data[icode].operand[0].mode;
13339 machine_mode mode0 = insn_data[icode].operand[1].mode;
13340 machine_mode mode1 = insn_data[icode].operand[2].mode;
13342 if (icode == CODE_FOR_nothing)
13343 /* Builtin not supported on this processor. */
13344 return 0;
13346 /* If we got invalid arguments, bail out before generating bad rtl. */
13347 if (arg0 == error_mark_node || arg1 == error_mark_node)
13348 return const0_rtx;
13350 if (icode == CODE_FOR_altivec_vcfux
13351 || icode == CODE_FOR_altivec_vcfsx
13352 || icode == CODE_FOR_altivec_vctsxs
13353 || icode == CODE_FOR_altivec_vctuxs
13354 || icode == CODE_FOR_altivec_vspltb
13355 || icode == CODE_FOR_altivec_vsplth
13356 || icode == CODE_FOR_altivec_vspltw
13357 || icode == CODE_FOR_spe_evaddiw
13358 || icode == CODE_FOR_spe_evldd
13359 || icode == CODE_FOR_spe_evldh
13360 || icode == CODE_FOR_spe_evldw
13361 || icode == CODE_FOR_spe_evlhhesplat
13362 || icode == CODE_FOR_spe_evlhhossplat
13363 || icode == CODE_FOR_spe_evlhhousplat
13364 || icode == CODE_FOR_spe_evlwhe
13365 || icode == CODE_FOR_spe_evlwhos
13366 || icode == CODE_FOR_spe_evlwhou
13367 || icode == CODE_FOR_spe_evlwhsplat
13368 || icode == CODE_FOR_spe_evlwwsplat
13369 || icode == CODE_FOR_spe_evrlwi
13370 || icode == CODE_FOR_spe_evslwi
13371 || icode == CODE_FOR_spe_evsrwis
13372 || icode == CODE_FOR_spe_evsubifw
13373 || icode == CODE_FOR_spe_evsrwiu)
13375 /* Only allow 5-bit unsigned literals. */
13376 STRIP_NOPS (arg1);
13377 if (TREE_CODE (arg1) != INTEGER_CST
13378 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13380 error ("argument 2 must be a 5-bit unsigned literal");
13381 return const0_rtx;
13384 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13385 || icode == CODE_FOR_dfptstsfi_lt_dd
13386 || icode == CODE_FOR_dfptstsfi_gt_dd
13387 || icode == CODE_FOR_dfptstsfi_unordered_dd
13388 || icode == CODE_FOR_dfptstsfi_eq_td
13389 || icode == CODE_FOR_dfptstsfi_lt_td
13390 || icode == CODE_FOR_dfptstsfi_gt_td
13391 || icode == CODE_FOR_dfptstsfi_unordered_td)
13393 /* Only allow 6-bit unsigned literals. */
13394 STRIP_NOPS (arg0);
13395 if (TREE_CODE (arg0) != INTEGER_CST
13396 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13398 error ("argument 1 must be a 6-bit unsigned literal");
13399 return CONST0_RTX (tmode);
13403 if (target == 0
13404 || GET_MODE (target) != tmode
13405 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13406 target = gen_reg_rtx (tmode);
13408 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13409 op0 = copy_to_mode_reg (mode0, op0);
13410 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13411 op1 = copy_to_mode_reg (mode1, op1);
13413 pat = GEN_FCN (icode) (target, op0, op1);
13414 if (! pat)
13415 return 0;
13416 emit_insn (pat);
13418 return target;
13421 static rtx
13422 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13424 rtx pat, scratch;
13425 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13426 tree arg0 = CALL_EXPR_ARG (exp, 1);
13427 tree arg1 = CALL_EXPR_ARG (exp, 2);
13428 rtx op0 = expand_normal (arg0);
13429 rtx op1 = expand_normal (arg1);
13430 machine_mode tmode = SImode;
13431 machine_mode mode0 = insn_data[icode].operand[1].mode;
13432 machine_mode mode1 = insn_data[icode].operand[2].mode;
13433 int cr6_form_int;
13435 if (TREE_CODE (cr6_form) != INTEGER_CST)
13437 error ("argument 1 of __builtin_altivec_predicate must be a constant");
13438 return const0_rtx;
13440 else
13441 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13443 gcc_assert (mode0 == mode1);
13445 /* If we have invalid arguments, bail out before generating bad rtl. */
13446 if (arg0 == error_mark_node || arg1 == error_mark_node)
13447 return const0_rtx;
13449 if (target == 0
13450 || GET_MODE (target) != tmode
13451 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13452 target = gen_reg_rtx (tmode);
13454 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13455 op0 = copy_to_mode_reg (mode0, op0);
13456 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13457 op1 = copy_to_mode_reg (mode1, op1);
13459 scratch = gen_reg_rtx (mode0);
13461 pat = GEN_FCN (icode) (scratch, op0, op1);
13462 if (! pat)
13463 return 0;
13464 emit_insn (pat);
13466 /* The vec_any* and vec_all* predicates use the same opcodes for two
13467 different operations, but the bits in CR6 will be different
13468 depending on what information we want. So we have to play tricks
13469 with CR6 to get the right bits out.
13471 If you think this is disgusting, look at the specs for the
13472 AltiVec predicates. */
13474 switch (cr6_form_int)
13476 case 0:
13477 emit_insn (gen_cr6_test_for_zero (target));
13478 break;
13479 case 1:
13480 emit_insn (gen_cr6_test_for_zero_reverse (target));
13481 break;
13482 case 2:
13483 emit_insn (gen_cr6_test_for_lt (target));
13484 break;
13485 case 3:
13486 emit_insn (gen_cr6_test_for_lt_reverse (target));
13487 break;
13488 default:
13489 error ("argument 1 of __builtin_altivec_predicate is out of range");
13490 break;
13493 return target;
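/* A user-level sketch of the predicates expanded above, assuming
   <altivec.h>: vec_all_* and vec_any_* use the same comparison
   instruction and differ only in the hidden first argument selecting
   the CR6 test.  */
#if 0
#include <altivec.h>

int
compare_both_ways (vector signed int a, vector signed int b)
{
  int all = vec_all_eq (a, b);	/* 1 iff every element is equal */
  int any = vec_any_eq (a, b);	/* 1 iff some element is equal */
  return all * 2 + any;
}
#endif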
13496 static rtx
13497 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
13499 rtx pat, addr;
13500 tree arg0 = CALL_EXPR_ARG (exp, 0);
13501 tree arg1 = CALL_EXPR_ARG (exp, 1);
13502 machine_mode tmode = insn_data[icode].operand[0].mode;
13503 machine_mode mode0 = Pmode;
13504 machine_mode mode1 = Pmode;
13505 rtx op0 = expand_normal (arg0);
13506 rtx op1 = expand_normal (arg1);
13508 if (icode == CODE_FOR_nothing)
13509 /* Builtin not supported on this processor. */
13510 return 0;
13512 /* If we got invalid arguments, bail out before generating bad rtl. */
13513 if (arg0 == error_mark_node || arg1 == error_mark_node)
13514 return const0_rtx;
13516 if (target == 0
13517 || GET_MODE (target) != tmode
13518 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13519 target = gen_reg_rtx (tmode);
13521 op1 = copy_to_mode_reg (mode1, op1);
13523 if (op0 == const0_rtx)
13525 addr = gen_rtx_MEM (tmode, op1);
13527 else
13529 op0 = copy_to_mode_reg (mode0, op0);
13530 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
13533 pat = GEN_FCN (icode) (target, addr);
13535 if (! pat)
13536 return 0;
13537 emit_insn (pat);
13539 return target;
13542 /* Return a constant vector for use as a little-endian permute control vector
13543 to reverse the order of elements of the given vector mode. */
13544 static rtx
13545 swap_selector_for_mode (machine_mode mode)
13547 /* These are little endian vectors, so their elements are reversed
13548 from what you would normally expect for a permute control vector. */
13549 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13550 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13551 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13552 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
13553 unsigned int *swaparray, i;
13554 rtx perm[16];
13556 switch (mode)
13558 case V2DFmode:
13559 case V2DImode:
13560 swaparray = swap2;
13561 break;
13562 case V4SFmode:
13563 case V4SImode:
13564 swaparray = swap4;
13565 break;
13566 case V8HImode:
13567 swaparray = swap8;
13568 break;
13569 case V16QImode:
13570 swaparray = swap16;
13571 break;
13572 default:
13573 gcc_unreachable ();
13576 for (i = 0; i < 16; ++i)
13577 perm[i] = GEN_INT (swaparray[i]);
13579 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
13582 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
13583 with -maltivec=be specified. Issue the load followed by an element-
13584 reversing permute. */
13585 void
13586 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13588 rtx tmp = gen_reg_rtx (mode);
13589 rtx load = gen_rtx_SET (tmp, op1);
13590 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13591 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
13592 rtx sel = swap_selector_for_mode (mode);
13593 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
13595 gcc_assert (REG_P (op0));
13596 emit_insn (par);
13597 emit_insn (gen_rtx_SET (op0, vperm));
13600 /* Generate code for a "stvxl" built-in for a little endian target with
13601 -maltivec=be specified. Issue the store preceded by an element-reversing
13602 permute. */
13603 void
13604 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13606 rtx tmp = gen_reg_rtx (mode);
13607 rtx store = gen_rtx_SET (op0, tmp);
13608 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13609 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
13610 rtx sel = swap_selector_for_mode (mode);
13611 rtx vperm;
13613 gcc_assert (REG_P (op1));
13614 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13615 emit_insn (gen_rtx_SET (tmp, vperm));
13616 emit_insn (par);
13619 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
13620 specified. Issue the store preceded by an element-reversing permute. */
13621 void
13622 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13624 machine_mode inner_mode = GET_MODE_INNER (mode);
13625 rtx tmp = gen_reg_rtx (mode);
13626 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
13627 rtx sel = swap_selector_for_mode (mode);
13628 rtx vperm;
13630 gcc_assert (REG_P (op1));
13631 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13632 emit_insn (gen_rtx_SET (tmp, vperm));
13633 emit_insn (gen_rtx_SET (op0, stvx));
13636 static rtx
13637 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13639 rtx pat, addr;
13640 tree arg0 = CALL_EXPR_ARG (exp, 0);
13641 tree arg1 = CALL_EXPR_ARG (exp, 1);
13642 machine_mode tmode = insn_data[icode].operand[0].mode;
13643 machine_mode mode0 = Pmode;
13644 machine_mode mode1 = Pmode;
13645 rtx op0 = expand_normal (arg0);
13646 rtx op1 = expand_normal (arg1);
13648 if (icode == CODE_FOR_nothing)
13649 /* Builtin not supported on this processor. */
13650 return 0;
13652 /* If we got invalid arguments, bail out before generating bad rtl. */
13653 if (arg0 == error_mark_node || arg1 == error_mark_node)
13654 return const0_rtx;
13656 if (target == 0
13657 || GET_MODE (target) != tmode
13658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13659 target = gen_reg_rtx (tmode);
13661 op1 = copy_to_mode_reg (mode1, op1);
13663 /* For LVX, express the RTL accurately by ANDing the address with -16.
13664 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13665 so the raw address is fine. */
13666 if (icode == CODE_FOR_altivec_lvx_v2df_2op
13667 || icode == CODE_FOR_altivec_lvx_v2di_2op
13668 || icode == CODE_FOR_altivec_lvx_v4sf_2op
13669 || icode == CODE_FOR_altivec_lvx_v4si_2op
13670 || icode == CODE_FOR_altivec_lvx_v8hi_2op
13671 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
13673 rtx rawaddr;
13674 if (op0 == const0_rtx)
13675 rawaddr = op1;
13676 else
13678 op0 = copy_to_mode_reg (mode0, op0);
13679 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13681 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13682 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13684 /* For -maltivec=be, emit the load and follow it up with a
13685 permute to swap the elements. */
13686 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13688 rtx temp = gen_reg_rtx (tmode);
13689 emit_insn (gen_rtx_SET (temp, addr));
13691 rtx sel = swap_selector_for_mode (tmode);
13692 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
13693 UNSPEC_VPERM);
13694 emit_insn (gen_rtx_SET (target, vperm));
13696 else
13697 emit_insn (gen_rtx_SET (target, addr));
13699 else
13701 if (op0 == const0_rtx)
13702 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13703 else
13705 op0 = copy_to_mode_reg (mode0, op0);
13706 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13707 gen_rtx_PLUS (Pmode, op1, op0));
13710 pat = GEN_FCN (icode) (target, addr);
13711 if (! pat)
13712 return 0;
13713 emit_insn (pat);
13716 return target;
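/* The AND with -16 above mirrors what the lvx hardware does: the
   instruction ignores the low four bits of the effective address, so
   describing the access as *(EA & -16) keeps the RTL (and alias
   analysis) honest.  A sketch of the address computation, with a
   hypothetical helper name:  */
#if 0
#include <stdint.h>

static inline const void *
lvx_effective_address (const void *ea)
{
  /* lvx loads the 16-byte-aligned block containing EA.  */
  return (const void *) ((uintptr_t) ea & ~(uintptr_t) 15);
}
#endif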
13719 static rtx
13720 spe_expand_stv_builtin (enum insn_code icode, tree exp)
13722 tree arg0 = CALL_EXPR_ARG (exp, 0);
13723 tree arg1 = CALL_EXPR_ARG (exp, 1);
13724 tree arg2 = CALL_EXPR_ARG (exp, 2);
13725 rtx op0 = expand_normal (arg0);
13726 rtx op1 = expand_normal (arg1);
13727 rtx op2 = expand_normal (arg2);
13728 rtx pat;
13729 machine_mode mode0 = insn_data[icode].operand[0].mode;
13730 machine_mode mode1 = insn_data[icode].operand[1].mode;
13731 machine_mode mode2 = insn_data[icode].operand[2].mode;
13733 /* Invalid arguments. Bail out before doing anything stupid! */
13734 if (arg0 == error_mark_node
13735 || arg1 == error_mark_node
13736 || arg2 == error_mark_node)
13737 return const0_rtx;
13739 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
13740 op0 = copy_to_mode_reg (mode2, op0);
13741 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
13742 op1 = copy_to_mode_reg (mode0, op1);
13743 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13744 op2 = copy_to_mode_reg (mode1, op2);
13746 pat = GEN_FCN (icode) (op1, op2, op0);
13747 if (pat)
13748 emit_insn (pat);
13749 return NULL_RTX;
13752 static rtx
13753 paired_expand_stv_builtin (enum insn_code icode, tree exp)
13755 tree arg0 = CALL_EXPR_ARG (exp, 0);
13756 tree arg1 = CALL_EXPR_ARG (exp, 1);
13757 tree arg2 = CALL_EXPR_ARG (exp, 2);
13758 rtx op0 = expand_normal (arg0);
13759 rtx op1 = expand_normal (arg1);
13760 rtx op2 = expand_normal (arg2);
13761 rtx pat, addr;
13762 machine_mode tmode = insn_data[icode].operand[0].mode;
13763 machine_mode mode1 = Pmode;
13764 machine_mode mode2 = Pmode;
13766 /* Invalid arguments. Bail out before doing anything stupid! */
13767 if (arg0 == error_mark_node
13768 || arg1 == error_mark_node
13769 || arg2 == error_mark_node)
13770 return const0_rtx;
13772 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
13773 op0 = copy_to_mode_reg (tmode, op0);
13775 op2 = copy_to_mode_reg (mode2, op2);
13777 if (op1 == const0_rtx)
13779 addr = gen_rtx_MEM (tmode, op2);
13781 else
13783 op1 = copy_to_mode_reg (mode1, op1);
13784 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13787 pat = GEN_FCN (icode) (addr, op0);
13788 if (pat)
13789 emit_insn (pat);
13790 return NULL_RTX;
13793 static rtx
13794 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13796 tree arg0 = CALL_EXPR_ARG (exp, 0);
13797 tree arg1 = CALL_EXPR_ARG (exp, 1);
13798 tree arg2 = CALL_EXPR_ARG (exp, 2);
13799 rtx op0 = expand_normal (arg0);
13800 rtx op1 = expand_normal (arg1);
13801 rtx op2 = expand_normal (arg2);
13802 rtx pat, addr, rawaddr;
13803 machine_mode tmode = insn_data[icode].operand[0].mode;
13804 machine_mode smode = insn_data[icode].operand[1].mode;
13805 machine_mode mode1 = Pmode;
13806 machine_mode mode2 = Pmode;
13808 /* Invalid arguments. Bail out before doing anything stupid! */
13809 if (arg0 == error_mark_node
13810 || arg1 == error_mark_node
13811 || arg2 == error_mark_node)
13812 return const0_rtx;
13814 op2 = copy_to_mode_reg (mode2, op2);
13816 /* For STVX, express the RTL accurately by ANDing the address with -16.
13817 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13818 so the raw address is fine. */
13819 if (icode == CODE_FOR_altivec_stvx_v2df_2op
13820 || icode == CODE_FOR_altivec_stvx_v2di_2op
13821 || icode == CODE_FOR_altivec_stvx_v4sf_2op
13822 || icode == CODE_FOR_altivec_stvx_v4si_2op
13823 || icode == CODE_FOR_altivec_stvx_v8hi_2op
13824 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
13826 if (op1 == const0_rtx)
13827 rawaddr = op2;
13828 else
13830 op1 = copy_to_mode_reg (mode1, op1);
13831 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13834 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13835 addr = gen_rtx_MEM (tmode, addr);
13837 op0 = copy_to_mode_reg (tmode, op0);
13839 /* For -maltivec=be, emit a permute to swap the elements, followed
13840 by the store. */
13841 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
13843 rtx temp = gen_reg_rtx (tmode);
13844 rtx sel = swap_selector_for_mode (tmode);
13845 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
13846 UNSPEC_VPERM);
13847 emit_insn (gen_rtx_SET (temp, vperm));
13848 emit_insn (gen_rtx_SET (addr, temp));
13850 else
13851 emit_insn (gen_rtx_SET (addr, op0));
13853 else
13855 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13856 op0 = copy_to_mode_reg (smode, op0);
13858 if (op1 == const0_rtx)
13859 addr = gen_rtx_MEM (tmode, op2);
13860 else
13862 op1 = copy_to_mode_reg (mode1, op1);
13863 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13866 pat = GEN_FCN (icode) (addr, op0);
13867 if (pat)
13868 emit_insn (pat);
13871 return NULL_RTX;
13874 /* Return the appropriate SPR number associated with the given builtin. */
13875 static inline HOST_WIDE_INT
13876 htm_spr_num (enum rs6000_builtins code)
13878 if (code == HTM_BUILTIN_GET_TFHAR
13879 || code == HTM_BUILTIN_SET_TFHAR)
13880 return TFHAR_SPR;
13881 else if (code == HTM_BUILTIN_GET_TFIAR
13882 || code == HTM_BUILTIN_SET_TFIAR)
13883 return TFIAR_SPR;
13884 else if (code == HTM_BUILTIN_GET_TEXASR
13885 || code == HTM_BUILTIN_SET_TEXASR)
13886 return TEXASR_SPR;
13887 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13888 || code == HTM_BUILTIN_SET_TEXASRU);
13889 return TEXASRU_SPR;
13892 /* Return the appropriate SPR regno associated with the given builtin. */
13893 static inline HOST_WIDE_INT
13894 htm_spr_regno (enum rs6000_builtins code)
13896 if (code == HTM_BUILTIN_GET_TFHAR
13897 || code == HTM_BUILTIN_SET_TFHAR)
13898 return TFHAR_REGNO;
13899 else if (code == HTM_BUILTIN_GET_TFIAR
13900 || code == HTM_BUILTIN_SET_TFIAR)
13901 return TFIAR_REGNO;
13902 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13903 || code == HTM_BUILTIN_SET_TEXASR
13904 || code == HTM_BUILTIN_GET_TEXASRU
13905 || code == HTM_BUILTIN_SET_TEXASRU);
13906 return TEXASR_REGNO;
13909 /* Return the correct ICODE value depending on whether we are
13910 setting or reading the HTM SPRs. */
13911 static inline enum insn_code
13912 rs6000_htm_spr_icode (bool nonvoid)
13914 if (nonvoid)
13915 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13916 else
13917 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13920 /* Expand the HTM builtin in EXP and store the result in TARGET.
13921 Store true in *EXPANDEDP if we found a builtin to expand. */
13922 static rtx
13923 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13925 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13926 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13927 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13928 const struct builtin_description *d;
13929 size_t i;
13931 *expandedp = true;
13933 if (!TARGET_POWERPC64
13934 && (fcode == HTM_BUILTIN_TABORTDC
13935 || fcode == HTM_BUILTIN_TABORTDCI))
13937 size_t uns_fcode = (size_t)fcode;
13938 const char *name = rs6000_builtin_info[uns_fcode].name;
13939 error ("builtin %s is only valid in 64-bit mode", name);
13940 return const0_rtx;
13943 /* Expand the HTM builtins. */
13944 d = bdesc_htm;
13945 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13946 if (d->code == fcode)
13948 rtx op[MAX_HTM_OPERANDS], pat;
13949 int nopnds = 0;
13950 tree arg;
13951 call_expr_arg_iterator iter;
13952 unsigned attr = rs6000_builtin_info[fcode].attr;
13953 enum insn_code icode = d->icode;
13954 const struct insn_operand_data *insn_op;
13955 bool uses_spr = (attr & RS6000_BTC_SPR);
13956 rtx cr = NULL_RTX;
13958 if (uses_spr)
13959 icode = rs6000_htm_spr_icode (nonvoid);
13960 insn_op = &insn_data[icode].operand[0];
13962 if (nonvoid)
13964 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
13965 if (!target
13966 || GET_MODE (target) != tmode
13967 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13968 target = gen_reg_rtx (tmode);
13969 if (uses_spr)
13970 op[nopnds++] = target;
13973 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13975 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13976 return const0_rtx;
13978 insn_op = &insn_data[icode].operand[nopnds];
13980 op[nopnds] = expand_normal (arg);
13982 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13984 if (!strcmp (insn_op->constraint, "n"))
13986 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13987 if (!CONST_INT_P (op[nopnds]))
13988 error ("argument %d must be an unsigned literal", arg_num);
13989 else
13990 error ("argument %d is an unsigned literal that is "
13991 "out of range", arg_num);
13992 return const0_rtx;
13994 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13997 nopnds++;
14000 /* Handle the builtins for extended mnemonics. These accept
14001 no arguments, but map to builtins that take arguments. */
14002 switch (fcode)
14004 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14005 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14006 op[nopnds++] = GEN_INT (1);
14007 if (flag_checking)
14008 attr |= RS6000_BTC_UNARY;
14009 break;
14010 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14011 op[nopnds++] = GEN_INT (0);
14012 if (flag_checking)
14013 attr |= RS6000_BTC_UNARY;
14014 break;
14015 default:
14016 break;
14019 /* If this builtin accesses SPRs, then pass in the appropriate
14020 SPR number and SPR regno as the last two operands. */
14021 if (uses_spr)
14023 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14024 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14025 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14027 /* If this builtin accesses a CR, then pass in a scratch
14028 CR as the last operand. */
14029 else if (attr & RS6000_BTC_CR)
14030 {
cr = gen_reg_rtx (CCmode);
14031 op[nopnds++] = cr;
14034 if (flag_checking)
14036 int expected_nopnds = 0;
14037 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14038 expected_nopnds = 1;
14039 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14040 expected_nopnds = 2;
14041 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14042 expected_nopnds = 3;
14043 if (!(attr & RS6000_BTC_VOID))
14044 expected_nopnds += 1;
14045 if (uses_spr)
14046 expected_nopnds += 2;
14048 gcc_assert (nopnds == expected_nopnds
14049 && nopnds <= MAX_HTM_OPERANDS);
14052 switch (nopnds)
14054 case 1:
14055 pat = GEN_FCN (icode) (op[0]);
14056 break;
14057 case 2:
14058 pat = GEN_FCN (icode) (op[0], op[1]);
14059 break;
14060 case 3:
14061 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14062 break;
14063 case 4:
14064 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14065 break;
14066 default:
14067 gcc_unreachable ();
14069 if (!pat)
14070 return NULL_RTX;
14071 emit_insn (pat);
14073 if (attr & RS6000_BTC_CR)
14075 if (fcode == HTM_BUILTIN_TBEGIN)
14077 /* Emit code to set TARGET to true or false depending on
14078 whether the tbegin. instruction succeeded or failed
14079 to start a transaction. We do this by placing the 1's
14080 complement of CR's EQ bit into TARGET. */
14081 rtx scratch = gen_reg_rtx (SImode);
14082 emit_insn (gen_rtx_SET (scratch,
14083 gen_rtx_EQ (SImode, cr,
14084 const0_rtx)));
14085 emit_insn (gen_rtx_SET (target,
14086 gen_rtx_XOR (SImode, scratch,
14087 GEN_INT (1))));
14089 else
14091 /* Emit code to copy the 4-bit condition register field
14092 CR into the least significant end of register TARGET. */
14093 rtx scratch1 = gen_reg_rtx (SImode);
14094 rtx scratch2 = gen_reg_rtx (SImode);
14095 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14096 emit_insn (gen_movcc (subreg, cr));
14097 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14098 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14102 if (nonvoid)
14103 return target;
14104 return const0_rtx;
14107 *expandedp = false;
14108 return NULL_RTX;
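/* A user-level sketch of the CR path above, assuming -mhtm: the value
   returned by __builtin_tbegin is the complemented EQ bit computed for
   HTM_BUILTIN_TBEGIN, so it is nonzero when a transaction starts.  */
#if 0
long
transactional_add (long *p, long v)
{
  if (__builtin_tbegin (0))
    {
      /* Transactional state.  */
      *p += v;
      __builtin_tend (0);
      return 1;
    }
  return 0;	/* Transaction failed to start.  */
}
#endif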
14111 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14113 static rtx
14114 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14115 rtx target)
14117 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14118 if (fcode == RS6000_BUILTIN_CPU_INIT)
14119 return const0_rtx;
14121 if (target == 0 || GET_MODE (target) != SImode)
14122 target = gen_reg_rtx (SImode);
14124 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14125 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14126 if (TREE_CODE (arg) != STRING_CST)
14128 error ("builtin %s only accepts a string argument",
14129 rs6000_builtin_info[(size_t) fcode].name);
14130 return const0_rtx;
14133 if (fcode == RS6000_BUILTIN_CPU_IS)
14135 const char *cpu = TREE_STRING_POINTER (arg);
14136 rtx cpuid = NULL_RTX;
14137 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14138 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14140 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14141 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14142 break;
14144 if (cpuid == NULL_RTX)
14146 /* Invalid CPU argument. */
14147 error ("cpu %s is an invalid argument to builtin %s",
14148 cpu, rs6000_builtin_info[(size_t) fcode].name);
14149 return const0_rtx;
14152 rtx platform = gen_reg_rtx (SImode);
14153 rtx tcbmem = gen_const_mem (SImode,
14154 gen_rtx_PLUS (Pmode,
14155 gen_rtx_REG (Pmode, TLS_REGNUM),
14156 GEN_INT (TCB_PLATFORM_OFFSET)));
14157 emit_move_insn (platform, tcbmem);
14158 emit_insn (gen_eqsi3 (target, platform, cpuid));
14160 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14162 const char *hwcap = TREE_STRING_POINTER (arg);
14163 rtx mask = NULL_RTX;
14164 int hwcap_offset;
14165 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14166 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14168 mask = GEN_INT (cpu_supports_info[i].mask);
14169 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14170 break;
14172 if (mask == NULL_RTX)
14174 /* Invalid HWCAP argument. */
14175 error ("hwcap %s is an invalid argument to builtin %s",
14176 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14177 return const0_rtx;
14180 rtx tcb_hwcap = gen_reg_rtx (SImode);
14181 rtx tcbmem = gen_const_mem (SImode,
14182 gen_rtx_PLUS (Pmode,
14183 gen_rtx_REG (Pmode, TLS_REGNUM),
14184 GEN_INT (hwcap_offset)));
14185 emit_move_insn (tcb_hwcap, tcbmem);
14186 rtx scratch1 = gen_reg_rtx (SImode);
14187 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14188 rtx scratch2 = gen_reg_rtx (SImode);
14189 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14190 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14193 /* Record that we have expanded a CPU builtin, so that we can later
14194 emit a reference to the special symbol exported by LIBC to ensure we
14195 do not link against an old LIBC that doesn't support this feature. */
14196 cpu_builtin_p = true;
14198 #else
14199 /* For old LIBCs, always return FALSE. */
14200 emit_move_insn (target, GEN_INT (0));
14201 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14203 return target;
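/* A user-level sketch of the two builtins expanded above: the string
   arguments must be literals so the mask and TCB offset resolve at
   compile time, and on this target __builtin_cpu_init () expands to
   nothing.  */
#if 0
#include <stdio.h>

void
report_cpu (void)
{
  __builtin_cpu_init ();	/* nop here, kept for portability */
  if (__builtin_cpu_is ("power9"))
    printf ("running on a POWER9\n");
  if (__builtin_cpu_supports ("vsx"))
    printf ("VSX is available\n");
}
#endif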
14206 static rtx
14207 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14209 rtx pat;
14210 tree arg0 = CALL_EXPR_ARG (exp, 0);
14211 tree arg1 = CALL_EXPR_ARG (exp, 1);
14212 tree arg2 = CALL_EXPR_ARG (exp, 2);
14213 rtx op0 = expand_normal (arg0);
14214 rtx op1 = expand_normal (arg1);
14215 rtx op2 = expand_normal (arg2);
14216 machine_mode tmode = insn_data[icode].operand[0].mode;
14217 machine_mode mode0 = insn_data[icode].operand[1].mode;
14218 machine_mode mode1 = insn_data[icode].operand[2].mode;
14219 machine_mode mode2 = insn_data[icode].operand[3].mode;
14221 if (icode == CODE_FOR_nothing)
14222 /* Builtin not supported on this processor. */
14223 return 0;
14225 /* If we got invalid arguments, bail out before generating bad rtl. */
14226 if (arg0 == error_mark_node
14227 || arg1 == error_mark_node
14228 || arg2 == error_mark_node)
14229 return const0_rtx;
14231 /* Check and prepare argument depending on the instruction code.
14233 Note that a switch statement instead of the sequence of tests
14234 would be incorrect as many of the CODE_FOR values could be
14235 CODE_FOR_nothing and that would yield multiple alternatives
14236 with identical values. We'd never reach here at runtime in
14237 this case. */
14238 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14239 || icode == CODE_FOR_altivec_vsldoi_v4si
14240 || icode == CODE_FOR_altivec_vsldoi_v8hi
14241 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14243 /* Only allow 4-bit unsigned literals. */
14244 STRIP_NOPS (arg2);
14245 if (TREE_CODE (arg2) != INTEGER_CST
14246 || TREE_INT_CST_LOW (arg2) & ~0xf)
14248 error ("argument 3 must be a 4-bit unsigned literal");
14249 return const0_rtx;
14252 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14253 || icode == CODE_FOR_vsx_xxpermdi_v2di
14254 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14255 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14256 || icode == CODE_FOR_vsx_xxsldwi_v4si
14257 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14258 || icode == CODE_FOR_vsx_xxsldwi_v2di
14259 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14261 /* Only allow 2-bit unsigned literals. */
14262 STRIP_NOPS (arg2);
14263 if (TREE_CODE (arg2) != INTEGER_CST
14264 || TREE_INT_CST_LOW (arg2) & ~0x3)
14266 error ("argument 3 must be a 2-bit unsigned literal");
14267 return const0_rtx;
14270 else if (icode == CODE_FOR_vsx_set_v2df
14271 || icode == CODE_FOR_vsx_set_v2di
14272 || icode == CODE_FOR_bcdadd
14273 || icode == CODE_FOR_bcdadd_lt
14274 || icode == CODE_FOR_bcdadd_eq
14275 || icode == CODE_FOR_bcdadd_gt
14276 || icode == CODE_FOR_bcdsub
14277 || icode == CODE_FOR_bcdsub_lt
14278 || icode == CODE_FOR_bcdsub_eq
14279 || icode == CODE_FOR_bcdsub_gt)
14281 /* Only allow 1-bit unsigned literals. */
14282 STRIP_NOPS (arg2);
14283 if (TREE_CODE (arg2) != INTEGER_CST
14284 || TREE_INT_CST_LOW (arg2) & ~0x1)
14286 error ("argument 3 must be a 1-bit unsigned literal");
14287 return const0_rtx;
14290 else if (icode == CODE_FOR_dfp_ddedpd_dd
14291 || icode == CODE_FOR_dfp_ddedpd_td)
14293 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14294 STRIP_NOPS (arg0);
14295 if (TREE_CODE (arg0) != INTEGER_CST
14296 || TREE_INT_CST_LOW (arg0) & ~0x3)
14298 error ("argument 1 must be 0 or 2");
14299 return const0_rtx;
14302 else if (icode == CODE_FOR_dfp_denbcd_dd
14303 || icode == CODE_FOR_dfp_denbcd_td)
14305 /* Only allow 1-bit unsigned literals. */
14306 STRIP_NOPS (arg0);
14307 if (TREE_CODE (arg0) != INTEGER_CST
14308 || TREE_INT_CST_LOW (arg0) & ~0x1)
14310 error ("argument 1 must be a 1-bit unsigned literal");
14311 return const0_rtx;
14314 else if (icode == CODE_FOR_dfp_dscli_dd
14315 || icode == CODE_FOR_dfp_dscli_td
14316 || icode == CODE_FOR_dfp_dscri_dd
14317 || icode == CODE_FOR_dfp_dscri_td)
14319 /* Only allow 6-bit unsigned literals. */
14320 STRIP_NOPS (arg1);
14321 if (TREE_CODE (arg1) != INTEGER_CST
14322 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14324 error ("argument 2 must be a 6-bit unsigned literal");
14325 return const0_rtx;
14328 else if (icode == CODE_FOR_crypto_vshasigmaw
14329 || icode == CODE_FOR_crypto_vshasigmad)
14331 /* Check whether the 2nd and 3rd arguments are integer constants in
14332 range, and prepare the arguments. */
14333 STRIP_NOPS (arg1);
14334 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
14336 error ("argument 2 must be 0 or 1");
14337 return const0_rtx;
14340 STRIP_NOPS (arg2);
14341 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
14343 error ("argument 3 must be in the range 0..15");
14344 return const0_rtx;
14348 if (target == 0
14349 || GET_MODE (target) != tmode
14350 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14351 target = gen_reg_rtx (tmode);
14353 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14354 op0 = copy_to_mode_reg (mode0, op0);
14355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14356 op1 = copy_to_mode_reg (mode1, op1);
14357 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14358 op2 = copy_to_mode_reg (mode2, op2);
14360 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
14361 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
14362 else
14363 pat = GEN_FCN (icode) (target, op0, op1, op2);
14364 if (! pat)
14365 return 0;
14366 emit_insn (pat);
14368 return target;
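/* The literal checks above surface at the user level through
   intrinsics whose last operand is an instruction immediate.  A
   sketch, assuming <altivec.h>:  */
#if 0
#include <altivec.h>

vector signed int
shift_pair_left (vector signed int a, vector signed int b)
{
  return vec_sld (a, b, 3);	/* OK: unsigned literal in 0..15 */
}

/* vec_sld with a non-constant third operand is rejected by the
   vsldoi check above: "argument 3 must be a 4-bit unsigned literal". */
#endif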
14371 /* Expand the lvx builtins. */
14372 static rtx
14373 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
14375 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14376 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14377 tree arg0;
14378 machine_mode tmode, mode0;
14379 rtx pat, op0;
14380 enum insn_code icode;
14382 switch (fcode)
14384 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
14385 icode = CODE_FOR_vector_altivec_load_v16qi;
14386 break;
14387 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
14388 icode = CODE_FOR_vector_altivec_load_v8hi;
14389 break;
14390 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
14391 icode = CODE_FOR_vector_altivec_load_v4si;
14392 break;
14393 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
14394 icode = CODE_FOR_vector_altivec_load_v4sf;
14395 break;
14396 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
14397 icode = CODE_FOR_vector_altivec_load_v2df;
14398 break;
14399 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
14400 icode = CODE_FOR_vector_altivec_load_v2di;
break;
14401 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
14402 icode = CODE_FOR_vector_altivec_load_v1ti;
14403 break;
14404 default:
14405 *expandedp = false;
14406 return NULL_RTX;
14409 *expandedp = true;
14411 arg0 = CALL_EXPR_ARG (exp, 0);
14412 op0 = expand_normal (arg0);
14413 tmode = insn_data[icode].operand[0].mode;
14414 mode0 = insn_data[icode].operand[1].mode;
14416 if (target == 0
14417 || GET_MODE (target) != tmode
14418 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14419 target = gen_reg_rtx (tmode);
14421 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14422 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14424 pat = GEN_FCN (icode) (target, op0);
14425 if (! pat)
14426 return 0;
14427 emit_insn (pat);
14428 return target;
14431 /* Expand the stvx builtins. */
14432 static rtx
14433 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14434 bool *expandedp)
14436 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14437 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14438 tree arg0, arg1;
14439 machine_mode mode0, mode1;
14440 rtx pat, op0, op1;
14441 enum insn_code icode;
14443 switch (fcode)
14445 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
14446 icode = CODE_FOR_vector_altivec_store_v16qi;
14447 break;
14448 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
14449 icode = CODE_FOR_vector_altivec_store_v8hi;
14450 break;
14451 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
14452 icode = CODE_FOR_vector_altivec_store_v4si;
14453 break;
14454 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
14455 icode = CODE_FOR_vector_altivec_store_v4sf;
14456 break;
14457 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
14458 icode = CODE_FOR_vector_altivec_store_v2df;
14459 break;
14460 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
14461 icode = CODE_FOR_vector_altivec_store_v2di;
break;
14462 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
14463 icode = CODE_FOR_vector_altivec_store_v1ti;
14464 break;
14465 default:
14466 *expandedp = false;
14467 return NULL_RTX;
14470 arg0 = CALL_EXPR_ARG (exp, 0);
14471 arg1 = CALL_EXPR_ARG (exp, 1);
14472 op0 = expand_normal (arg0);
14473 op1 = expand_normal (arg1);
14474 mode0 = insn_data[icode].operand[0].mode;
14475 mode1 = insn_data[icode].operand[1].mode;
14477 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14478 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14479 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14480 op1 = copy_to_mode_reg (mode1, op1);
14482 pat = GEN_FCN (icode) (op0, op1);
14483 if (pat)
14484 emit_insn (pat);
14486 *expandedp = true;
14487 return NULL_RTX;
14490 /* Expand the dst builtins. */
14491 static rtx
14492 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14493 bool *expandedp)
14495 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14496 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14497 tree arg0, arg1, arg2;
14498 machine_mode mode0, mode1;
14499 rtx pat, op0, op1, op2;
14500 const struct builtin_description *d;
14501 size_t i;
14503 *expandedp = false;
14505 /* Handle DST variants. */
14506 d = bdesc_dst;
14507 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14508 if (d->code == fcode)
14510 arg0 = CALL_EXPR_ARG (exp, 0);
14511 arg1 = CALL_EXPR_ARG (exp, 1);
14512 arg2 = CALL_EXPR_ARG (exp, 2);
14513 op0 = expand_normal (arg0);
14514 op1 = expand_normal (arg1);
14515 op2 = expand_normal (arg2);
14516 mode0 = insn_data[d->icode].operand[0].mode;
14517 mode1 = insn_data[d->icode].operand[1].mode;
14519 /* Invalid arguments, bail out before generating bad rtl. */
14520 if (arg0 == error_mark_node
14521 || arg1 == error_mark_node
14522 || arg2 == error_mark_node)
14523 return const0_rtx;
14525 *expandedp = true;
14526 STRIP_NOPS (arg2);
14527 if (TREE_CODE (arg2) != INTEGER_CST
14528 || TREE_INT_CST_LOW (arg2) & ~0x3)
14530 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14531 return const0_rtx;
14534 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14535 op0 = copy_to_mode_reg (Pmode, op0);
14536 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14537 op1 = copy_to_mode_reg (mode1, op1);
14539 pat = GEN_FCN (d->icode) (op0, op1, op2);
14540 if (pat != 0)
14541 emit_insn (pat);
14543 return NULL_RTX;
14546 return NULL_RTX;
14549 /* Expand vec_init builtin. */
14550 static rtx
14551 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14553 machine_mode tmode = TYPE_MODE (type);
14554 machine_mode inner_mode = GET_MODE_INNER (tmode);
14555 int i, n_elt = GET_MODE_NUNITS (tmode);
14557 gcc_assert (VECTOR_MODE_P (tmode));
14558 gcc_assert (n_elt == call_expr_nargs (exp));
14560 if (!target || !register_operand (target, tmode))
14561 target = gen_reg_rtx (tmode);
14563 /* If we have a vector comprised of a single element, such as V1TImode, do
14564 the initialization directly. */
14565 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14567 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14568 emit_move_insn (target, gen_lowpart (tmode, x));
14570 else
14572 rtvec v = rtvec_alloc (n_elt);
14574 for (i = 0; i < n_elt; ++i)
14576 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14577 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14580 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14583 return target;
14586 /* Return the integer constant in ARG. Constrain it to be in the range
14587 of the subparts of VEC_TYPE; issue an error if not. */
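/* For example (hypothetical use): with a V4SI vector type,
TYPE_VECTOR_SUBPARTS is 4, so a selector such as the one in
vec_extract (v4si, 5) reaches this function with elt == 5 > max == 3
and is rejected with the error below; valid selectors are 0..3. */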
14589 static int
14590 get_element_number (tree vec_type, tree arg)
14592 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14594 if (!tree_fits_uhwi_p (arg)
14595 || (elt = tree_to_uhwi (arg), elt > max))
14597 error ("selector must be an integer constant in the range 0..%wi", max);
14598 return 0;
14601 return elt;
14604 /* Expand vec_set builtin. */
14605 static rtx
14606 altivec_expand_vec_set_builtin (tree exp)
14608 machine_mode tmode, mode1;
14609 tree arg0, arg1, arg2;
14610 int elt;
14611 rtx op0, op1;
14613 arg0 = CALL_EXPR_ARG (exp, 0);
14614 arg1 = CALL_EXPR_ARG (exp, 1);
14615 arg2 = CALL_EXPR_ARG (exp, 2);
14617 tmode = TYPE_MODE (TREE_TYPE (arg0));
14618 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14619 gcc_assert (VECTOR_MODE_P (tmode));
14621 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14622 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14623 elt = get_element_number (TREE_TYPE (arg0), arg2);
14625 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14626 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14628 op0 = force_reg (tmode, op0);
14629 op1 = force_reg (mode1, op1);
14631 rs6000_expand_vector_set (op0, op1, elt);
14633 return op0;
14636 /* Expand vec_ext builtin. */
14637 static rtx
14638 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14640 machine_mode tmode, mode0;
14641 tree arg0, arg1;
14642 int elt;
14643 rtx op0;
14645 arg0 = CALL_EXPR_ARG (exp, 0);
14646 arg1 = CALL_EXPR_ARG (exp, 1);
14648 op0 = expand_normal (arg0);
14649 elt = get_element_number (TREE_TYPE (arg0), arg1);
14651 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14652 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14653 gcc_assert (VECTOR_MODE_P (mode0));
14655 op0 = force_reg (mode0, op0);
14657 if (optimize || !target || !register_operand (target, tmode))
14658 target = gen_reg_rtx (tmode);
14660 rs6000_expand_vector_extract (target, op0, elt);
14662 return target;
14665 /* Expand the builtin in EXP and store the result in TARGET. Store
14666 true in *EXPANDEDP if we found a builtin to expand. */
14667 static rtx
14668 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14670 const struct builtin_description *d;
14671 size_t i;
14672 enum insn_code icode;
14673 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14674 tree arg0;
14675 rtx op0, pat;
14676 machine_mode tmode, mode0;
14677 enum rs6000_builtins fcode
14678 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14680 if (rs6000_overloaded_builtin_p (fcode))
14682 *expandedp = true;
14683 error ("unresolved overload for Altivec builtin %qF", fndecl);
14685 /* Given it is invalid, just generate a normal call. */
14686 return expand_call (exp, target, false);
14689 target = altivec_expand_ld_builtin (exp, target, expandedp);
14690 if (*expandedp)
14691 return target;
14693 target = altivec_expand_st_builtin (exp, target, expandedp);
14694 if (*expandedp)
14695 return target;
14697 target = altivec_expand_dst_builtin (exp, target, expandedp);
14698 if (*expandedp)
14699 return target;
14701 *expandedp = true;
14703 switch (fcode)
14705 case ALTIVEC_BUILTIN_STVX_V2DF:
14706 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
14707 case ALTIVEC_BUILTIN_STVX_V2DI:
14708 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
14709 case ALTIVEC_BUILTIN_STVX_V4SF:
14710 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
14711 case ALTIVEC_BUILTIN_STVX:
14712 case ALTIVEC_BUILTIN_STVX_V4SI:
14713 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
14714 case ALTIVEC_BUILTIN_STVX_V8HI:
14715 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
14716 case ALTIVEC_BUILTIN_STVX_V16QI:
14717 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
14718 case ALTIVEC_BUILTIN_STVEBX:
14719 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14720 case ALTIVEC_BUILTIN_STVEHX:
14721 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14722 case ALTIVEC_BUILTIN_STVEWX:
14723 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14724 case ALTIVEC_BUILTIN_STVXL_V2DF:
14725 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14726 case ALTIVEC_BUILTIN_STVXL_V2DI:
14727 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14728 case ALTIVEC_BUILTIN_STVXL_V4SF:
14729 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14730 case ALTIVEC_BUILTIN_STVXL:
14731 case ALTIVEC_BUILTIN_STVXL_V4SI:
14732 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14733 case ALTIVEC_BUILTIN_STVXL_V8HI:
14734 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14735 case ALTIVEC_BUILTIN_STVXL_V16QI:
14736 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14738 case ALTIVEC_BUILTIN_STVLX:
14739 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14740 case ALTIVEC_BUILTIN_STVLXL:
14741 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14742 case ALTIVEC_BUILTIN_STVRX:
14743 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14744 case ALTIVEC_BUILTIN_STVRXL:
14745 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14747 case VSX_BUILTIN_STXVD2X_V1TI:
14748 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14749 case VSX_BUILTIN_STXVD2X_V2DF:
14750 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14751 case VSX_BUILTIN_STXVD2X_V2DI:
14752 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14753 case VSX_BUILTIN_STXVW4X_V4SF:
14754 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14755 case VSX_BUILTIN_STXVW4X_V4SI:
14756 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14757 case VSX_BUILTIN_STXVW4X_V8HI:
14758 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14759 case VSX_BUILTIN_STXVW4X_V16QI:
14760 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14762 /* For the following on big endian, it's ok to use any appropriate
14763 unaligned-supporting store, so use a generic expander. For
14764 little-endian, the exact element-reversing instruction must
14765 be used. */
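/* Sketch of the intent (illustrative): for vec_xst of a V4SI holding
{0,1,2,3}, element 0 must land at the lowest address on either
endianness; a generic unaligned store already does that on big
endian, while little endian needs vsx_st_elemrev_v4si to undo the
word swapping of the raw VSX store. */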
14766 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14768 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14769 : CODE_FOR_vsx_st_elemrev_v2df);
14770 return altivec_expand_stv_builtin (code, exp);
14772 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14774 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14775 : CODE_FOR_vsx_st_elemrev_v2di);
14776 return altivec_expand_stv_builtin (code, exp);
14778 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14780 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14781 : CODE_FOR_vsx_st_elemrev_v4sf);
14782 return altivec_expand_stv_builtin (code, exp);
14784 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14786 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14787 : CODE_FOR_vsx_st_elemrev_v4si);
14788 return altivec_expand_stv_builtin (code, exp);
14790 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14792 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14793 : CODE_FOR_vsx_st_elemrev_v8hi);
14794 return altivec_expand_stv_builtin (code, exp);
14796 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14798 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14799 : CODE_FOR_vsx_st_elemrev_v16qi);
14800 return altivec_expand_stv_builtin (code, exp);
14803 case ALTIVEC_BUILTIN_MFVSCR:
14804 icode = CODE_FOR_altivec_mfvscr;
14805 tmode = insn_data[icode].operand[0].mode;
14807 if (target == 0
14808 || GET_MODE (target) != tmode
14809 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14810 target = gen_reg_rtx (tmode);
14812 pat = GEN_FCN (icode) (target);
14813 if (! pat)
14814 return 0;
14815 emit_insn (pat);
14816 return target;
14818 case ALTIVEC_BUILTIN_MTVSCR:
14819 icode = CODE_FOR_altivec_mtvscr;
14820 arg0 = CALL_EXPR_ARG (exp, 0);
14821 op0 = expand_normal (arg0);
14822 mode0 = insn_data[icode].operand[0].mode;
14824 /* If we got invalid arguments, bail out before generating bad rtl. */
14825 if (arg0 == error_mark_node)
14826 return const0_rtx;
14828 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14829 op0 = copy_to_mode_reg (mode0, op0);
14831 pat = GEN_FCN (icode) (op0);
14832 if (pat)
14833 emit_insn (pat);
14834 return NULL_RTX;
14836 case ALTIVEC_BUILTIN_DSSALL:
14837 emit_insn (gen_altivec_dssall ());
14838 return NULL_RTX;
14840 case ALTIVEC_BUILTIN_DSS:
14841 icode = CODE_FOR_altivec_dss;
14842 arg0 = CALL_EXPR_ARG (exp, 0);
14843 STRIP_NOPS (arg0);
14844 op0 = expand_normal (arg0);
14845 mode0 = insn_data[icode].operand[0].mode;
14847 /* If we got invalid arguments, bail out before generating bad rtl. */
14848 if (arg0 == error_mark_node)
14849 return const0_rtx;
14851 if (TREE_CODE (arg0) != INTEGER_CST
14852 || TREE_INT_CST_LOW (arg0) & ~0x3)
14854 error ("argument to dss must be a 2-bit unsigned literal");
14855 return const0_rtx;
14858 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14859 op0 = copy_to_mode_reg (mode0, op0);
14861 emit_insn (gen_altivec_dss (op0));
14862 return NULL_RTX;
14864 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14865 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14866 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14867 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14868 case VSX_BUILTIN_VEC_INIT_V2DF:
14869 case VSX_BUILTIN_VEC_INIT_V2DI:
14870 case VSX_BUILTIN_VEC_INIT_V1TI:
14871 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14873 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14874 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14875 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14876 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14877 case VSX_BUILTIN_VEC_SET_V2DF:
14878 case VSX_BUILTIN_VEC_SET_V2DI:
14879 case VSX_BUILTIN_VEC_SET_V1TI:
14880 return altivec_expand_vec_set_builtin (exp);
14882 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14883 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14884 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14885 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14886 case VSX_BUILTIN_VEC_EXT_V2DF:
14887 case VSX_BUILTIN_VEC_EXT_V2DI:
14888 case VSX_BUILTIN_VEC_EXT_V1TI:
14889 return altivec_expand_vec_ext_builtin (exp, target);
14891 default:
14892 break;
14893 /* Fall through. */
14896 /* Expand abs* operations. */
14897 d = bdesc_abs;
14898 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14899 if (d->code == fcode)
14900 return altivec_expand_abs_builtin (d->icode, exp, target);
14902 /* Expand the AltiVec predicates. */
14903 d = bdesc_altivec_preds;
14904 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14905 if (d->code == fcode)
14906 return altivec_expand_predicate_builtin (d->icode, exp, target);
14908 /* LV* are funky. We initialized them differently. */
14909 switch (fcode)
14911 case ALTIVEC_BUILTIN_LVSL:
14912 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14913 exp, target, false);
14914 case ALTIVEC_BUILTIN_LVSR:
14915 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14916 exp, target, false);
14917 case ALTIVEC_BUILTIN_LVEBX:
14918 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14919 exp, target, false);
14920 case ALTIVEC_BUILTIN_LVEHX:
14921 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14922 exp, target, false);
14923 case ALTIVEC_BUILTIN_LVEWX:
14924 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14925 exp, target, false);
14926 case ALTIVEC_BUILTIN_LVXL_V2DF:
14927 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14928 exp, target, false);
14929 case ALTIVEC_BUILTIN_LVXL_V2DI:
14930 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14931 exp, target, false);
14932 case ALTIVEC_BUILTIN_LVXL_V4SF:
14933 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14934 exp, target, false);
14935 case ALTIVEC_BUILTIN_LVXL:
14936 case ALTIVEC_BUILTIN_LVXL_V4SI:
14937 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14938 exp, target, false);
14939 case ALTIVEC_BUILTIN_LVXL_V8HI:
14940 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14941 exp, target, false);
14942 case ALTIVEC_BUILTIN_LVXL_V16QI:
14943 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14944 exp, target, false);
14945 case ALTIVEC_BUILTIN_LVX_V2DF:
14946 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
14947 exp, target, false);
14948 case ALTIVEC_BUILTIN_LVX_V2DI:
14949 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
14950 exp, target, false);
14951 case ALTIVEC_BUILTIN_LVX_V4SF:
14952 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
14953 exp, target, false);
14954 case ALTIVEC_BUILTIN_LVX:
14955 case ALTIVEC_BUILTIN_LVX_V4SI:
14956 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
14957 exp, target, false);
14958 case ALTIVEC_BUILTIN_LVX_V8HI:
14959 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
14960 exp, target, false);
14961 case ALTIVEC_BUILTIN_LVX_V16QI:
14962 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
14963 exp, target, false);
14964 case ALTIVEC_BUILTIN_LVLX:
14965 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14966 exp, target, true);
14967 case ALTIVEC_BUILTIN_LVLXL:
14968 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14969 exp, target, true);
14970 case ALTIVEC_BUILTIN_LVRX:
14971 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14972 exp, target, true);
14973 case ALTIVEC_BUILTIN_LVRXL:
14974 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14975 exp, target, true);
14976 case VSX_BUILTIN_LXVD2X_V1TI:
14977 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14978 exp, target, false);
14979 case VSX_BUILTIN_LXVD2X_V2DF:
14980 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14981 exp, target, false);
14982 case VSX_BUILTIN_LXVD2X_V2DI:
14983 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14984 exp, target, false);
14985 case VSX_BUILTIN_LXVW4X_V4SF:
14986 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14987 exp, target, false);
14988 case VSX_BUILTIN_LXVW4X_V4SI:
14989 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14990 exp, target, false);
14991 case VSX_BUILTIN_LXVW4X_V8HI:
14992 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14993 exp, target, false);
14994 case VSX_BUILTIN_LXVW4X_V16QI:
14995 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14996 exp, target, false);
14997 /* For the following on big endian, it's ok to use any appropriate
14998 unaligned-supporting load, so use a generic expander. For
14999 little-endian, the exact element-reversing instruction must
15000 be used. */
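/* Mirroring the store case above (illustrative): for vec_xl of a
V4SI, the element at the lowest address must become element 0 of
the register, which on little endian requires vsx_ld_elemrev_v4si
rather than the raw doubleword load. */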
15001 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15003 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15004 : CODE_FOR_vsx_ld_elemrev_v2df);
15005 return altivec_expand_lv_builtin (code, exp, target, false);
15007 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15009 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15010 : CODE_FOR_vsx_ld_elemrev_v2di);
15011 return altivec_expand_lv_builtin (code, exp, target, false);
15013 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15015 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15016 : CODE_FOR_vsx_ld_elemrev_v4sf);
15017 return altivec_expand_lv_builtin (code, exp, target, false);
15019 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15021 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15022 : CODE_FOR_vsx_ld_elemrev_v4si);
15023 return altivec_expand_lv_builtin (code, exp, target, false);
15025 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15027 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15028 : CODE_FOR_vsx_ld_elemrev_v8hi);
15029 return altivec_expand_lv_builtin (code, exp, target, false);
15031 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15033 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15034 : CODE_FOR_vsx_ld_elemrev_v16qi);
15035 return altivec_expand_lv_builtin (code, exp, target, false);
15037 break;
15038 default:
15039 break;
15040 /* Fall through. */
15043 *expandedp = false;
15044 return NULL_RTX;
15047 /* Expand the builtin in EXP and store the result in TARGET. Store
15048 true in *EXPANDEDP if we found a builtin to expand. */
15049 static rtx
15050 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15052 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15053 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15054 const struct builtin_description *d;
15055 size_t i;
15057 *expandedp = true;
15059 switch (fcode)
15061 case PAIRED_BUILTIN_STX:
15062 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15063 case PAIRED_BUILTIN_LX:
15064 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15065 default:
15066 break;
15067 /* Fall through. */
15070 /* Expand the paired predicates. */
15071 d = bdesc_paired_preds;
15072 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15073 if (d->code == fcode)
15074 return paired_expand_predicate_builtin (d->icode, exp, target);
15076 *expandedp = false;
15077 return NULL_RTX;
15080 /* Binops that need to be initialized manually, but can be expanded
15081 automagically by rs6000_expand_binop_builtin. */
15082 static const struct builtin_description bdesc_2arg_spe[] =
15084 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15085 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15086 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15087 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15088 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15089 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15090 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15091 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15092 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15093 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15094 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15095 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15096 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15097 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15098 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15099 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15100 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15101 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15102 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15103 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15104 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15105 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15108 /* Expand the builtin in EXP and store the result in TARGET. Store
15109 true in *EXPANDEDP if we found a builtin to expand.
15111 This expands the SPE builtins that are not simple unary and binary
15112 operations. */
15113 static rtx
15114 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15116 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15117 tree arg1, arg0;
15118 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15119 enum insn_code icode;
15120 machine_mode tmode, mode0;
15121 rtx pat, op0;
15122 const struct builtin_description *d;
15123 size_t i;
15125 *expandedp = true;
15127 /* Syntax check for a 5-bit unsigned immediate. */
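/* Concretely (hypothetical call): the third argument of
__builtin_spe_evstdd (v, p, 31) is accepted, while 32 trips the
error below, since the hardware offset field is only 5 bits wide
(0..31). */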
15128 switch (fcode)
15130 case SPE_BUILTIN_EVSTDD:
15131 case SPE_BUILTIN_EVSTDH:
15132 case SPE_BUILTIN_EVSTDW:
15133 case SPE_BUILTIN_EVSTWHE:
15134 case SPE_BUILTIN_EVSTWHO:
15135 case SPE_BUILTIN_EVSTWWE:
15136 case SPE_BUILTIN_EVSTWWO:
15137 arg1 = CALL_EXPR_ARG (exp, 2);
15138 if (TREE_CODE (arg1) != INTEGER_CST
15139 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15141 error ("argument 2 must be a 5-bit unsigned literal");
15142 return const0_rtx;
15144 break;
15145 default:
15146 break;
15149 /* The evsplat*i instructions are not quite generic. */
15150 switch (fcode)
15152 case SPE_BUILTIN_EVSPLATFI:
15153 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15154 exp, target);
15155 case SPE_BUILTIN_EVSPLATI:
15156 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15157 exp, target);
15158 default:
15159 break;
15162 d = bdesc_2arg_spe;
15163 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15164 if (d->code == fcode)
15165 return rs6000_expand_binop_builtin (d->icode, exp, target);
15167 d = bdesc_spe_predicates;
15168 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15169 if (d->code == fcode)
15170 return spe_expand_predicate_builtin (d->icode, exp, target);
15172 d = bdesc_spe_evsel;
15173 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15174 if (d->code == fcode)
15175 return spe_expand_evsel_builtin (d->icode, exp, target);
15177 switch (fcode)
15179 case SPE_BUILTIN_EVSTDDX:
15180 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
15181 case SPE_BUILTIN_EVSTDHX:
15182 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
15183 case SPE_BUILTIN_EVSTDWX:
15184 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
15185 case SPE_BUILTIN_EVSTWHEX:
15186 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
15187 case SPE_BUILTIN_EVSTWHOX:
15188 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
15189 case SPE_BUILTIN_EVSTWWEX:
15190 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
15191 case SPE_BUILTIN_EVSTWWOX:
15192 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
15193 case SPE_BUILTIN_EVSTDD:
15194 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
15195 case SPE_BUILTIN_EVSTDH:
15196 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
15197 case SPE_BUILTIN_EVSTDW:
15198 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
15199 case SPE_BUILTIN_EVSTWHE:
15200 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
15201 case SPE_BUILTIN_EVSTWHO:
15202 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
15203 case SPE_BUILTIN_EVSTWWE:
15204 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
15205 case SPE_BUILTIN_EVSTWWO:
15206 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
15207 case SPE_BUILTIN_MFSPEFSCR:
15208 icode = CODE_FOR_spe_mfspefscr;
15209 tmode = insn_data[icode].operand[0].mode;
15211 if (target == 0
15212 || GET_MODE (target) != tmode
15213 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15214 target = gen_reg_rtx (tmode);
15216 pat = GEN_FCN (icode) (target);
15217 if (! pat)
15218 return 0;
15219 emit_insn (pat);
15220 return target;
15221 case SPE_BUILTIN_MTSPEFSCR:
15222 icode = CODE_FOR_spe_mtspefscr;
15223 arg0 = CALL_EXPR_ARG (exp, 0);
15224 op0 = expand_normal (arg0);
15225 mode0 = insn_data[icode].operand[0].mode;
15227 if (arg0 == error_mark_node)
15228 return const0_rtx;
15230 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15231 op0 = copy_to_mode_reg (mode0, op0);
15233 pat = GEN_FCN (icode) (op0);
15234 if (pat)
15235 emit_insn (pat);
15236 return NULL_RTX;
15237 default:
15238 break;
15241 *expandedp = false;
15242 return NULL_RTX;
15245 static rtx
15246 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15248 rtx pat, scratch, tmp;
15249 tree form = CALL_EXPR_ARG (exp, 0);
15250 tree arg0 = CALL_EXPR_ARG (exp, 1);
15251 tree arg1 = CALL_EXPR_ARG (exp, 2);
15252 rtx op0 = expand_normal (arg0);
15253 rtx op1 = expand_normal (arg1);
15254 machine_mode mode0 = insn_data[icode].operand[1].mode;
15255 machine_mode mode1 = insn_data[icode].operand[2].mode;
15256 int form_int;
15257 enum rtx_code code;
15259 if (TREE_CODE (form) != INTEGER_CST)
15261 error ("argument 1 of __builtin_paired_predicate must be a constant");
15262 return const0_rtx;
15264 else
15265 form_int = TREE_INT_CST_LOW (form);
15267 gcc_assert (mode0 == mode1);
15269 if (arg0 == error_mark_node || arg1 == error_mark_node)
15270 return const0_rtx;
15272 if (target == 0
15273 || GET_MODE (target) != SImode
15274 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15275 target = gen_reg_rtx (SImode);
15276 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15277 op0 = copy_to_mode_reg (mode0, op0);
15278 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15279 op1 = copy_to_mode_reg (mode1, op1);
15281 scratch = gen_reg_rtx (CCFPmode);
15283 pat = GEN_FCN (icode) (scratch, op0, op1);
15284 if (!pat)
15285 return const0_rtx;
15287 emit_insn (pat);
15289 switch (form_int)
15291 /* LT bit. */
15292 case 0:
15293 code = LT;
15294 break;
15295 /* GT bit. */
15296 case 1:
15297 code = GT;
15298 break;
15299 /* EQ bit. */
15300 case 2:
15301 code = EQ;
15302 break;
15303 /* UN bit. */
15304 case 3:
15305 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15306 return target;
15307 default:
15308 error ("argument 1 of __builtin_paired_predicate is out of range");
15309 return const0_rtx;
15312 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15313 emit_move_insn (target, tmp);
15314 return target;
15317 static rtx
15318 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15320 rtx pat, scratch, tmp;
15321 tree form = CALL_EXPR_ARG (exp, 0);
15322 tree arg0 = CALL_EXPR_ARG (exp, 1);
15323 tree arg1 = CALL_EXPR_ARG (exp, 2);
15324 rtx op0 = expand_normal (arg0);
15325 rtx op1 = expand_normal (arg1);
15326 machine_mode mode0 = insn_data[icode].operand[1].mode;
15327 machine_mode mode1 = insn_data[icode].operand[2].mode;
15328 int form_int;
15329 enum rtx_code code;
15331 if (TREE_CODE (form) != INTEGER_CST)
15333 error ("argument 1 of __builtin_spe_predicate must be a constant");
15334 return const0_rtx;
15336 else
15337 form_int = TREE_INT_CST_LOW (form);
15339 gcc_assert (mode0 == mode1);
15341 if (arg0 == error_mark_node || arg1 == error_mark_node)
15342 return const0_rtx;
15344 if (target == 0
15345 || GET_MODE (target) != SImode
15346 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
15347 target = gen_reg_rtx (SImode);
15349 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15350 op0 = copy_to_mode_reg (mode0, op0);
15351 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15352 op1 = copy_to_mode_reg (mode1, op1);
15354 scratch = gen_reg_rtx (CCmode);
15356 pat = GEN_FCN (icode) (scratch, op0, op1);
15357 if (! pat)
15358 return const0_rtx;
15359 emit_insn (pat);
15361 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
15362 _lower_. We use one compare, but look in different bits of the
15363 CR for each variant.
15365 There are 2 elements in each SPE simd type (upper/lower). The CR
15366 bits are set as follows:
15368 BIT0 | BIT 1 | BIT 2 | BIT 3
15369 U | L | (U | L) | (U & L)
15371 So, for an "all" relationship, BIT 3 would be set.
15372 For an "any" relationship, BIT 2 would be set. Etc.
15374 Following traditional nomenclature, these bits map to:
15376 BIT0 | BIT 1 | BIT 2 | BIT 3
15377 LT | GT | EQ | OV
15379 Later, we will generate rtl to look in the OV/EQ/LT/GT bits, matching forms 0-3 below.
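/* Worked example (illustrative operands): comparing {a_u, a_l}
against {b_u, b_l} with a greater-than predicate yields
BIT0 = a_u > b_u, BIT1 = a_l > b_l, BIT2 = BIT0 | BIT1 ("any"),
and BIT3 = BIT0 & BIT1 ("all"), which is exactly what the
form_int cases below read back out of the CR field. */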
15382 switch (form_int)
15384 /* All variant. OV bit. */
15385 case 0:
15386 /* We need to get to the OV bit, which is the ORDERED bit. We
15387 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
15388 that's ugly and will make validate_condition_mode die.
15389 So let's just use another pattern. */
15390 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15391 return target;
15392 /* Any variant. EQ bit. */
15393 case 1:
15394 code = EQ;
15395 break;
15396 /* Upper variant. LT bit. */
15397 case 2:
15398 code = LT;
15399 break;
15400 /* Lower variant. GT bit. */
15401 case 3:
15402 code = GT;
15403 break;
15404 default:
15405 error ("argument 1 of __builtin_spe_predicate is out of range");
15406 return const0_rtx;
15409 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15410 emit_move_insn (target, tmp);
15412 return target;
15415 /* The evsel builtins look like this:
15417 e = __builtin_spe_evsel_OP (a, b, c, d);
15419 and work like this:
15421 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
15422 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
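/* So, assuming a greater-than variant spelled this way,
e = __builtin_spe_evsel_gts (a, b, c, d);
yields c[upper] in e[upper] when a[upper] > b[upper] (else
d[upper]), and likewise for the lower element. */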
15425 static rtx
15426 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
15428 rtx pat, scratch;
15429 tree arg0 = CALL_EXPR_ARG (exp, 0);
15430 tree arg1 = CALL_EXPR_ARG (exp, 1);
15431 tree arg2 = CALL_EXPR_ARG (exp, 2);
15432 tree arg3 = CALL_EXPR_ARG (exp, 3);
15433 rtx op0 = expand_normal (arg0);
15434 rtx op1 = expand_normal (arg1);
15435 rtx op2 = expand_normal (arg2);
15436 rtx op3 = expand_normal (arg3);
15437 machine_mode mode0 = insn_data[icode].operand[1].mode;
15438 machine_mode mode1 = insn_data[icode].operand[2].mode;
15440 gcc_assert (mode0 == mode1);
15442 if (arg0 == error_mark_node || arg1 == error_mark_node
15443 || arg2 == error_mark_node || arg3 == error_mark_node)
15444 return const0_rtx;
15446 if (target == 0
15447 || GET_MODE (target) != mode0
15448 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
15449 target = gen_reg_rtx (mode0);
15451 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15452 op0 = copy_to_mode_reg (mode0, op0);
15453 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15454 op1 = copy_to_mode_reg (mode0, op1);
15455 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15456 op2 = copy_to_mode_reg (mode0, op2);
15457 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
15458 op3 = copy_to_mode_reg (mode0, op3);
15460 /* Generate the compare. */
15461 scratch = gen_reg_rtx (CCmode);
15462 pat = GEN_FCN (icode) (scratch, op0, op1);
15463 if (! pat)
15464 return const0_rtx;
15465 emit_insn (pat);
15467 if (mode0 == V2SImode)
15468 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
15469 else
15470 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
15472 return target;
15475 /* Raise an error message for a builtin function that is called without the
15476 appropriate target options being set. */
15478 static void
15479 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15481 size_t uns_fncode = (size_t)fncode;
15482 const char *name = rs6000_builtin_info[uns_fncode].name;
15483 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15485 gcc_assert (name != NULL);
15486 if ((fnmask & RS6000_BTM_CELL) != 0)
15487 error ("Builtin function %s is only valid for the cell processor", name);
15488 else if ((fnmask & RS6000_BTM_VSX) != 0)
15489 error ("Builtin function %s requires the -mvsx option", name);
15490 else if ((fnmask & RS6000_BTM_HTM) != 0)
15491 error ("Builtin function %s requires the -mhtm option", name);
15492 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15493 error ("Builtin function %s requires the -maltivec option", name);
15494 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
15495 error ("Builtin function %s requires the -mpaired option", name);
15496 else if ((fnmask & RS6000_BTM_SPE) != 0)
15497 error ("Builtin function %s requires the -mspe option", name);
15498 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15499 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15500 error ("Builtin function %s requires the -mhard-dfp and"
15501 " -mpower8-vector options", name);
15502 else if ((fnmask & RS6000_BTM_DFP) != 0)
15503 error ("Builtin function %s requires the -mhard-dfp option", name);
15504 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15505 error ("Builtin function %s requires the -mpower8-vector option", name);
15506 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15507 error ("Builtin function %s requires the -mpower9-vector option", name);
15508 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15509 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15510 error ("Builtin function %s requires the -mpower9-misc and"
15511 " -m64 options", name);
15512 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15513 error ("Builtin function %s requires the -mpower9-misc option", name);
15514 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15515 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15516 error ("Builtin function %s requires the -mhard-float and"
15517 " -mlong-double-128 options", name);
15518 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15519 error ("Builtin function %s requires the -mhard-float option", name);
15520 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15521 error ("Builtin function %s requires the -mfloat128 option", name);
15522 else
15523 error ("Builtin function %s is not supported with the current options",
15524 name);
15527 /* Target hook for early folding of built-ins, shamelessly stolen
15528 from ia64.c. */
15530 static tree
15531 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
15532 tree *args, bool ignore ATTRIBUTE_UNUSED)
15534 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
15536 enum rs6000_builtins fn_code
15537 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15538 switch (fn_code)
15540 case RS6000_BUILTIN_NANQ:
15541 case RS6000_BUILTIN_NANSQ:
15543 tree type = TREE_TYPE (TREE_TYPE (fndecl));
15544 const char *str = c_getstr (*args);
15545 int quiet = fn_code == RS6000_BUILTIN_NANQ;
15546 REAL_VALUE_TYPE real;
15548 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
15549 return build_real (type, real);
15550 return NULL_TREE;
15552 case RS6000_BUILTIN_INFQ:
15553 case RS6000_BUILTIN_HUGE_VALQ:
15555 tree type = TREE_TYPE (TREE_TYPE (fndecl));
15556 REAL_VALUE_TYPE inf;
15557 real_inf (&inf);
15558 return build_real (type, inf);
15560 default:
15561 break;
15564 #ifdef SUBTARGET_FOLD_BUILTIN
15565 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15566 #else
15567 return NULL_TREE;
15568 #endif
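/* As an illustration of the folding above: a call written as
__builtin_nanq ("1") becomes a compile-time IEEE 128-bit quiet
NaN constant carrying payload 1, and __builtin_infq () always
folds to +Inf of the same type; neither emits any code. */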
15571 /* Expand an expression EXP that calls a built-in function,
15572 with result going to TARGET if that's convenient
15573 (and in mode MODE if that's convenient).
15574 SUBTARGET may be used as the target for computing one of EXP's operands.
15575 IGNORE is nonzero if the value is to be ignored. */
15577 static rtx
15578 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15579 machine_mode mode ATTRIBUTE_UNUSED,
15580 int ignore ATTRIBUTE_UNUSED)
15582 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15583 enum rs6000_builtins fcode
15584 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15585 size_t uns_fcode = (size_t)fcode;
15586 const struct builtin_description *d;
15587 size_t i;
15588 rtx ret;
15589 bool success;
15590 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15591 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15593 if (TARGET_DEBUG_BUILTIN)
15595 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15596 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15597 const char *name2 = ((icode != CODE_FOR_nothing)
15598 ? get_insn_name ((int)icode)
15599 : "nothing");
15600 const char *name3;
15602 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15604 default: name3 = "unknown"; break;
15605 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15606 case RS6000_BTC_UNARY: name3 = "unary"; break;
15607 case RS6000_BTC_BINARY: name3 = "binary"; break;
15608 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15609 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15610 case RS6000_BTC_ABS: name3 = "abs"; break;
15611 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
15612 case RS6000_BTC_DST: name3 = "dst"; break;
15616 fprintf (stderr,
15617 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15618 (name1) ? name1 : "---", fcode,
15619 (name2) ? name2 : "---", (int)icode,
15620 name3,
15621 func_valid_p ? "" : ", not valid");
15624 if (!func_valid_p)
15626 rs6000_invalid_builtin (fcode);
15628 /* Given it is invalid, just generate a normal call. */
15629 return expand_call (exp, target, ignore);
15632 switch (fcode)
15634 case RS6000_BUILTIN_RECIP:
15635 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15637 case RS6000_BUILTIN_RECIPF:
15638 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15640 case RS6000_BUILTIN_RSQRTF:
15641 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15643 case RS6000_BUILTIN_RSQRT:
15644 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15646 case POWER7_BUILTIN_BPERMD:
15647 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15648 ? CODE_FOR_bpermd_di
15649 : CODE_FOR_bpermd_si), exp, target);
15651 case RS6000_BUILTIN_GET_TB:
15652 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15653 target);
15655 case RS6000_BUILTIN_MFTB:
15656 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15657 ? CODE_FOR_rs6000_mftb_di
15658 : CODE_FOR_rs6000_mftb_si),
15659 target);
15661 case RS6000_BUILTIN_MFFS:
15662 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15664 case RS6000_BUILTIN_MTFSF:
15665 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15667 case RS6000_BUILTIN_CPU_INIT:
15668 case RS6000_BUILTIN_CPU_IS:
15669 case RS6000_BUILTIN_CPU_SUPPORTS:
15670 return cpu_expand_builtin (fcode, exp, target);
15672 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15673 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15675 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15676 : (int) CODE_FOR_altivec_lvsl_direct);
15677 machine_mode tmode = insn_data[icode].operand[0].mode;
15678 machine_mode mode = insn_data[icode].operand[1].mode;
15679 tree arg;
15680 rtx op, addr, pat;
15682 gcc_assert (TARGET_ALTIVEC);
15684 arg = CALL_EXPR_ARG (exp, 0);
15685 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
15686 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
15687 addr = memory_address (mode, op);
15688 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
15689 op = addr;
15690 else
15692 /* For the load case we need to negate the address. */
15693 op = gen_reg_rtx (GET_MODE (addr));
15694 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
15696 op = gen_rtx_MEM (mode, op);
15698 if (target == 0
15699 || GET_MODE (target) != tmode
15700 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15701 target = gen_reg_rtx (tmode);
15703 pat = GEN_FCN (icode) (target, op);
15704 if (!pat)
15705 return 0;
15706 emit_insn (pat);
15708 return target;
15711 case ALTIVEC_BUILTIN_VCFUX:
15712 case ALTIVEC_BUILTIN_VCFSX:
15713 case ALTIVEC_BUILTIN_VCTUXS:
15714 case ALTIVEC_BUILTIN_VCTSXS:
15715 /* FIXME: There's got to be a nicer way to handle this case than
15716 constructing a new CALL_EXPR. */
15717 if (call_expr_nargs (exp) == 1)
15719 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
15720 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
15722 break;
15724 default:
15725 break;
15728 if (TARGET_ALTIVEC)
15730 ret = altivec_expand_builtin (exp, target, &success);
15732 if (success)
15733 return ret;
15735 if (TARGET_SPE)
15737 ret = spe_expand_builtin (exp, target, &success);
15739 if (success)
15740 return ret;
15742 if (TARGET_PAIRED_FLOAT)
15744 ret = paired_expand_builtin (exp, target, &success);
15746 if (success)
15747 return ret;
15749 if (TARGET_HTM)
15751 ret = htm_expand_builtin (exp, target, &success);
15753 if (success)
15754 return ret;
15757 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
15758 /* RS6000_BTC_SPECIAL represents no-operand operators. */
15759 gcc_assert (attr == RS6000_BTC_UNARY
15760 || attr == RS6000_BTC_BINARY
15761 || attr == RS6000_BTC_TERNARY
15762 || attr == RS6000_BTC_SPECIAL);
15764 /* Handle simple unary operations. */
15765 d = bdesc_1arg;
15766 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15767 if (d->code == fcode)
15768 return rs6000_expand_unop_builtin (d->icode, exp, target);
15770 /* Handle simple binary operations. */
15771 d = bdesc_2arg;
15772 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15773 if (d->code == fcode)
15774 return rs6000_expand_binop_builtin (d->icode, exp, target);
15776 /* Handle simple ternary operations. */
15777 d = bdesc_3arg;
15778 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15779 if (d->code == fcode)
15780 return rs6000_expand_ternop_builtin (d->icode, exp, target);
15782 /* Handle simple no-argument operations. */
15783 d = bdesc_0arg;
15784 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
15785 if (d->code == fcode)
15786 return rs6000_expand_zeroop_builtin (d->icode, target);
15788 gcc_unreachable ();
15791 static void
15792 rs6000_init_builtins (void)
15794 tree tdecl;
15795 tree ftype;
15796 machine_mode mode;
15798 if (TARGET_DEBUG_BUILTIN)
15799 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
15800 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
15801 (TARGET_SPE) ? ", spe" : "",
15802 (TARGET_ALTIVEC) ? ", altivec" : "",
15803 (TARGET_VSX) ? ", vsx" : "");
15805 V2SI_type_node = build_vector_type (intSI_type_node, 2);
15806 V2SF_type_node = build_vector_type (float_type_node, 2);
15807 V2DI_type_node = build_vector_type (intDI_type_node, 2);
15808 V2DF_type_node = build_vector_type (double_type_node, 2);
15809 V4HI_type_node = build_vector_type (intHI_type_node, 4);
15810 V4SI_type_node = build_vector_type (intSI_type_node, 4);
15811 V4SF_type_node = build_vector_type (float_type_node, 4);
15812 V8HI_type_node = build_vector_type (intHI_type_node, 8);
15813 V16QI_type_node = build_vector_type (intQI_type_node, 16);
15815 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
15816 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
15817 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
15818 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
15820 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
15821 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
15822 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
15823 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
15825 const_str_type_node
15826 = build_pointer_type (build_qualified_type (char_type_node,
15827 TYPE_QUAL_CONST));
15829 /* We use V1TI mode as a special container to hold __int128_t items that
15830 must live in VSX registers. */
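/* E.g. the "__vector __int128" type registered further below is a
one-element vector whose single subpart is the 128-bit integer
itself, which keeps such values in VSX registers. */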
15831 if (intTI_type_node)
15833 V1TI_type_node = build_vector_type (intTI_type_node, 1);
15834 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
15837 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
15838 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
15839 'vector unsigned short'. */
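/* For instance (illustrative AltiVec source):
vector unsigned char u;
vector bool char m = vec_cmpeq (u, u);
The mask type of m must stay distinct from the type of u so that
C++ overload resolution can tell the two apart. */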
15841 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
15842 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15843 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
15844 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
15845 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
15847 long_integer_type_internal_node = long_integer_type_node;
15848 long_unsigned_type_internal_node = long_unsigned_type_node;
15849 long_long_integer_type_internal_node = long_long_integer_type_node;
15850 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
15851 intQI_type_internal_node = intQI_type_node;
15852 uintQI_type_internal_node = unsigned_intQI_type_node;
15853 intHI_type_internal_node = intHI_type_node;
15854 uintHI_type_internal_node = unsigned_intHI_type_node;
15855 intSI_type_internal_node = intSI_type_node;
15856 uintSI_type_internal_node = unsigned_intSI_type_node;
15857 intDI_type_internal_node = intDI_type_node;
15858 uintDI_type_internal_node = unsigned_intDI_type_node;
15859 intTI_type_internal_node = intTI_type_node;
15860 uintTI_type_internal_node = unsigned_intTI_type_node;
15861 float_type_internal_node = float_type_node;
15862 double_type_internal_node = double_type_node;
15863 long_double_type_internal_node = long_double_type_node;
15864 dfloat64_type_internal_node = dfloat64_type_node;
15865 dfloat128_type_internal_node = dfloat128_type_node;
15866 void_type_internal_node = void_type_node;
15868 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
15869 IFmode is the IBM extended 128-bit format that is a pair of doubles.
15870 TFmode will be either IEEE 128-bit floating point or the IBM double-double
15871 format that uses a pair of doubles, depending on the switches and
15872 defaults. */
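/* E.g. (assuming -mfloat128): a "__float128" object gets KFmode and
a "__ibm128" object gets IFmode via the nodes built below, while
"long double" ends up as whichever 128-bit format TFmode denotes
under the selected options. */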
15873 if (TARGET_FLOAT128)
15875 ibm128_float_type_node = make_node (REAL_TYPE);
15876 TYPE_PRECISION (ibm128_float_type_node) = 128;
15877 layout_type (ibm128_float_type_node);
15878 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
15880 ieee128_float_type_node = make_node (REAL_TYPE);
15881 TYPE_PRECISION (ieee128_float_type_node) = 128;
15882 layout_type (ieee128_float_type_node);
15883 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
15885 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
15886 "__float128");
15888 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
15889 "__ibm128");
15891 else
15893 /* All types must be nonzero, or self-test barfs during bootstrap. */
15894 ieee128_float_type_node = long_double_type_node;
15895 ibm128_float_type_node = long_double_type_node;
15898 /* Initialize the modes for builtin_function_type, mapping a machine mode to
15899 tree type node. */
15900 builtin_mode_to_type[QImode][0] = integer_type_node;
15901 builtin_mode_to_type[HImode][0] = integer_type_node;
15902 builtin_mode_to_type[SImode][0] = intSI_type_node;
15903 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
15904 builtin_mode_to_type[DImode][0] = intDI_type_node;
15905 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
15906 builtin_mode_to_type[TImode][0] = intTI_type_node;
15907 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
15908 builtin_mode_to_type[SFmode][0] = float_type_node;
15909 builtin_mode_to_type[DFmode][0] = double_type_node;
15910 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
15911 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
15912 builtin_mode_to_type[TFmode][0] = long_double_type_node;
15913 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
15914 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
15915 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
15916 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
15917 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
15918 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
15919 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
15920 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
15921 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
15922 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
15923 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
15924 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
15925 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
15926 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
15927 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
15928 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
15929 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
15931 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
15932 TYPE_NAME (bool_char_type_node) = tdecl;
15934 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
15935 TYPE_NAME (bool_short_type_node) = tdecl;
15937 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
15938 TYPE_NAME (bool_int_type_node) = tdecl;
15940 tdecl = add_builtin_type ("__pixel", pixel_type_node);
15941 TYPE_NAME (pixel_type_node) = tdecl;
15943 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
15944 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
15945 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
15946 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
15947 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
15949 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
15950 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
15952 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
15953 TYPE_NAME (V16QI_type_node) = tdecl;
15955 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
15956 TYPE_NAME (bool_V16QI_type_node) = tdecl;
15958 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
15959 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
15961 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
15962 TYPE_NAME (V8HI_type_node) = tdecl;
15964 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
15965 TYPE_NAME (bool_V8HI_type_node) = tdecl;
15967 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
15968 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
15970 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
15971 TYPE_NAME (V4SI_type_node) = tdecl;
15973 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
15974 TYPE_NAME (bool_V4SI_type_node) = tdecl;
15976 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
15977 TYPE_NAME (V4SF_type_node) = tdecl;
15979 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
15980 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
15982 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
15983 TYPE_NAME (V2DF_type_node) = tdecl;
15985 if (TARGET_POWERPC64)
15987 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
15988 TYPE_NAME (V2DI_type_node) = tdecl;
15990 tdecl = add_builtin_type ("__vector unsigned long",
15991 unsigned_V2DI_type_node);
15992 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15994 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
15995 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15997 else
15999 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16000 TYPE_NAME (V2DI_type_node) = tdecl;
16002 tdecl = add_builtin_type ("__vector unsigned long long",
16003 unsigned_V2DI_type_node);
16004 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16006 tdecl = add_builtin_type ("__vector __bool long long",
16007 bool_V2DI_type_node);
16008 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16011 if (V1TI_type_node)
16013 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16014 TYPE_NAME (V1TI_type_node) = tdecl;
16016 tdecl = add_builtin_type ("__vector unsigned __int128",
16017 unsigned_V1TI_type_node);
16018 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16021 /* Paired and SPE builtins are only available if you build a compiler with
16022 the appropriate options, so only create those builtins with the
16023 appropriate compiler option. Create Altivec and VSX builtins on machines
16024 with at least the general purpose extensions (970 and newer) to allow the
16025 use of the target attribute. */
16026 if (TARGET_PAIRED_FLOAT)
16027 paired_init_builtins ();
16028 if (TARGET_SPE)
16029 spe_init_builtins ();
16030 if (TARGET_EXTRA_BUILTINS)
16031 altivec_init_builtins ();
16032 if (TARGET_HTM)
16033 htm_init_builtins ();
16035 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16036 rs6000_common_init_builtins ();
16038 ftype = build_function_type_list (ieee128_float_type_node,
16039 const_str_type_node, NULL_TREE);
16040 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16041 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16043 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16044 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16045 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16047 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16048 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16049 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16051 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16052 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16053 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16055 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16056 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16057 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16059 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16060 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16061 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16063 mode = (TARGET_64BIT) ? DImode : SImode;
16064 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16065 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16066 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16068 ftype = build_function_type_list (unsigned_intDI_type_node,
16069 NULL_TREE);
16070 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16072 if (TARGET_64BIT)
16073 ftype = build_function_type_list (unsigned_intDI_type_node,
16074 NULL_TREE);
16075 else
16076 ftype = build_function_type_list (unsigned_intSI_type_node,
16077 NULL_TREE);
16078 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16080 ftype = build_function_type_list (double_type_node, NULL_TREE);
16081 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16083 ftype = build_function_type_list (void_type_node,
16084 intSI_type_node, double_type_node,
16085 NULL_TREE);
16086 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16088 ftype = build_function_type_list (void_type_node, NULL_TREE);
16089 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16091 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16092 NULL_TREE);
16093 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16094 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16096 #if TARGET_XCOFF
16097 /* AIX libm provides clog as __clog. */
16098 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16099 set_user_assembler_name (tdecl, "__clog");
16100 #endif
16102 #ifdef SUBTARGET_INIT_BUILTINS
16103 SUBTARGET_INIT_BUILTINS;
16104 #endif
16107 /* Returns the rs6000 builtin decl for CODE. */
16109 static tree
16110 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16112 HOST_WIDE_INT fnmask;
16114 if (code >= RS6000_BUILTIN_COUNT)
16115 return error_mark_node;
16117 fnmask = rs6000_builtin_info[code].mask;
16118 if ((fnmask & rs6000_builtin_mask) != fnmask)
16120 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16121 return error_mark_node;
16124 return rs6000_builtin_decls[code];
16127 static void
16128 spe_init_builtins (void)
16130 tree puint_type_node = build_pointer_type (unsigned_type_node);
16131 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16132 const struct builtin_description *d;
16133 size_t i;
16135 tree v2si_ftype_4_v2si
16136 = build_function_type_list (opaque_V2SI_type_node,
16137 opaque_V2SI_type_node,
16138 opaque_V2SI_type_node,
16139 opaque_V2SI_type_node,
16140 opaque_V2SI_type_node,
16141 NULL_TREE);
16143 tree v2sf_ftype_4_v2sf
16144 = build_function_type_list (opaque_V2SF_type_node,
16145 opaque_V2SF_type_node,
16146 opaque_V2SF_type_node,
16147 opaque_V2SF_type_node,
16148 opaque_V2SF_type_node,
16149 NULL_TREE);
16151 tree int_ftype_int_v2si_v2si
16152 = build_function_type_list (integer_type_node,
16153 integer_type_node,
16154 opaque_V2SI_type_node,
16155 opaque_V2SI_type_node,
16156 NULL_TREE);
16158 tree int_ftype_int_v2sf_v2sf
16159 = build_function_type_list (integer_type_node,
16160 integer_type_node,
16161 opaque_V2SF_type_node,
16162 opaque_V2SF_type_node,
16163 NULL_TREE);
16165 tree void_ftype_v2si_puint_int
16166 = build_function_type_list (void_type_node,
16167 opaque_V2SI_type_node,
16168 puint_type_node,
16169 integer_type_node,
16170 NULL_TREE);
16172 tree void_ftype_v2si_puint_char
16173 = build_function_type_list (void_type_node,
16174 opaque_V2SI_type_node,
16175 puint_type_node,
16176 char_type_node,
16177 NULL_TREE);
16179 tree void_ftype_v2si_pv2si_int
16180 = build_function_type_list (void_type_node,
16181 opaque_V2SI_type_node,
16182 opaque_p_V2SI_type_node,
16183 integer_type_node,
16184 NULL_TREE);
16186 tree void_ftype_v2si_pv2si_char
16187 = build_function_type_list (void_type_node,
16188 opaque_V2SI_type_node,
16189 opaque_p_V2SI_type_node,
16190 char_type_node,
16191 NULL_TREE);
16193 tree void_ftype_int
16194 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16196 tree int_ftype_void
16197 = build_function_type_list (integer_type_node, NULL_TREE);
16199 tree v2si_ftype_pv2si_int
16200 = build_function_type_list (opaque_V2SI_type_node,
16201 opaque_p_V2SI_type_node,
16202 integer_type_node,
16203 NULL_TREE);
16205 tree v2si_ftype_puint_int
16206 = build_function_type_list (opaque_V2SI_type_node,
16207 puint_type_node,
16208 integer_type_node,
16209 NULL_TREE);
16211 tree v2si_ftype_pushort_int
16212 = build_function_type_list (opaque_V2SI_type_node,
16213 pushort_type_node,
16214 integer_type_node,
16215 NULL_TREE);
16217 tree v2si_ftype_signed_char
16218 = build_function_type_list (opaque_V2SI_type_node,
16219 signed_char_type_node,
16220 NULL_TREE);
16222 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
16224 /* Initialize irregular SPE builtins. */
16226 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
16227 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
16228 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
16229 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
16230 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
16231 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
16232 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
16233 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
16234 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
16235 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
16236 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
16237 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
16238 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
16239 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
16240 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
16241 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
16242 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
16243 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
16245 /* Loads. */
16246 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
16247 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
16248 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
16249 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
16250 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
16251 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
16252 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
16253 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
16254 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
16255 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
16256 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
16257 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
16258 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
16259 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
16260 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
16261 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
16262 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
16263 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
16264 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
16265 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
16266 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
16267 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
16269 /* Predicates. */
16270 d = bdesc_spe_predicates;
16271 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
16273 tree type;
16275 switch (insn_data[d->icode].operand[1].mode)
16277 case V2SImode:
16278 type = int_ftype_int_v2si_v2si;
16279 break;
16280 case V2SFmode:
16281 type = int_ftype_int_v2sf_v2sf;
16282 break;
16283 default:
16284 gcc_unreachable ();
16287 def_builtin (d->name, type, d->code);
16290 /* Evsel predicates. */
16291 d = bdesc_spe_evsel;
16292 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
16294 tree type;
16296 switch (insn_data[d->icode].operand[1].mode)
16298 case V2SImode:
16299 type = v2si_ftype_4_v2si;
16300 break;
16301 case V2SFmode:
16302 type = v2sf_ftype_4_v2sf;
16303 break;
16304 default:
16305 gcc_unreachable ();
16308 def_builtin (d->name, type, d->code);
16312 static void
16313 paired_init_builtins (void)
16315 const struct builtin_description *d;
16316 size_t i;
16318 tree int_ftype_int_v2sf_v2sf
16319 = build_function_type_list (integer_type_node,
16320 integer_type_node,
16321 V2SF_type_node,
16322 V2SF_type_node,
16323 NULL_TREE);
16324 tree pcfloat_type_node =
16325 build_pointer_type (build_qualified_type
16326 (float_type_node, TYPE_QUAL_CONST));
16328 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
16329 long_integer_type_node,
16330 pcfloat_type_node,
16331 NULL_TREE);
16332 tree void_ftype_v2sf_long_pcfloat =
16333 build_function_type_list (void_type_node,
16334 V2SF_type_node,
16335 long_integer_type_node,
16336 pcfloat_type_node,
16337 NULL_TREE);
16340 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
16341 PAIRED_BUILTIN_LX);
16344 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
16345 PAIRED_BUILTIN_STX);
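#if 0
/* Illustrative sketch (user code, not compiled here, assuming -mpaired):
   V2SF_type_node corresponds to an 8-byte float vector; the typedef is
   only for the example.  The long operand of __builtin_paired_lx is a
   byte offset added to the pointer, matching v2sf_ftype_long_pcfloat.  */
typedef float v2sf __attribute__ ((vector_size (8)));

v2sf
load_pair (const float *p)
{
  return __builtin_paired_lx (0, p);
}
#endif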
16347 /* Predicates. */
16348 d = bdesc_paired_preds;
16349 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
16351 tree type;
16353 if (TARGET_DEBUG_BUILTIN)
16354 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
16355 (int)i, get_insn_name (d->icode), (int)d->icode,
16356 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
16358 switch (insn_data[d->icode].operand[1].mode)
16360 case V2SFmode:
16361 type = int_ftype_int_v2sf_v2sf;
16362 break;
16363 default:
16364 gcc_unreachable ();
16367 def_builtin (d->name, type, d->code);
16371 static void
16372 altivec_init_builtins (void)
16374 const struct builtin_description *d;
16375 size_t i;
16376 tree ftype;
16377 tree decl;
16379 tree pvoid_type_node = build_pointer_type (void_type_node);
16381 tree pcvoid_type_node
16382 = build_pointer_type (build_qualified_type (void_type_node,
16383 TYPE_QUAL_CONST));
16385 tree int_ftype_opaque
16386 = build_function_type_list (integer_type_node,
16387 opaque_V4SI_type_node, NULL_TREE);
16388 tree opaque_ftype_opaque
16389 = build_function_type_list (integer_type_node, NULL_TREE);
16390 tree opaque_ftype_opaque_int
16391 = build_function_type_list (opaque_V4SI_type_node,
16392 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16393 tree opaque_ftype_opaque_opaque_int
16394 = build_function_type_list (opaque_V4SI_type_node,
16395 opaque_V4SI_type_node, opaque_V4SI_type_node,
16396 integer_type_node, NULL_TREE);
16397 tree opaque_ftype_opaque_opaque_opaque
16398 = build_function_type_list (opaque_V4SI_type_node,
16399 opaque_V4SI_type_node, opaque_V4SI_type_node,
16400 opaque_V4SI_type_node, NULL_TREE);
16401 tree opaque_ftype_opaque_opaque
16402 = build_function_type_list (opaque_V4SI_type_node,
16403 opaque_V4SI_type_node, opaque_V4SI_type_node,
16404 NULL_TREE);
16405 tree int_ftype_int_opaque_opaque
16406 = build_function_type_list (integer_type_node,
16407 integer_type_node, opaque_V4SI_type_node,
16408 opaque_V4SI_type_node, NULL_TREE);
16409 tree int_ftype_int_v4si_v4si
16410 = build_function_type_list (integer_type_node,
16411 integer_type_node, V4SI_type_node,
16412 V4SI_type_node, NULL_TREE);
16413 tree int_ftype_int_v2di_v2di
16414 = build_function_type_list (integer_type_node,
16415 integer_type_node, V2DI_type_node,
16416 V2DI_type_node, NULL_TREE);
16417 tree void_ftype_v4si
16418 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16419 tree v8hi_ftype_void
16420 = build_function_type_list (V8HI_type_node, NULL_TREE);
16421 tree void_ftype_void
16422 = build_function_type_list (void_type_node, NULL_TREE);
16423 tree void_ftype_int
16424 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16426 tree opaque_ftype_long_pcvoid
16427 = build_function_type_list (opaque_V4SI_type_node,
16428 long_integer_type_node, pcvoid_type_node,
16429 NULL_TREE);
16430 tree v16qi_ftype_long_pcvoid
16431 = build_function_type_list (V16QI_type_node,
16432 long_integer_type_node, pcvoid_type_node,
16433 NULL_TREE);
16434 tree v8hi_ftype_long_pcvoid
16435 = build_function_type_list (V8HI_type_node,
16436 long_integer_type_node, pcvoid_type_node,
16437 NULL_TREE);
16438 tree v4si_ftype_long_pcvoid
16439 = build_function_type_list (V4SI_type_node,
16440 long_integer_type_node, pcvoid_type_node,
16441 NULL_TREE);
16442 tree v4sf_ftype_long_pcvoid
16443 = build_function_type_list (V4SF_type_node,
16444 long_integer_type_node, pcvoid_type_node,
16445 NULL_TREE);
16446 tree v2df_ftype_long_pcvoid
16447 = build_function_type_list (V2DF_type_node,
16448 long_integer_type_node, pcvoid_type_node,
16449 NULL_TREE);
16450 tree v2di_ftype_long_pcvoid
16451 = build_function_type_list (V2DI_type_node,
16452 long_integer_type_node, pcvoid_type_node,
16453 NULL_TREE);
16455 tree void_ftype_opaque_long_pvoid
16456 = build_function_type_list (void_type_node,
16457 opaque_V4SI_type_node, long_integer_type_node,
16458 pvoid_type_node, NULL_TREE);
16459 tree void_ftype_v4si_long_pvoid
16460 = build_function_type_list (void_type_node,
16461 V4SI_type_node, long_integer_type_node,
16462 pvoid_type_node, NULL_TREE);
16463 tree void_ftype_v16qi_long_pvoid
16464 = build_function_type_list (void_type_node,
16465 V16QI_type_node, long_integer_type_node,
16466 pvoid_type_node, NULL_TREE);
16467 tree void_ftype_v8hi_long_pvoid
16468 = build_function_type_list (void_type_node,
16469 V8HI_type_node, long_integer_type_node,
16470 pvoid_type_node, NULL_TREE);
16471 tree void_ftype_v4sf_long_pvoid
16472 = build_function_type_list (void_type_node,
16473 V4SF_type_node, long_integer_type_node,
16474 pvoid_type_node, NULL_TREE);
16475 tree void_ftype_v2df_long_pvoid
16476 = build_function_type_list (void_type_node,
16477 V2DF_type_node, long_integer_type_node,
16478 pvoid_type_node, NULL_TREE);
16479 tree void_ftype_v2di_long_pvoid
16480 = build_function_type_list (void_type_node,
16481 V2DI_type_node, long_integer_type_node,
16482 pvoid_type_node, NULL_TREE);
16483 tree int_ftype_int_v8hi_v8hi
16484 = build_function_type_list (integer_type_node,
16485 integer_type_node, V8HI_type_node,
16486 V8HI_type_node, NULL_TREE);
16487 tree int_ftype_int_v16qi_v16qi
16488 = build_function_type_list (integer_type_node,
16489 integer_type_node, V16QI_type_node,
16490 V16QI_type_node, NULL_TREE);
16491 tree int_ftype_int_v4sf_v4sf
16492 = build_function_type_list (integer_type_node,
16493 integer_type_node, V4SF_type_node,
16494 V4SF_type_node, NULL_TREE);
16495 tree int_ftype_int_v2df_v2df
16496 = build_function_type_list (integer_type_node,
16497 integer_type_node, V2DF_type_node,
16498 V2DF_type_node, NULL_TREE);
16499 tree v2di_ftype_v2di
16500 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16501 tree v4si_ftype_v4si
16502 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16503 tree v8hi_ftype_v8hi
16504 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16505 tree v16qi_ftype_v16qi
16506 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16507 tree v4sf_ftype_v4sf
16508 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16509 tree v2df_ftype_v2df
16510 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16511 tree void_ftype_pcvoid_int_int
16512 = build_function_type_list (void_type_node,
16513 pcvoid_type_node, integer_type_node,
16514 integer_type_node, NULL_TREE);
16516 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16517 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16518 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16519 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16520 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16521 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16522 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16523 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16524 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16525 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16526 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16527 ALTIVEC_BUILTIN_LVXL_V2DF);
16528 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16529 ALTIVEC_BUILTIN_LVXL_V2DI);
16530 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16531 ALTIVEC_BUILTIN_LVXL_V4SF);
16532 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16533 ALTIVEC_BUILTIN_LVXL_V4SI);
16534 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16535 ALTIVEC_BUILTIN_LVXL_V8HI);
16536 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16537 ALTIVEC_BUILTIN_LVXL_V16QI);
16538 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16539 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16540 ALTIVEC_BUILTIN_LVX_V2DF);
16541 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16542 ALTIVEC_BUILTIN_LVX_V2DI);
16543 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16544 ALTIVEC_BUILTIN_LVX_V4SF);
16545 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16546 ALTIVEC_BUILTIN_LVX_V4SI);
16547 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16548 ALTIVEC_BUILTIN_LVX_V8HI);
16549 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16550 ALTIVEC_BUILTIN_LVX_V16QI);
16551 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16552 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16553 ALTIVEC_BUILTIN_STVX_V2DF);
16554 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16555 ALTIVEC_BUILTIN_STVX_V2DI);
16556 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16557 ALTIVEC_BUILTIN_STVX_V4SF);
16558 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16559 ALTIVEC_BUILTIN_STVX_V4SI);
16560 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16561 ALTIVEC_BUILTIN_STVX_V8HI);
16562 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16563 ALTIVEC_BUILTIN_STVX_V16QI);
16564 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16565 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16566 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16567 ALTIVEC_BUILTIN_STVXL_V2DF);
16568 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16569 ALTIVEC_BUILTIN_STVXL_V2DI);
16570 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16571 ALTIVEC_BUILTIN_STVXL_V4SF);
16572 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16573 ALTIVEC_BUILTIN_STVXL_V4SI);
16574 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16575 ALTIVEC_BUILTIN_STVXL_V8HI);
16576 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16577 ALTIVEC_BUILTIN_STVXL_V16QI);
16578 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16579 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16580 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16581 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16582 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16583 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16584 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16585 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16586 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16587 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16588 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16589 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16590 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16591 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16592 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16593 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16595 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16596 VSX_BUILTIN_LXVD2X_V2DF);
16597 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16598 VSX_BUILTIN_LXVD2X_V2DI);
16599 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16600 VSX_BUILTIN_LXVW4X_V4SF);
16601 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16602 VSX_BUILTIN_LXVW4X_V4SI);
16603 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16604 VSX_BUILTIN_LXVW4X_V8HI);
16605 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16606 VSX_BUILTIN_LXVW4X_V16QI);
16607 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16608 VSX_BUILTIN_STXVD2X_V2DF);
16609 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16610 VSX_BUILTIN_STXVD2X_V2DI);
16611 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16612 VSX_BUILTIN_STXVW4X_V4SF);
16613 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16614 VSX_BUILTIN_STXVW4X_V4SI);
16615 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16616 VSX_BUILTIN_STXVW4X_V8HI);
16617 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16618 VSX_BUILTIN_STXVW4X_V16QI);
16620 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16621 VSX_BUILTIN_LD_ELEMREV_V2DF);
16622 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16623 VSX_BUILTIN_LD_ELEMREV_V2DI);
16624 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16625 VSX_BUILTIN_LD_ELEMREV_V4SF);
16626 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16627 VSX_BUILTIN_LD_ELEMREV_V4SI);
16628 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16629 VSX_BUILTIN_ST_ELEMREV_V2DF);
16630 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16631 VSX_BUILTIN_ST_ELEMREV_V2DI);
16632 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16633 VSX_BUILTIN_ST_ELEMREV_V4SF);
16634 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16635 VSX_BUILTIN_ST_ELEMREV_V4SI);
16637 if (TARGET_P9_VECTOR)
16639 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16640 VSX_BUILTIN_LD_ELEMREV_V8HI);
16641 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16642 VSX_BUILTIN_LD_ELEMREV_V16QI);
16643 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
16644 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
16645 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
16646 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
16649 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16650 VSX_BUILTIN_VEC_LD);
16651 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16652 VSX_BUILTIN_VEC_ST);
16653 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16654 VSX_BUILTIN_VEC_XL);
16655 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16656 VSX_BUILTIN_VEC_XST);
16658 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16659 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16660 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16662 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16663 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16664 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16665 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16666 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16667 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16668 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16669 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16670 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16671 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16672 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16673 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16675 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16676 ALTIVEC_BUILTIN_VEC_ADDE);
16677 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16678 ALTIVEC_BUILTIN_VEC_ADDEC);
16679 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16680 ALTIVEC_BUILTIN_VEC_CMPNE);
16681 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16682 ALTIVEC_BUILTIN_VEC_MUL);
16684 /* Cell builtins. */
16685 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16686 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16687 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16688 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16690 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16691 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16692 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16693 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16695 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16696 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16697 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16698 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16700 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16701 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16702 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16703 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16705 /* Add the DST variants. */
16706 d = bdesc_dst;
16707 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16708 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16710 /* Initialize the predicates. */
16711 d = bdesc_altivec_preds;
16712 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16714 machine_mode mode1;
16715 tree type;
16717 if (rs6000_overloaded_builtin_p (d->code))
16718 mode1 = VOIDmode;
16719 else
16720 mode1 = insn_data[d->icode].operand[1].mode;
16722 switch (mode1)
16724 case VOIDmode:
16725 type = int_ftype_int_opaque_opaque;
16726 break;
16727 case V2DImode:
16728 type = int_ftype_int_v2di_v2di;
16729 break;
16730 case V4SImode:
16731 type = int_ftype_int_v4si_v4si;
16732 break;
16733 case V8HImode:
16734 type = int_ftype_int_v8hi_v8hi;
16735 break;
16736 case V16QImode:
16737 type = int_ftype_int_v16qi_v16qi;
16738 break;
16739 case V4SFmode:
16740 type = int_ftype_int_v4sf_v4sf;
16741 break;
16742 case V2DFmode:
16743 type = int_ftype_int_v2df_v2df;
16744 break;
16745 default:
16746 gcc_unreachable ();
16749 def_builtin (d->name, type, d->code);
16752 /* Initialize the abs* operators. */
16753 d = bdesc_abs;
16754 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16756 machine_mode mode0;
16757 tree type;
16759 mode0 = insn_data[d->icode].operand[0].mode;
16761 switch (mode0)
16763 case V2DImode:
16764 type = v2di_ftype_v2di;
16765 break;
16766 case V4SImode:
16767 type = v4si_ftype_v4si;
16768 break;
16769 case V8HImode:
16770 type = v8hi_ftype_v8hi;
16771 break;
16772 case V16QImode:
16773 type = v16qi_ftype_v16qi;
16774 break;
16775 case V4SFmode:
16776 type = v4sf_ftype_v4sf;
16777 break;
16778 case V2DFmode:
16779 type = v2df_ftype_v2df;
16780 break;
16781 default:
16782 gcc_unreachable ();
16785 def_builtin (d->name, type, d->code);
16788 /* Initialize target builtin that implements
16789 targetm.vectorize.builtin_mask_for_load. */
16791 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16792 v16qi_ftype_long_pcvoid,
16793 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16794 BUILT_IN_MD, NULL, NULL_TREE);
16795 TREE_READONLY (decl) = 1;
16796 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16797 altivec_builtin_mask_for_load = decl;
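#if 0
/* Illustrative sketch (user code, not compiled here): the mask produced
   by __builtin_altivec_mask_for_load drives the classic AltiVec
   misaligned-load idiom, which the vectorizer emits internally; written
   by hand with <altivec.h> it looks like this.  */
#include <altivec.h>

vector unsigned char
load_unaligned (const unsigned char *p)
{
  vector unsigned char mask = vec_lvsl (0, p);	/* permute control */
  vector unsigned char lo = vec_ld (0, p);	/* aligned block below */
  vector unsigned char hi = vec_ld (15, p);	/* aligned block above */
  return vec_perm (lo, hi, mask);
}
#endif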
16799 /* Access to the vec_init patterns. */
16800 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16801 integer_type_node, integer_type_node,
16802 integer_type_node, NULL_TREE);
16803 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16805 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16806 short_integer_type_node,
16807 short_integer_type_node,
16808 short_integer_type_node,
16809 short_integer_type_node,
16810 short_integer_type_node,
16811 short_integer_type_node,
16812 short_integer_type_node, NULL_TREE);
16813 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16815 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16816 char_type_node, char_type_node,
16817 char_type_node, char_type_node,
16818 char_type_node, char_type_node,
16819 char_type_node, char_type_node,
16820 char_type_node, char_type_node,
16821 char_type_node, char_type_node,
16822 char_type_node, char_type_node,
16823 char_type_node, NULL_TREE);
16824 def_builtin ("__builtin_vec_init_v16qi", ftype,
16825 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16827 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16828 float_type_node, float_type_node,
16829 float_type_node, NULL_TREE);
16830 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16832 /* VSX builtins. */
16833 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16834 double_type_node, NULL_TREE);
16835 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16837 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16838 intDI_type_node, NULL_TREE);
16839 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16841 /* Access to the vec_set patterns. */
16842 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16843 intSI_type_node,
16844 integer_type_node, NULL_TREE);
16845 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16847 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16848 intHI_type_node,
16849 integer_type_node, NULL_TREE);
16850 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16852 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16853 intQI_type_node,
16854 integer_type_node, NULL_TREE);
16855 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16857 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16858 float_type_node,
16859 integer_type_node, NULL_TREE);
16860 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16862 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16863 double_type_node,
16864 integer_type_node, NULL_TREE);
16865 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16867 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16868 intDI_type_node,
16869 integer_type_node, NULL_TREE);
16870 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
16872 /* Access to the vec_extract patterns. */
16873 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16874 integer_type_node, NULL_TREE);
16875 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
16877 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16878 integer_type_node, NULL_TREE);
16879 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
16881 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16882 integer_type_node, NULL_TREE);
16883 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
16885 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16886 integer_type_node, NULL_TREE);
16887 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
16889 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16890 integer_type_node, NULL_TREE);
16891 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
16893 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
16894 integer_type_node, NULL_TREE);
16895 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
16898 if (V1TI_type_node)
16900 tree v1ti_ftype_long_pcvoid
16901 = build_function_type_list (V1TI_type_node,
16902 long_integer_type_node, pcvoid_type_node,
16903 NULL_TREE);
16904 tree void_ftype_v1ti_long_pvoid
16905 = build_function_type_list (void_type_node,
16906 V1TI_type_node, long_integer_type_node,
16907 pvoid_type_node, NULL_TREE);
16908 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
16909 VSX_BUILTIN_LXVD2X_V1TI);
16910 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
16911 VSX_BUILTIN_STXVD2X_V1TI);
16912 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
16913 NULL_TREE);
16914 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
16915 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
16916 intTI_type_node,
16917 integer_type_node, NULL_TREE);
16918 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
16919 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
16920 integer_type_node, NULL_TREE);
16921 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
16926 static void
16927 htm_init_builtins (void)
16929 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16930 const struct builtin_description *d;
16931 size_t i;
16933 d = bdesc_htm;
16934 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
16936 tree op[MAX_HTM_OPERANDS], type;
16937 HOST_WIDE_INT mask = d->mask;
16938 unsigned attr = rs6000_builtin_info[d->code].attr;
16939 bool void_func = (attr & RS6000_BTC_VOID);
16940 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
16941 int nopnds = 0;
16942 tree gpr_type_node;
16943 tree rettype;
16944 tree argtype;
16946 if (TARGET_32BIT && TARGET_POWERPC64)
16947 gpr_type_node = long_long_unsigned_type_node;
16948 else
16949 gpr_type_node = long_unsigned_type_node;
16951 if (attr & RS6000_BTC_SPR)
16953 rettype = gpr_type_node;
16954 argtype = gpr_type_node;
16956 else if (d->code == HTM_BUILTIN_TABORTDC
16957 || d->code == HTM_BUILTIN_TABORTDCI)
16959 rettype = unsigned_type_node;
16960 argtype = gpr_type_node;
16962 else
16964 rettype = unsigned_type_node;
16965 argtype = unsigned_type_node;
16968 if ((mask & builtin_mask) != mask)
16970 if (TARGET_DEBUG_BUILTIN)
16971 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
16972 continue;
16975 if (d->name == 0)
16977 if (TARGET_DEBUG_BUILTIN)
16978 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
16979 (long unsigned) i);
16980 continue;
16983 op[nopnds++] = (void_func) ? void_type_node : rettype;
16985 if (attr_args == RS6000_BTC_UNARY)
16986 op[nopnds++] = argtype;
16987 else if (attr_args == RS6000_BTC_BINARY)
16989 op[nopnds++] = argtype;
16990 op[nopnds++] = argtype;
16992 else if (attr_args == RS6000_BTC_TERNARY)
16994 op[nopnds++] = argtype;
16995 op[nopnds++] = argtype;
16996 op[nopnds++] = argtype;
16999 switch (nopnds)
17001 case 1:
17002 type = build_function_type_list (op[0], NULL_TREE);
17003 break;
17004 case 2:
17005 type = build_function_type_list (op[0], op[1], NULL_TREE);
17006 break;
17007 case 3:
17008 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17009 break;
17010 case 4:
17011 type = build_function_type_list (op[0], op[1], op[2], op[3],
17012 NULL_TREE);
17013 break;
17014 default:
17015 gcc_unreachable ();
17018 def_builtin (d->name, type, d->code);
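#if 0
/* Illustrative sketch (user code, not compiled here, assuming -mhtm):
   the documented transaction pattern using two of the builtins the loop
   above creates from bdesc_htm.  */
long counter;

void
bump_transactionally (void)
{
  if (__builtin_tbegin (0))
    {
      counter++;		/* transactional path */
      __builtin_tend (0);
    }
  else
    counter++;			/* fallback after abort/failure */
}
#endif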
17022 /* Hash function for builtin functions with up to 3 arguments and a return
17023 type. */
17024 hashval_t
17025 builtin_hasher::hash (builtin_hash_struct *bh)
17027 unsigned ret = 0;
17028 int i;
17030 for (i = 0; i < 4; i++)
17032 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17033 ret = (ret * 2) + bh->uns_p[i];
17036 return ret;
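#if 0
/* Illustrative standalone sketch of the mixing scheme above: each of the
   four (mode, unsignedness) pairs is folded in positionally, so two
   signatures that differ only in argument order hash differently.
   MODE_LIMIT and the mode numbers are assumptions standing in for
   MAX_MACHINE_MODE and real machine_mode values.  */
#include <stdio.h>

#define MODE_LIMIT 256u

static unsigned
sig_hash (const unsigned mode[4], const unsigned uns[4])
{
  unsigned ret = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      ret = ret * MODE_LIMIT + mode[i];
      ret = ret * 2 + uns[i];
    }
  return ret;
}

int
main (void)
{
  const unsigned mode[4] = { 38, 38, 38, 0 };	/* illustrative mode numbers */
  const unsigned uns[4] = { 0, 0, 0, 0 };

  printf ("hash = %u\n", sig_hash (mode, uns));
  return 0;
}
#endif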
17039 /* Compare builtin hash entries H1 and H2 for equivalence. */
17040 bool
17041 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17043 return ((p1->mode[0] == p2->mode[0])
17044 && (p1->mode[1] == p2->mode[1])
17045 && (p1->mode[2] == p2->mode[2])
17046 && (p1->mode[3] == p2->mode[3])
17047 && (p1->uns_p[0] == p2->uns_p[0])
17048 && (p1->uns_p[1] == p2->uns_p[1])
17049 && (p1->uns_p[2] == p2->uns_p[2])
17050 && (p1->uns_p[3] == p2->uns_p[3]));
17053 /* Map types for builtin functions with an explicit return type and up to 3
17054 arguments. Functions with fewer than 3 arguments pass VOIDmode as the mode
17055 of each unused argument. */
17056 static tree
17057 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17058 machine_mode mode_arg1, machine_mode mode_arg2,
17059 enum rs6000_builtins builtin, const char *name)
17061 struct builtin_hash_struct h;
17062 struct builtin_hash_struct *h2;
17063 int num_args = 3;
17064 int i;
17065 tree ret_type = NULL_TREE;
17066 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17068 /* Create builtin_hash_table. */
17069 if (builtin_hash_table == NULL)
17070 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17072 h.type = NULL_TREE;
17073 h.mode[0] = mode_ret;
17074 h.mode[1] = mode_arg0;
17075 h.mode[2] = mode_arg1;
17076 h.mode[3] = mode_arg2;
17077 h.uns_p[0] = 0;
17078 h.uns_p[1] = 0;
17079 h.uns_p[2] = 0;
17080 h.uns_p[3] = 0;
17082 /* If the builtin produces unsigned results or takes unsigned
17083 arguments, and it is returned as a decl for the vectorizer (such as
17084 widening multiplies or permutes), make sure the arguments and return
17085 value are type correct. */
17086 switch (builtin)
17088 /* unsigned 1 argument functions. */
17089 case CRYPTO_BUILTIN_VSBOX:
17090 case P8V_BUILTIN_VGBBD:
17091 case MISC_BUILTIN_CDTBCD:
17092 case MISC_BUILTIN_CBCDTD:
17093 h.uns_p[0] = 1;
17094 h.uns_p[1] = 1;
17095 break;
17097 /* unsigned 2 argument functions. */
17098 case ALTIVEC_BUILTIN_VMULEUB_UNS:
17099 case ALTIVEC_BUILTIN_VMULEUH_UNS:
17100 case ALTIVEC_BUILTIN_VMULOUB_UNS:
17101 case ALTIVEC_BUILTIN_VMULOUH_UNS:
17102 case CRYPTO_BUILTIN_VCIPHER:
17103 case CRYPTO_BUILTIN_VCIPHERLAST:
17104 case CRYPTO_BUILTIN_VNCIPHER:
17105 case CRYPTO_BUILTIN_VNCIPHERLAST:
17106 case CRYPTO_BUILTIN_VPMSUMB:
17107 case CRYPTO_BUILTIN_VPMSUMH:
17108 case CRYPTO_BUILTIN_VPMSUMW:
17109 case CRYPTO_BUILTIN_VPMSUMD:
17110 case CRYPTO_BUILTIN_VPMSUM:
17111 case MISC_BUILTIN_ADDG6S:
17112 case MISC_BUILTIN_DIVWEU:
17113 case MISC_BUILTIN_DIVWEUO:
17114 case MISC_BUILTIN_DIVDEU:
17115 case MISC_BUILTIN_DIVDEUO:
17116 h.uns_p[0] = 1;
17117 h.uns_p[1] = 1;
17118 h.uns_p[2] = 1;
17119 break;
17121 /* unsigned 3 argument functions. */
17122 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17123 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17124 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17125 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17126 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17127 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17128 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17129 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17130 case VSX_BUILTIN_VPERM_16QI_UNS:
17131 case VSX_BUILTIN_VPERM_8HI_UNS:
17132 case VSX_BUILTIN_VPERM_4SI_UNS:
17133 case VSX_BUILTIN_VPERM_2DI_UNS:
17134 case VSX_BUILTIN_XXSEL_16QI_UNS:
17135 case VSX_BUILTIN_XXSEL_8HI_UNS:
17136 case VSX_BUILTIN_XXSEL_4SI_UNS:
17137 case VSX_BUILTIN_XXSEL_2DI_UNS:
17138 case CRYPTO_BUILTIN_VPERMXOR:
17139 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17140 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17141 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17142 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17143 case CRYPTO_BUILTIN_VSHASIGMAW:
17144 case CRYPTO_BUILTIN_VSHASIGMAD:
17145 case CRYPTO_BUILTIN_VSHASIGMA:
17146 h.uns_p[0] = 1;
17147 h.uns_p[1] = 1;
17148 h.uns_p[2] = 1;
17149 h.uns_p[3] = 1;
17150 break;
17152 /* signed permute functions with unsigned char mask. */
17153 case ALTIVEC_BUILTIN_VPERM_16QI:
17154 case ALTIVEC_BUILTIN_VPERM_8HI:
17155 case ALTIVEC_BUILTIN_VPERM_4SI:
17156 case ALTIVEC_BUILTIN_VPERM_4SF:
17157 case ALTIVEC_BUILTIN_VPERM_2DI:
17158 case ALTIVEC_BUILTIN_VPERM_2DF:
17159 case VSX_BUILTIN_VPERM_16QI:
17160 case VSX_BUILTIN_VPERM_8HI:
17161 case VSX_BUILTIN_VPERM_4SI:
17162 case VSX_BUILTIN_VPERM_4SF:
17163 case VSX_BUILTIN_VPERM_2DI:
17164 case VSX_BUILTIN_VPERM_2DF:
17165 h.uns_p[3] = 1;
17166 break;
17168 /* unsigned args, signed return. */
17169 case VSX_BUILTIN_XVCVUXDDP_UNS:
17170 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17171 h.uns_p[1] = 1;
17172 break;
17174 /* signed args, unsigned return. */
17175 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17176 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17177 case MISC_BUILTIN_UNPACK_TD:
17178 case MISC_BUILTIN_UNPACK_V1TI:
17179 h.uns_p[0] = 1;
17180 break;
17182 /* unsigned arguments for 128-bit pack instructions. */
17183 case MISC_BUILTIN_PACK_TD:
17184 case MISC_BUILTIN_PACK_V1TI:
17185 h.uns_p[1] = 1;
17186 h.uns_p[2] = 1;
17187 break;
17189 default:
17190 break;
17193 /* Figure out how many args are present. */
17194 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17195 num_args--;
17197 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17198 if (!ret_type && h.uns_p[0])
17199 ret_type = builtin_mode_to_type[h.mode[0]][0];
17201 if (!ret_type)
17202 fatal_error (input_location,
17203 "internal error: builtin function %s had an unexpected "
17204 "return type %s", name, GET_MODE_NAME (h.mode[0]));
17206 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17207 arg_type[i] = NULL_TREE;
17209 for (i = 0; i < num_args; i++)
17211 int m = (int) h.mode[i+1];
17212 int uns_p = h.uns_p[i+1];
17214 arg_type[i] = builtin_mode_to_type[m][uns_p];
17215 if (!arg_type[i] && uns_p)
17216 arg_type[i] = builtin_mode_to_type[m][0];
17218 if (!arg_type[i])
17219 fatal_error (input_location,
17220 "internal error: builtin function %s, argument %d "
17221 "had unexpected argument type %s", name, i,
17222 GET_MODE_NAME (m));
17225 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17226 if (*found == NULL)
17228 h2 = ggc_alloc<builtin_hash_struct> ();
17229 *h2 = h;
17230 *found = h2;
17232 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17233 arg_type[2], NULL_TREE);
17236 return (*found)->type;
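#if 0
/* Illustrative sketch (not compiled here): because the signature is
   looked up in builtin_hash_table before a new FUNCTION_TYPE is built,
   two requests with identical modes and signedness return the same
   tree, as this pseudo-usage inside the file would observe.  */
static void
check_type_sharing (void)
{
  tree t1 = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
				   RS6000_BUILTIN_RECIP,
				   "__builtin_recipdiv");
  tree t2 = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
				   RS6000_BUILTIN_RECIP,
				   "__builtin_recipdiv");
  gcc_assert (t1 == t2);	/* shared via builtin_hash_table */
}
#endif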
17239 static void
17240 rs6000_common_init_builtins (void)
17242 const struct builtin_description *d;
17243 size_t i;
17245 tree opaque_ftype_opaque = NULL_TREE;
17246 tree opaque_ftype_opaque_opaque = NULL_TREE;
17247 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17248 tree v2si_ftype = NULL_TREE;
17249 tree v2si_ftype_qi = NULL_TREE;
17250 tree v2si_ftype_v2si_qi = NULL_TREE;
17251 tree v2si_ftype_int_qi = NULL_TREE;
17252 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17254 if (!TARGET_PAIRED_FLOAT)
17256 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
17257 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
17260 /* Paired and SPE builtins are only available if you build a compiler with
17261 the appropriate options, so only create those builtins with the
17262 appropriate compiler option. Create Altivec and VSX builtins on machines
17263 with at least the general purpose extensions (970 and newer) to allow the
17264 use of the target attribute. */
17266 if (TARGET_EXTRA_BUILTINS)
17267 builtin_mask |= RS6000_BTM_COMMON;
17269 /* Add the ternary operators. */
17270 d = bdesc_3arg;
17271 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17273 tree type;
17274 HOST_WIDE_INT mask = d->mask;
17276 if ((mask & builtin_mask) != mask)
17278 if (TARGET_DEBUG_BUILTIN)
17279 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17280 continue;
17283 if (rs6000_overloaded_builtin_p (d->code))
17285 if (! (type = opaque_ftype_opaque_opaque_opaque))
17286 type = opaque_ftype_opaque_opaque_opaque
17287 = build_function_type_list (opaque_V4SI_type_node,
17288 opaque_V4SI_type_node,
17289 opaque_V4SI_type_node,
17290 opaque_V4SI_type_node,
17291 NULL_TREE);
17293 else
17295 enum insn_code icode = d->icode;
17296 if (d->name == 0)
17298 if (TARGET_DEBUG_BUILTIN)
17299 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17300 (long unsigned)i);
17302 continue;
17305 if (icode == CODE_FOR_nothing)
17307 if (TARGET_DEBUG_BUILTIN)
17308 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17309 d->name);
17311 continue;
17314 type = builtin_function_type (insn_data[icode].operand[0].mode,
17315 insn_data[icode].operand[1].mode,
17316 insn_data[icode].operand[2].mode,
17317 insn_data[icode].operand[3].mode,
17318 d->code, d->name);
17321 def_builtin (d->name, type, d->code);
17324 /* Add the binary operators. */
17325 d = bdesc_2arg;
17326 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17328 machine_mode mode0, mode1, mode2;
17329 tree type;
17330 HOST_WIDE_INT mask = d->mask;
17332 if ((mask & builtin_mask) != mask)
17334 if (TARGET_DEBUG_BUILTIN)
17335 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17336 continue;
17339 if (rs6000_overloaded_builtin_p (d->code))
17341 if (! (type = opaque_ftype_opaque_opaque))
17342 type = opaque_ftype_opaque_opaque
17343 = build_function_type_list (opaque_V4SI_type_node,
17344 opaque_V4SI_type_node,
17345 opaque_V4SI_type_node,
17346 NULL_TREE);
17348 else
17350 enum insn_code icode = d->icode;
17351 if (d->name == 0)
17353 if (TARGET_DEBUG_BUILTIN)
17354 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17355 (long unsigned)i);
17357 continue;
17360 if (icode == CODE_FOR_nothing)
17362 if (TARGET_DEBUG_BUILTIN)
17363 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17364 d->name);
17366 continue;
17369 mode0 = insn_data[icode].operand[0].mode;
17370 mode1 = insn_data[icode].operand[1].mode;
17371 mode2 = insn_data[icode].operand[2].mode;
17373 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
17375 if (! (type = v2si_ftype_v2si_qi))
17376 type = v2si_ftype_v2si_qi
17377 = build_function_type_list (opaque_V2SI_type_node,
17378 opaque_V2SI_type_node,
17379 char_type_node,
17380 NULL_TREE);
17383 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
17384 && mode2 == QImode)
17386 if (! (type = v2si_ftype_int_qi))
17387 type = v2si_ftype_int_qi
17388 = build_function_type_list (opaque_V2SI_type_node,
17389 integer_type_node,
17390 char_type_node,
17391 NULL_TREE);
17394 else
17395 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17396 d->code, d->name);
17399 def_builtin (d->name, type, d->code);
17402 /* Add the simple unary operators. */
17403 d = bdesc_1arg;
17404 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17406 machine_mode mode0, mode1;
17407 tree type;
17408 HOST_WIDE_INT mask = d->mask;
17410 if ((mask & builtin_mask) != mask)
17412 if (TARGET_DEBUG_BUILTIN)
17413 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17414 continue;
17417 if (rs6000_overloaded_builtin_p (d->code))
17419 if (! (type = opaque_ftype_opaque))
17420 type = opaque_ftype_opaque
17421 = build_function_type_list (opaque_V4SI_type_node,
17422 opaque_V4SI_type_node,
17423 NULL_TREE);
17425 else
17427 enum insn_code icode = d->icode;
17428 if (d->name == 0)
17430 if (TARGET_DEBUG_BUILTIN)
17431 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17432 (long unsigned)i);
17434 continue;
17437 if (icode == CODE_FOR_nothing)
17439 if (TARGET_DEBUG_BUILTIN)
17440 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17441 d->name);
17443 continue;
17446 mode0 = insn_data[icode].operand[0].mode;
17447 mode1 = insn_data[icode].operand[1].mode;
17449 if (mode0 == V2SImode && mode1 == QImode)
17451 if (! (type = v2si_ftype_qi))
17452 type = v2si_ftype_qi
17453 = build_function_type_list (opaque_V2SI_type_node,
17454 char_type_node,
17455 NULL_TREE);
17458 else
17459 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17460 d->code, d->name);
17463 def_builtin (d->name, type, d->code);
17466 /* Add the simple no-argument operators. */
17467 d = bdesc_0arg;
17468 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17470 machine_mode mode0;
17471 tree type;
17472 HOST_WIDE_INT mask = d->mask;
17474 if ((mask & builtin_mask) != mask)
17476 if (TARGET_DEBUG_BUILTIN)
17477 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17478 continue;
17480 if (rs6000_overloaded_builtin_p (d->code))
17482 if (!opaque_ftype_opaque)
17483 opaque_ftype_opaque
17484 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17485 type = opaque_ftype_opaque;
17487 else
17489 enum insn_code icode = d->icode;
17490 if (d->name == 0)
17492 if (TARGET_DEBUG_BUILTIN)
17493 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17494 (long unsigned) i);
17495 continue;
17497 if (icode == CODE_FOR_nothing)
17499 if (TARGET_DEBUG_BUILTIN)
17500 fprintf (stderr,
17501 "rs6000_builtin, skip no-argument %s (no code)\n",
17502 d->name);
17503 continue;
17505 mode0 = insn_data[icode].operand[0].mode;
17506 if (mode0 == V2SImode)
17508 /* Code for SPE. */
17509 if (! (type = v2si_ftype))
17511 v2si_ftype
17512 = build_function_type_list (opaque_V2SI_type_node,
17513 NULL_TREE);
17514 type = v2si_ftype;
17517 else
17518 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17519 d->code, d->name);
17521 def_builtin (d->name, type, d->code);
17525 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17526 static void
17527 init_float128_ibm (machine_mode mode)
17529 if (!TARGET_XL_COMPAT)
17531 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17532 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17533 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17534 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17536 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
17538 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17539 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17540 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17541 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17542 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17543 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17544 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17546 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17547 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17548 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17549 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17550 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17551 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17552 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17553 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17556 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
17557 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17559 else
17561 set_optab_libfunc (add_optab, mode, "_xlqadd");
17562 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17563 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17564 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17567 /* Add various conversions for IFmode to use the traditional TFmode
17568 names. */
17569 if (mode == IFmode)
17571 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
17572 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
17573 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
17574 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
17575 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
17576 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
17578 if (TARGET_POWERPC64)
17580 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17581 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17582 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17583 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17588 /* Set up IEEE 128-bit floating point routines. Use different names if the
17589 arguments can be passed in a vector register. The historical PowerPC
17590 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17591 continue to use that if we aren't using vector registers to pass IEEE
17592 128-bit floating point. */
17594 static void
17595 init_float128_ieee (machine_mode mode)
17597 if (FLOAT128_VECTOR_P (mode))
17599 set_optab_libfunc (add_optab, mode, "__addkf3");
17600 set_optab_libfunc (sub_optab, mode, "__subkf3");
17601 set_optab_libfunc (neg_optab, mode, "__negkf2");
17602 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17603 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17604 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17605 set_optab_libfunc (abs_optab, mode, "__abstkf2");
17607 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17608 set_optab_libfunc (ne_optab, mode, "__nekf2");
17609 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17610 set_optab_libfunc (ge_optab, mode, "__gekf2");
17611 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17612 set_optab_libfunc (le_optab, mode, "__lekf2");
17613 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17615 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17616 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17617 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17618 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17620 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
17621 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17622 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
17624 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
17625 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17626 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
17628 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
17629 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
17630 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
17631 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
17632 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
17633 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
17635 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17636 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17637 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17638 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17640 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17641 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17642 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17643 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17645 if (TARGET_POWERPC64)
17647 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17648 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17649 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17650 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17654 else
17656 set_optab_libfunc (add_optab, mode, "_q_add");
17657 set_optab_libfunc (sub_optab, mode, "_q_sub");
17658 set_optab_libfunc (neg_optab, mode, "_q_neg");
17659 set_optab_libfunc (smul_optab, mode, "_q_mul");
17660 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17661 if (TARGET_PPC_GPOPT)
17662 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17664 set_optab_libfunc (eq_optab, mode, "_q_feq");
17665 set_optab_libfunc (ne_optab, mode, "_q_fne");
17666 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17667 set_optab_libfunc (ge_optab, mode, "_q_fge");
17668 set_optab_libfunc (lt_optab, mode, "_q_flt");
17669 set_optab_libfunc (le_optab, mode, "_q_fle");
17671 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17672 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17673 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17674 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17675 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17676 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17677 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17678 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17682 static void
17683 rs6000_init_libfuncs (void)
17685 /* __float128 support. */
17686 if (TARGET_FLOAT128)
17688 init_float128_ibm (IFmode);
17689 init_float128_ieee (KFmode);
17692 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17693 if (TARGET_LONG_DOUBLE_128)
17695 if (!TARGET_IEEEQUAD)
17696 init_float128_ibm (TFmode);
17698 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17699 else
17700 init_float128_ieee (TFmode);
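/* For example (an illustration, not from the original sources): on a
   64-bit Linux VSX target with -mfloat128 and the default IBM long
   double, KFmode (__float128) arithmetic uses the __addkf3 family set
   up above, while TFmode (long double) keeps the IBM 128-bit routines
   from init_float128_ibm.  */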
17705 /* Expand a block clear operation, and return 1 if successful. Return 0
17706 if we should let the compiler generate normal code.
17708 operands[0] is the destination
17709 operands[1] is the length
17710 operands[3] is the alignment */
17712 int
17713 expand_block_clear (rtx operands[])
17715 rtx orig_dest = operands[0];
17716 rtx bytes_rtx = operands[1];
17717 rtx align_rtx = operands[3];
17718 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
17719 HOST_WIDE_INT align;
17720 HOST_WIDE_INT bytes;
17721 int offset;
17722 int clear_bytes;
17723 int clear_step;
17725 /* If this is not a fixed size clear, just call memset */
17726 if (! constp)
17727 return 0;
17729 /* This must be a fixed size alignment */
17730 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17731 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17733 /* Anything to clear? */
17734 bytes = INTVAL (bytes_rtx);
17735 if (bytes <= 0)
17736 return 1;
17738 /* Use the builtin memset after a point, to avoid huge code bloat.
17739 When optimize_size, avoid any significant code bloat; calling
17740 memset is about 4 instructions, so allow for one instruction to
17741 load zero and three to do clearing. */
17742 if (TARGET_ALTIVEC && align >= 128)
17743 clear_step = 16;
17744 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
17745 clear_step = 8;
17746 else if (TARGET_SPE && align >= 64)
17747 clear_step = 8;
17748 else
17749 clear_step = 4;
17751 if (optimize_size && bytes > 3 * clear_step)
17752 return 0;
17753 if (! optimize_size && bytes > 8 * clear_step)
17754 return 0;
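/* For example, with AltiVec and 128-bit alignment clear_step is 16, so
   the checks above allow up to 8 * 16 = 128 bytes to be cleared inline
   (3 * 16 = 48 bytes under optimize_size); anything larger falls back
   to the normal memset path.  */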
17756 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
17758 machine_mode mode = BLKmode;
17759 rtx dest;
17761 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
17763 clear_bytes = 16;
17764 mode = V4SImode;
17766 else if (bytes >= 8 && TARGET_SPE && align >= 64)
17768 clear_bytes = 8;
17769 mode = V2SImode;
17771 else if (bytes >= 8 && TARGET_POWERPC64
17772 && (align >= 64 || !STRICT_ALIGNMENT))
17774 clear_bytes = 8;
17775 mode = DImode;
17776 if (offset == 0 && align < 64)
17778 rtx addr;
17780 /* If the address form is reg+offset with offset not a
17781 multiple of four, reload into reg indirect form here
17782 rather than waiting for reload. This way we get one
17783 reload, not one per store. */
17784 addr = XEXP (orig_dest, 0);
17785 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17786 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17787 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17789 addr = copy_addr_to_reg (addr);
17790 orig_dest = replace_equiv_address (orig_dest, addr);
17794 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17795 { /* clear 4 bytes */
17796 clear_bytes = 4;
17797 mode = SImode;
17799 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17800 { /* clear 2 bytes */
17801 clear_bytes = 2;
17802 mode = HImode;
17804 else /* clear 1 byte at a time */
17806 clear_bytes = 1;
17807 mode = QImode;
17810 dest = adjust_address (orig_dest, mode, offset);
17812 emit_move_insn (dest, CONST0_RTX (mode));
17815 return 1;
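/* For example, a 22-byte clear of a 16-byte-aligned block with AltiVec
   enabled emits one V4SImode store (16 bytes), one SImode store (4
   bytes), and one HImode store (2 bytes).  */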
17819 /* Expand a block move operation, and return 1 if successful. Return 0
17820 if we should let the compiler generate normal code.
17822 operands[0] is the destination
17823 operands[1] is the source
17824 operands[2] is the length
17825 operands[3] is the alignment */
17827 #define MAX_MOVE_REG 4
17829 int
17830 expand_block_move (rtx operands[])
17832 rtx orig_dest = operands[0];
17833 rtx orig_src = operands[1];
17834 rtx bytes_rtx = operands[2];
17835 rtx align_rtx = operands[3];
17836 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
17837 int align;
17838 int bytes;
17839 int offset;
17840 int move_bytes;
17841 rtx stores[MAX_MOVE_REG];
17842 int num_reg = 0;
17844 /* If this is not a fixed size move, just call memcpy */
17845 if (! constp)
17846 return 0;
17848 /* This must be a fixed size alignment */
17849 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
17850 align = INTVAL (align_rtx) * BITS_PER_UNIT;
17852 /* Anything to move? */
17853 bytes = INTVAL (bytes_rtx);
17854 if (bytes <= 0)
17855 return 1;
17857 if (bytes > rs6000_block_move_inline_limit)
17858 return 0;
17860 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
17862 union {
17863 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
17864 rtx (*mov) (rtx, rtx);
17865 } gen_func;
17866 machine_mode mode = BLKmode;
17867 rtx src, dest;
17869 /* Altivec first, since it will be faster than a string move
17870 when it applies, and usually not significantly larger. */
17871 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
17873 move_bytes = 16;
17874 mode = V4SImode;
17875 gen_func.mov = gen_movv4si;
17877 else if (TARGET_SPE && bytes >= 8 && align >= 64)
17879 move_bytes = 8;
17880 mode = V2SImode;
17881 gen_func.mov = gen_movv2si;
17883 else if (TARGET_STRING
17884 && bytes > 24 /* move up to 32 bytes at a time */
17885 && ! fixed_regs[5]
17886 && ! fixed_regs[6]
17887 && ! fixed_regs[7]
17888 && ! fixed_regs[8]
17889 && ! fixed_regs[9]
17890 && ! fixed_regs[10]
17891 && ! fixed_regs[11]
17892 && ! fixed_regs[12])
17894 move_bytes = (bytes > 32) ? 32 : bytes;
17895 gen_func.movmemsi = gen_movmemsi_8reg;
17897 else if (TARGET_STRING
17898 && bytes > 16 /* move up to 24 bytes at a time */
17899 && ! fixed_regs[5]
17900 && ! fixed_regs[6]
17901 && ! fixed_regs[7]
17902 && ! fixed_regs[8]
17903 && ! fixed_regs[9]
17904 && ! fixed_regs[10])
17906 move_bytes = (bytes > 24) ? 24 : bytes;
17907 gen_func.movmemsi = gen_movmemsi_6reg;
17909 else if (TARGET_STRING
17910 && bytes > 8 /* move up to 16 bytes at a time */
17911 && ! fixed_regs[5]
17912 && ! fixed_regs[6]
17913 && ! fixed_regs[7]
17914 && ! fixed_regs[8])
17916 move_bytes = (bytes > 16) ? 16 : bytes;
17917 gen_func.movmemsi = gen_movmemsi_4reg;
17919 else if (bytes >= 8 && TARGET_POWERPC64
17920 && (align >= 64 || !STRICT_ALIGNMENT))
17922 move_bytes = 8;
17923 mode = DImode;
17924 gen_func.mov = gen_movdi;
17925 if (offset == 0 && align < 64)
17927 rtx addr;
17929 /* If the address form is reg+offset with offset not a
17930 multiple of four, reload into reg indirect form here
17931 rather than waiting for reload. This way we get one
17932 reload, not one per load and/or store. */
17933 addr = XEXP (orig_dest, 0);
17934 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17935 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17936 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17938 addr = copy_addr_to_reg (addr);
17939 orig_dest = replace_equiv_address (orig_dest, addr);
17941 addr = XEXP (orig_src, 0);
17942 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
17943 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17944 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
17946 addr = copy_addr_to_reg (addr);
17947 orig_src = replace_equiv_address (orig_src, addr);
17951 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
17952 { /* move up to 8 bytes at a time */
17953 move_bytes = (bytes > 8) ? 8 : bytes;
17954 gen_func.movmemsi = gen_movmemsi_2reg;
17956 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
17957 { /* move 4 bytes */
17958 move_bytes = 4;
17959 mode = SImode;
17960 gen_func.mov = gen_movsi;
17962 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
17963 { /* move 2 bytes */
17964 move_bytes = 2;
17965 mode = HImode;
17966 gen_func.mov = gen_movhi;
17968 else if (TARGET_STRING && bytes > 1)
17969 { /* move up to 4 bytes at a time */
17970 move_bytes = (bytes > 4) ? 4 : bytes;
17971 gen_func.movmemsi = gen_movmemsi_1reg;
17973 else /* move 1 byte at a time */
17975 move_bytes = 1;
17976 mode = QImode;
17977 gen_func.mov = gen_movqi;
17980 src = adjust_address (orig_src, mode, offset);
17981 dest = adjust_address (orig_dest, mode, offset);
17983 if (mode != BLKmode)
17985 rtx tmp_reg = gen_reg_rtx (mode);
17987 emit_insn ((*gen_func.mov) (tmp_reg, src));
17988 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
17991 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
17993 int i;
17994 for (i = 0; i < num_reg; i++)
17995 emit_insn (stores[i]);
17996 num_reg = 0;
17999 if (mode == BLKmode)
18001 /* Move the address into scratch registers. The movmemsi
18002 patterns require zero offset. */
18003 if (!REG_P (XEXP (src, 0)))
18005 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
18006 src = replace_equiv_address (src, src_reg);
18008 set_mem_size (src, move_bytes);
18010 if (!REG_P (XEXP (dest, 0)))
18012 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
18013 dest = replace_equiv_address (dest, dest_reg);
18015 set_mem_size (dest, move_bytes);
18017 emit_insn ((*gen_func.movmemsi) (dest, src,
18018 GEN_INT (move_bytes & 31),
18019 align_rtx));
18023 return 1;
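/* For example, without AltiVec or string instructions, a 16-byte copy
   on a 32-bit target becomes four SImode loads into fresh scratch
   registers followed by the four queued stores, so within one
   MAX_MOVE_REG batch no store can clobber a source word that has not
   been read yet.  */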
18027 /* Return a string to perform a load_multiple operation.
18028 operands[0] is the vector.
18029 operands[1] is the source address.
18030 operands[2] is the first destination register. */
18032 const char *
18033 rs6000_output_load_multiple (rtx operands[3])
18035 /* We have to handle the case where the pseudo used to contain the address
18036 is assigned to one of the output registers. */
18037 int i, j;
18038 int words = XVECLEN (operands[0], 0);
18039 rtx xop[10];
18041 if (XVECLEN (operands[0], 0) == 1)
18042 return "lwz %2,0(%1)";
18044 for (i = 0; i < words; i++)
18045 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
18047 if (i == words-1)
18049 xop[0] = GEN_INT (4 * (words-1));
18050 xop[1] = operands[1];
18051 xop[2] = operands[2];
18052 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
18053 return "";
18055 else if (i == 0)
18057 xop[0] = GEN_INT (4 * (words-1));
18058 xop[1] = operands[1];
18059 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
18060 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
18061 return "";
18063 else
18065 for (j = 0; j < words; j++)
18066 if (j != i)
18068 xop[0] = GEN_INT (j * 4);
18069 xop[1] = operands[1];
18070 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
18071 output_asm_insn ("lwz %2,%0(%1)", xop);
18073 xop[0] = GEN_INT (i * 4);
18074 xop[1] = operands[1];
18075 output_asm_insn ("lwz %1,%0(%1)", xop);
18076 return "";
18080 return "lswi %2,%1,%N0";
18084 /* A validation routine: say whether CODE, a condition code, and MODE
18085 match. The other alternatives either don't make sense or should
18086 never be generated. */
18088 void
18089 validate_condition_mode (enum rtx_code code, machine_mode mode)
18091 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18092 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18093 && GET_MODE_CLASS (mode) == MODE_CC);
18095 /* These don't make sense. */
18096 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18097 || mode != CCUNSmode);
18099 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18100 || mode == CCUNSmode);
18102 gcc_assert (mode == CCFPmode
18103 || (code != ORDERED && code != UNORDERED
18104 && code != UNEQ && code != LTGT
18105 && code != UNGT && code != UNLT
18106 && code != UNGE && code != UNLE));
18108 /* These should never be generated except for
18109 flag_finite_math_only. */
18110 gcc_assert (mode != CCFPmode
18111 || flag_finite_math_only
18112 || (code != LE && code != GE
18113 && code != UNEQ && code != LTGT
18114 && code != UNGT && code != UNLT));
18116 /* These are invalid; the information is not there. */
18117 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18121 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18122 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18123 not zero, store there the bit offset (counted from the right) where
18124 the single stretch of 1 bits begins; and similarly for B, the bit
18125 offset where it ends. */
18127 bool
18128 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18130 unsigned HOST_WIDE_INT val = INTVAL (mask);
18131 unsigned HOST_WIDE_INT bit;
18132 int nb, ne;
18133 int n = GET_MODE_PRECISION (mode);
18135 if (mode != DImode && mode != SImode)
18136 return false;
18138 if (INTVAL (mask) >= 0)
18140 bit = val & -val;
18141 ne = exact_log2 (bit);
18142 nb = exact_log2 (val + bit);
18144 else if (val + 1 == 0)
18146 nb = n;
18147 ne = 0;
18149 else if (val & 1)
18151 val = ~val;
18152 bit = val & -val;
18153 nb = exact_log2 (bit);
18154 ne = exact_log2 (val + bit);
18156 else
18158 bit = val & -val;
18159 ne = exact_log2 (bit);
18160 if (val + bit == 0)
18161 nb = n;
18162 else
18163 nb = 0;
18166 nb--;
18168 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18169 return false;
18171 if (b)
18172 *b = nb;
18173 if (e)
18174 *e = ne;
18176 return true;
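/* Illustrative sketch, not part of the compiler: the non-wrapping case
   above relies on the identity that adding the lowest set bit of a
   single contiguous run of ones leaves a power of two (or zero).  The
   helper name below is made up for illustration.  */
static inline bool
rs6000_example_single_run_p (unsigned HOST_WIDE_INT val)
{
  if (val == 0)
    return false;
  unsigned HOST_WIDE_INT low = val & -val;	/* Lowest set bit.  */
  unsigned HOST_WIDE_INT sum = val + low;	/* Clears the whole run.  */
  return (sum & (sum - 1)) == 0;		/* Power of two, or zero.  */
}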
18179 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18180 or rldicr instruction, to implement an AND with it in mode MODE. */
18182 bool
18183 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18185 int nb, ne;
18187 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18188 return false;
18190 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18191 does not wrap. */
18192 if (mode == DImode)
18193 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18195 /* For SImode, rlwinm can do everything. */
18196 if (mode == SImode)
18197 return (nb < 32 && ne < 32);
18199 return false;
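/* For example, DImode 0xffffffff00000000 (nb = 63, ne = 32) is a single
   rldicr; 0x00000000ffffff00 (nb = 31, ne = 8) fits rlwinm; a value
   such as 0xff0000ff00000000 has two runs of ones and is already
   rejected by rs6000_is_valid_mask.  */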
18202 /* Return the instruction template for an AND with mask in mode MODE, with
18203 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18205 const char *
18206 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18208 int nb, ne;
18210 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18211 gcc_unreachable ();
18213 if (mode == DImode && ne == 0)
18215 operands[3] = GEN_INT (63 - nb);
18216 if (dot)
18217 return "rldicl. %0,%1,0,%3";
18218 return "rldicl %0,%1,0,%3";
18221 if (mode == DImode && nb == 63)
18223 operands[3] = GEN_INT (63 - ne);
18224 if (dot)
18225 return "rldicr. %0,%1,0,%3";
18226 return "rldicr %0,%1,0,%3";
18229 if (nb < 32 && ne < 32)
18231 operands[3] = GEN_INT (31 - nb);
18232 operands[4] = GEN_INT (31 - ne);
18233 if (dot)
18234 return "rlwinm. %0,%1,0,%3,%4";
18235 return "rlwinm %0,%1,0,%3,%4";
18238 gcc_unreachable ();
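/* For example, for the DImode mask 0x00000000ffffff00 the validity
   check finds nb = 31, ne = 8; neither the rldicl (ne == 0) nor the
   rldicr (nb == 63) form applies, so after the 31 - nb and 31 - ne
   adjustments the template returned is "rlwinm %0,%1,0,0,23".  */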
18241 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18242 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18243 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18245 bool
18246 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18248 int nb, ne;
18250 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18251 return false;
18253 int n = GET_MODE_PRECISION (mode);
18254 int sh = -1;
18256 if (CONST_INT_P (XEXP (shift, 1)))
18258 sh = INTVAL (XEXP (shift, 1));
18259 if (sh < 0 || sh >= n)
18260 return false;
18263 rtx_code code = GET_CODE (shift);
18265 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18266 if (sh == 0)
18267 code = ROTATE;
18269 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18270 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18271 code = ASHIFT;
18272 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18274 code = LSHIFTRT;
18275 sh = n - sh;
18278 /* DImode rotates need rld*. */
18279 if (mode == DImode && code == ROTATE)
18280 return (nb == 63 || ne == 0 || ne == sh);
18282 /* SImode rotates need rlw*. */
18283 if (mode == SImode && code == ROTATE)
18284 return (nb < 32 && ne < 32 && sh < 32);
18286 /* Wrap-around masks are only okay for rotates. */
18287 if (ne > nb)
18288 return false;
18290 /* Variable shifts are only okay for rotates. */
18291 if (sh < 0)
18292 return false;
18294 /* Don't allow ASHIFT if the mask is wrong for that. */
18295 if (code == ASHIFT && ne < sh)
18296 return false;
18298 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18299 if the mask is wrong for that. */
18300 if (nb < 32 && ne < 32 && sh < 32
18301 && !(code == LSHIFTRT && nb >= 32 - sh))
18302 return true;
18304 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18305 if the mask is wrong for that. */
18306 if (code == LSHIFTRT)
18307 sh = 64 - sh;
18308 if (nb == 63 || ne == 0 || ne == sh)
18309 return !(code == LSHIFTRT && nb >= sh);
18311 return false;
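/* Illustrative sketch, not part of the compiler: the conversions above
   treat a shift as a rotate whenever the mask discards every bit the
   rotate would wrap around.  Assuming 64-bit values and 0 < sh < 64,
   that equivalence can be spot-checked directly; the helper name is
   made up for illustration.  */
static inline bool
rs6000_example_shift_is_rotate_p (unsigned HOST_WIDE_INT x, int sh,
				  unsigned HOST_WIDE_INT mask)
{
  unsigned HOST_WIDE_INT shifted = (x << sh) & mask;
  /* Rotate left by SH, then apply the same mask.  */
  unsigned HOST_WIDE_INT rotated = ((x << sh) | (x >> (64 - sh))) & mask;
  return shifted == rotated;
}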
18314 /* Return the instruction template for a shift with mask in mode MODE, with
18315 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18317 const char *
18318 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18320 int nb, ne;
18322 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18323 gcc_unreachable ();
18325 if (mode == DImode && ne == 0)
18327 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18328 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18329 operands[3] = GEN_INT (63 - nb);
18330 if (dot)
18331 return "rld%I2cl. %0,%1,%2,%3";
18332 return "rld%I2cl %0,%1,%2,%3";
18335 if (mode == DImode && nb == 63)
18337 operands[3] = GEN_INT (63 - ne);
18338 if (dot)
18339 return "rld%I2cr. %0,%1,%2,%3";
18340 return "rld%I2cr %0,%1,%2,%3";
18343 if (mode == DImode
18344 && GET_CODE (operands[4]) != LSHIFTRT
18345 && CONST_INT_P (operands[2])
18346 && ne == INTVAL (operands[2]))
18348 operands[3] = GEN_INT (63 - nb);
18349 if (dot)
18350 return "rld%I2c. %0,%1,%2,%3";
18351 return "rld%I2c %0,%1,%2,%3";
18354 if (nb < 32 && ne < 32)
18356 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18357 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18358 operands[3] = GEN_INT (31 - nb);
18359 operands[4] = GEN_INT (31 - ne);
18360 /* This insn can also be a 64-bit rotate with mask that really makes
18361 it just a shift right (with mask); the %h below are to adjust for
18362 that situation (shift count is >= 32 in that case). */
18363 if (dot)
18364 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18365 return "rlw%I2nm %0,%1,%h2,%3,%4";
18368 gcc_unreachable ();
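/* For example, the SImode combination (x << 5) with mask 0xffffffe0
   gives nb = 31, ne = 5, so the rlw* case above produces
   "rlwinm %0,%1,5,0,26".  */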
18371 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18372 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18373 ASHIFT, or LSHIFTRT) in mode MODE. */
18375 bool
18376 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18378 int nb, ne;
18380 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18381 return false;
18383 int n = GET_MODE_PRECISION (mode);
18385 int sh = INTVAL (XEXP (shift, 1));
18386 if (sh < 0 || sh >= n)
18387 return false;
18389 rtx_code code = GET_CODE (shift);
18391 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18392 if (sh == 0)
18393 code = ROTATE;
18395 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18396 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18397 code = ASHIFT;
18398 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18400 code = LSHIFTRT;
18401 sh = n - sh;
18404 /* DImode rotates need rldimi. */
18405 if (mode == DImode && code == ROTATE)
18406 return (ne == sh);
18408 /* SImode rotates need rlwimi. */
18409 if (mode == SImode && code == ROTATE)
18410 return (nb < 32 && ne < 32 && sh < 32);
18412 /* Wrap-around masks are only okay for rotates. */
18413 if (ne > nb)
18414 return false;
18416 /* Don't allow ASHIFT if the mask is wrong for that. */
18417 if (code == ASHIFT && ne < sh)
18418 return false;
18420 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18421 if the mask is wrong for that. */
18422 if (nb < 32 && ne < 32 && sh < 32
18423 && !(code == LSHIFTRT && nb >= 32 - sh))
18424 return true;
18426 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18427 if the mask is wrong for that. */
18428 if (code == LSHIFTRT)
18429 sh = 64 - sh;
18430 if (ne == sh)
18431 return !(code == LSHIFTRT && nb >= sh);
18433 return false;
18436 /* Return the instruction template for an insert with mask in mode MODE, with
18437 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18439 const char *
18440 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18442 int nb, ne;
18444 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18445 gcc_unreachable ();
18447 /* Prefer rldimi because rlwimi is cracked. */
18448 if (TARGET_POWERPC64
18449 && (!dot || mode == DImode)
18450 && GET_CODE (operands[4]) != LSHIFTRT
18451 && ne == INTVAL (operands[2]))
18453 operands[3] = GEN_INT (63 - nb);
18454 if (dot)
18455 return "rldimi. %0,%1,%2,%3";
18456 return "rldimi %0,%1,%2,%3";
18459 if (nb < 32 && ne < 32)
18461 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18462 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18463 operands[3] = GEN_INT (31 - nb);
18464 operands[4] = GEN_INT (31 - ne);
18465 if (dot)
18466 return "rlwimi. %0,%1,%2,%3,%4";
18467 return "rlwimi %0,%1,%2,%3,%4";
18470 gcc_unreachable ();
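/* For example, inserting (x << 8) under the SImode mask 0xff00 gives
   nb = 15, ne = 8; on a 32-bit target the rlwimi case above produces
   "rlwimi %0,%1,8,16,23" (a 64-bit target prefers rldimi here, since
   ne equals the shift count).  */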
18473 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18474 using two machine instructions. */
18476 bool
18477 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18479 /* There are two kinds of AND we can handle with two insns:
18480 1) those we can do with two rl* insns;
18481 2) ori[s];xori[s].
18483 We do not handle that last case yet. */
18485 /* If there is just one stretch of ones, we can do it. */
18486 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18487 return true;
18489 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18490 one insn, we can do the whole thing with two. */
18491 unsigned HOST_WIDE_INT val = INTVAL (c);
18492 unsigned HOST_WIDE_INT bit1 = val & -val;
18493 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18494 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18495 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18496 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
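/* Illustrative sketch, not part of the compiler: the hole-filling
   computation above as a standalone helper, with a made-up name.  For
   VAL = 0xf0f0 it fills the hole 0x0f00, giving 0xfff0, a single run
   of ones and hence a valid one-insn mask.  */
static inline unsigned HOST_WIDE_INT
rs6000_example_fill_lowest_hole (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT bit1 = val & -val;		/* Lowest set bit.  */
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;	/* Lowest clear bit above it.  */
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;	/* VAL minus its lowest run.  */
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;		/* First set bit past the hole.  */
  return val + bit3 - bit2;				/* The hole, filled in.  */
}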
18499 /* Emit a potentially record-form instruction, setting DST from SRC.
18500 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18501 signed comparison of DST with zero. If DOT is 1, the generated RTL
18502 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18503 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18504 a separate COMPARE. */
18506 static void
18507 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18509 if (dot == 0)
18511 emit_move_insn (dst, src);
18512 return;
18515 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18517 emit_move_insn (dst, src);
18518 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18519 return;
18522 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18523 if (dot == 1)
18525 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18526 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18528 else
18530 rtx set = gen_rtx_SET (dst, src);
18531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
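/* For example, with DOT == 2 and CCREG being CR0, an AND of two GPRs is
   emitted as a PARALLEL of the compare-with-zero that sets CR0 and the
   SET of the destination, which later matches a single record-form
   "and." instruction.  */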
18535 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18536 If EXPAND is true, split rotate-and-mask instructions we generate to
18537 their constituent parts as well (this is used during expand); if DOT
18538 is 1, make the last insn a record-form instruction clobbering the
18539 destination GPR and setting the CC reg (from operands[3]); if 2, set
18540 that GPR as well as the CC reg. */
18542 void
18543 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18545 gcc_assert (!(expand && dot));
18547 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18549 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18550 shift right. This generates better code than doing the masks without
18551 shifts, or shifting first right and then left. */
18552 int nb, ne;
18553 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18555 gcc_assert (mode == DImode);
18557 int shift = 63 - nb;
18558 if (expand)
18560 rtx tmp1 = gen_reg_rtx (DImode);
18561 rtx tmp2 = gen_reg_rtx (DImode);
18562 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18563 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18564 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18566 else
18568 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18569 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18570 emit_move_insn (operands[0], tmp);
18571 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18572 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18574 return;
18577 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18578 that does the rest. */
18579 unsigned HOST_WIDE_INT bit1 = val & -val;
18580 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18581 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18582 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18584 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18585 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18587 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18589 /* Two "no-rotate"-and-mask instructions, for SImode. */
18590 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18592 gcc_assert (mode == SImode);
18594 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18595 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18596 emit_move_insn (reg, tmp);
18597 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18598 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18599 return;
18602 gcc_assert (mode == DImode);
18604 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18605 insns; we have to do the first in SImode, because it wraps. */
18606 if (mask2 <= 0xffffffff
18607 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18609 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18610 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18611 GEN_INT (mask1));
18612 rtx reg_low = gen_lowpart (SImode, reg);
18613 emit_move_insn (reg_low, tmp);
18614 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18615 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18616 return;
18619 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18620 at the top end), rotate back and clear the other hole. */
18621 int right = exact_log2 (bit3);
18622 int left = 64 - right;
18624 /* Rotate the mask too. */
18625 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18627 if (expand)
18629 rtx tmp1 = gen_reg_rtx (DImode);
18630 rtx tmp2 = gen_reg_rtx (DImode);
18631 rtx tmp3 = gen_reg_rtx (DImode);
18632 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18633 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18634 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18635 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18637 else
18639 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18640 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18641 emit_move_insn (operands[0], tmp);
18642 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18643 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18644 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
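/* For example, SImode VAL = 0x0ff000ff splits into mask1 = 0xfff000ff
   (a wrap-around run, one rlwinm) and mask2 = 0x0fffffff (a second
   rlwinm); the intersection of the two masks is VAL again.  */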
18648 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
18649 for lfq and stfq insns iff the registers are hard registers. */
18651 int
18652 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18654 /* We might have been passed a SUBREG. */
18655 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
18656 return 0;
18658 /* We might have been passed non floating point registers. */
18659 if (!FP_REGNO_P (REGNO (reg1))
18660 || !FP_REGNO_P (REGNO (reg2)))
18661 return 0;
18663 return (REGNO (reg1) == REGNO (reg2) - 1);
18666 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18667 addr1 and addr2 must be in consecutive memory locations
18668 (addr2 == addr1 + 8). */
18670 int
18671 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18673 rtx addr1, addr2;
18674 unsigned int reg1, reg2;
18675 int offset1, offset2;
18677 /* The mems cannot be volatile. */
18678 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18679 return 0;
18681 addr1 = XEXP (mem1, 0);
18682 addr2 = XEXP (mem2, 0);
18684 /* Extract an offset (if used) from the first addr. */
18685 if (GET_CODE (addr1) == PLUS)
18687 /* If not a REG, return zero. */
18688 if (GET_CODE (XEXP (addr1, 0)) != REG)
18689 return 0;
18690 else
18692 reg1 = REGNO (XEXP (addr1, 0));
18693 /* The offset must be constant! */
18694 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18695 return 0;
18696 offset1 = INTVAL (XEXP (addr1, 1));
18699 else if (GET_CODE (addr1) != REG)
18700 return 0;
18701 else
18703 reg1 = REGNO (addr1);
18704 /* This was a simple (mem (reg)) expression. Offset is 0. */
18705 offset1 = 0;
18708 /* And now for the second addr. */
18709 if (GET_CODE (addr2) == PLUS)
18711 /* If not a REG, return zero. */
18712 if (GET_CODE (XEXP (addr2, 0)) != REG)
18713 return 0;
18714 else
18716 reg2 = REGNO (XEXP (addr2, 0));
18717 /* The offset must be constant. */
18718 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18719 return 0;
18720 offset2 = INTVAL (XEXP (addr2, 1));
18723 else if (GET_CODE (addr2) != REG)
18724 return 0;
18725 else
18727 reg2 = REGNO (addr2);
18728 /* This was a simple (mem (reg)) expression. Offset is 0. */
18729 offset2 = 0;
18732 /* Both of these must have the same base register. */
18733 if (reg1 != reg2)
18734 return 0;
18736 /* The offset for the second addr must be 8 more than the first addr. */
18737 if (offset2 != offset1 + 8)
18738 return 0;
18740 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18741 instructions. */
18742 return 1;
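/* For example, the address pair 16(r3) and 24(r3) qualifies: same base
   register, offsets exactly 8 bytes apart.  16(r3) and 24(r4) does
   not.  */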
18746 rtx
18747 rs6000_secondary_memory_needed_rtx (machine_mode mode)
18749 static bool eliminated = false;
18750 rtx ret;
18752 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
18753 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18754 else
18756 rtx mem = cfun->machine->sdmode_stack_slot;
18757 gcc_assert (mem != NULL_RTX);
18759 if (!eliminated)
18761 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
18762 cfun->machine->sdmode_stack_slot = mem;
18763 eliminated = true;
18765 ret = mem;
18768 if (TARGET_DEBUG_ADDR)
18770 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
18771 GET_MODE_NAME (mode));
18772 if (!ret)
18773 fprintf (stderr, "\tNULL_RTX\n");
18774 else
18775 debug_rtx (ret);
18778 return ret;
18781 /* Return the mode to be used for memory when a secondary memory
18782 location is needed. For SDmode values we need to use DDmode, in
18783 all other cases we can use the same mode. */
18784 machine_mode
18785 rs6000_secondary_memory_needed_mode (machine_mode mode)
18787 if (lra_in_progress && mode == SDmode)
18788 return DDmode;
18789 return mode;
18792 static tree
18793 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
18795 /* Don't walk into types. */
18796 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
18798 *walk_subtrees = 0;
18799 return NULL_TREE;
18802 switch (TREE_CODE (*tp))
18804 case VAR_DECL:
18805 case PARM_DECL:
18806 case FIELD_DECL:
18807 case RESULT_DECL:
18808 case SSA_NAME:
18809 case REAL_CST:
18810 case MEM_REF:
18811 case VIEW_CONVERT_EXPR:
18812 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
18813 return *tp;
18814 break;
18815 default:
18816 break;
18819 return NULL_TREE;
18822 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18823 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18824 only work on the traditional altivec registers, note if an altivec register
18825 was chosen. */
18827 static enum rs6000_reg_type
18828 register_to_reg_type (rtx reg, bool *is_altivec)
18830 HOST_WIDE_INT regno;
18831 enum reg_class rclass;
18833 if (GET_CODE (reg) == SUBREG)
18834 reg = SUBREG_REG (reg);
18836 if (!REG_P (reg))
18837 return NO_REG_TYPE;
18839 regno = REGNO (reg);
18840 if (regno >= FIRST_PSEUDO_REGISTER)
18842 if (!lra_in_progress && !reload_in_progress && !reload_completed)
18843 return PSEUDO_REG_TYPE;
18845 regno = true_regnum (reg);
18846 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18847 return PSEUDO_REG_TYPE;
18850 gcc_assert (regno >= 0);
18852 if (is_altivec && ALTIVEC_REGNO_P (regno))
18853 *is_altivec = true;
18855 rclass = rs6000_regno_regclass[regno];
18856 return reg_class_to_reg_type[(int)rclass];
18859 /* Helper function to return the cost of adding a TOC entry address. */
18861 static inline int
18862 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18864 int ret;
18866 if (TARGET_CMODEL != CMODEL_SMALL)
18867 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18869 else
18870 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18872 return ret;
18875 /* Helper function for rs6000_secondary_reload to determine whether the memory
18876 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18877 needs reloading. Return negative if the memory is not handled by the memory
18878 helper functions and to try a different reload method, 0 if no additional
18880 instructions are needed, and positive to give the extra cost for the
18880 memory. */
18882 static int
18883 rs6000_secondary_reload_memory (rtx addr,
18884 enum reg_class rclass,
18885 machine_mode mode)
18887 int extra_cost = 0;
18888 rtx reg, and_arg, plus_arg0, plus_arg1;
18889 addr_mask_type addr_mask;
18890 const char *type = NULL;
18891 const char *fail_msg = NULL;
18893 if (GPR_REG_CLASS_P (rclass))
18894 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18896 else if (rclass == FLOAT_REGS)
18897 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18899 else if (rclass == ALTIVEC_REGS)
18900 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18902 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18903 else if (rclass == VSX_REGS)
18904 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18905 & ~RELOAD_REG_AND_M16);
18907 /* If the register allocator hasn't made up its mind yet on the register
18908 class to use, settle on defaults to use. */
18909 else if (rclass == NO_REGS)
18911 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18912 & ~RELOAD_REG_AND_M16);
18914 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18915 addr_mask &= ~(RELOAD_REG_INDEXED
18916 | RELOAD_REG_PRE_INCDEC
18917 | RELOAD_REG_PRE_MODIFY);
18920 else
18921 addr_mask = 0;
18923 /* If the register isn't valid in this register class, just return now. */
18924 if ((addr_mask & RELOAD_REG_VALID) == 0)
18926 if (TARGET_DEBUG_ADDR)
18928 fprintf (stderr,
18929 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18930 "not valid in class\n",
18931 GET_MODE_NAME (mode), reg_class_names[rclass]);
18932 debug_rtx (addr);
18935 return -1;
18938 switch (GET_CODE (addr))
18940 /* Does the register class support auto update forms for this mode? We
18941 don't need a scratch register, since the powerpc only supports
18942 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18943 case PRE_INC:
18944 case PRE_DEC:
18945 reg = XEXP (addr, 0);
18946 if (!base_reg_operand (reg, GET_MODE (reg)))
18948 fail_msg = "no base register #1";
18949 extra_cost = -1;
18952 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18954 extra_cost = 1;
18955 type = "update";
18957 break;
18959 case PRE_MODIFY:
18960 reg = XEXP (addr, 0);
18961 plus_arg1 = XEXP (addr, 1);
18962 if (!base_reg_operand (reg, GET_MODE (reg))
18963 || GET_CODE (plus_arg1) != PLUS
18964 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18966 fail_msg = "bad PRE_MODIFY";
18967 extra_cost = -1;
18970 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18972 extra_cost = 1;
18973 type = "update";
18975 break;
18977 /* Do we need to simulate AND -16 to clear the bottom address bits used
18978 in VMX load/stores? Only allow the AND for vector sizes. */
18979 case AND:
18980 and_arg = XEXP (addr, 0);
18981 if (GET_MODE_SIZE (mode) != 16
18982 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18983 || INTVAL (XEXP (addr, 1)) != -16)
18985 fail_msg = "bad Altivec AND #1";
18986 extra_cost = -1;
18989 if (rclass != ALTIVEC_REGS)
18991 if (legitimate_indirect_address_p (and_arg, false))
18992 extra_cost = 1;
18994 else if (legitimate_indexed_address_p (and_arg, false))
18995 extra_cost = 2;
18997 else
18999 fail_msg = "bad Altivec AND #2";
19000 extra_cost = -1;
19003 type = "and";
19005 break;
19007 /* If this is an indirect address, make sure it is a base register. */
19008 case REG:
19009 case SUBREG:
19010 if (!legitimate_indirect_address_p (addr, false))
19012 extra_cost = 1;
19013 type = "move";
19015 break;
19017 /* If this is an indexed address, make sure the register class can handle
19018 indexed addresses for this mode. */
19019 case PLUS:
19020 plus_arg0 = XEXP (addr, 0);
19021 plus_arg1 = XEXP (addr, 1);
19023 /* (plus (plus (reg) (constant)) (constant)) is generated during
19024 push_reload processing, so handle it now. */
19025 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19027 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19029 extra_cost = 1;
19030 type = "offset";
19034 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19035 push_reload processing, so handle it now. */
19036 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19038 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19040 extra_cost = 1;
19041 type = "indexed #2";
19045 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19047 fail_msg = "no base register #2";
19048 extra_cost = -1;
19051 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19053 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19054 || !legitimate_indexed_address_p (addr, false))
19056 extra_cost = 1;
19057 type = "indexed";
19061 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19062 && CONST_INT_P (plus_arg1))
19064 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19066 extra_cost = 1;
19067 type = "vector d-form offset";
19071 /* Make sure the register class can handle offset addresses. */
19072 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19074 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19076 extra_cost = 1;
19077 type = "offset #2";
19081 else
19083 fail_msg = "bad PLUS";
19084 extra_cost = -1;
19087 break;
19089 case LO_SUM:
19090 /* Quad offsets are restricted and can't handle normal addresses. */
19091 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19093 extra_cost = -1;
19094 type = "vector d-form lo_sum";
19097 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19099 fail_msg = "bad LO_SUM";
19100 extra_cost = -1;
19103 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19105 extra_cost = 1;
19106 type = "lo_sum";
19108 break;
19110 /* Static addresses need to create a TOC entry. */
19111 case CONST:
19112 case SYMBOL_REF:
19113 case LABEL_REF:
19114 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19116 extra_cost = -1;
19117 type = "vector d-form lo_sum #2";
19120 else
19122 type = "address";
19123 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19125 break;
19127 /* TOC references look like offsetable memory. */
19128 case UNSPEC:
19129 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19131 fail_msg = "bad UNSPEC";
19132 extra_cost = -1;
19135 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19137 extra_cost = -1;
19138 type = "vector d-form lo_sum #3";
19141 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19143 extra_cost = 1;
19144 type = "toc reference";
19146 break;
19148 default:
19150 fail_msg = "bad address";
19151 extra_cost = -1;
19155 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19157 if (extra_cost < 0)
19158 fprintf (stderr,
19159 "rs6000_secondary_reload_memory error: mode = %s, "
19160 "class = %s, addr_mask = '%s', %s\n",
19161 GET_MODE_NAME (mode),
19162 reg_class_names[rclass],
19163 rs6000_debug_addr_mask (addr_mask, false),
19164 (fail_msg != NULL) ? fail_msg : "<bad address>");
19166 else
19167 fprintf (stderr,
19168 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19169 "addr_mask = '%s', extra cost = %d, %s\n",
19170 GET_MODE_NAME (mode),
19171 reg_class_names[rclass],
19172 rs6000_debug_addr_mask (addr_mask, false),
19173 extra_cost,
19174 (type) ? type : "<none>");
19176 debug_rtx (addr);
19179 return extra_cost;
19182 /* Helper function for rs6000_secondary_reload to return true if a move to a
19183 different register class is really a simple move. */
19185 static bool
19186 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19187 enum rs6000_reg_type from_type,
19188 machine_mode mode)
19190 int size;
19192 /* Add support for various direct moves available. In this function, we only
19193 look at cases where we don't need any extra registers, and one or more
19194 simple move insns are issued. At present, 32-bit integers are not allowed
19195 in FPR/VSX registers. Single precision binary floating point is not a
19196 simple move because we need to convert to the single precision memory layout.
19197 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19198 need special direct move handling, which we do not support yet. */
19199 size = GET_MODE_SIZE (mode);
19200 if (TARGET_DIRECT_MOVE
19201 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
19202 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19203 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19204 return true;
19206 else if (TARGET_DIRECT_MOVE_128 && size == 16 && mode != TDmode
19207 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19208 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
19209 return true;
19211 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19212 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19213 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19214 return true;
19216 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19217 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19218 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19219 return true;
19221 return false;
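/* For example, a DImode move between a GPR and a VSX register on a
   64-bit target with direct moves enabled is a simple move (one mtvsrd
   or mfvsrd), whereas an SFmode move is not, because the value must be
   converted to or from the scalar memory layout first.  */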
19224 /* Direct move helper function for rs6000_secondary_reload.  Handle the
19225 special direct moves that involve allocating an extra register; return
19226 true if there is such a helper, storing its insn code and extra cost
19227 in SRI, and false otherwise.  */
19229 static bool
19230 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19231 enum rs6000_reg_type from_type,
19232 machine_mode mode,
19233 secondary_reload_info *sri,
19234 bool altivec_p)
19236 bool ret = false;
19237 enum insn_code icode = CODE_FOR_nothing;
19238 int cost = 0;
19239 int size = GET_MODE_SIZE (mode);
19241 if (TARGET_POWERPC64)
19243 if (size == 16)
19245 /* Handle moving 128-bit values from GPRs to VSX registers on
19246 ISA 2.07 (power8, power9) when running in 64-bit mode using
19247 XXPERMDI to glue the two 64-bit values back together. */
19248 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19250 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19251 icode = reg_addr[mode].reload_vsx_gpr;
19254 /* Handle moving 128-bit values from VSX registers to GPRs on
19255 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19256 bottom 64-bit value. */
19257 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19259 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19260 icode = reg_addr[mode].reload_gpr_vsx;
19264 else if (mode == SFmode)
19266 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19268 cost = 3; /* xscvdpspn, mfvsrd, and. */
19269 icode = reg_addr[mode].reload_gpr_vsx;
19272 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19274 cost = 2; /* mtvsrz, xscvspdpn. */
19275 icode = reg_addr[mode].reload_vsx_gpr;
19280 if (TARGET_POWERPC64 && size == 16)
19282 /* Handle moving 128-bit values from GPRs to VSX registers on
19283 ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two
19284 64-bit values back together. */
19285 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19287 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19288 icode = reg_addr[mode].reload_vsx_gpr;
19291 /* Handle moving 128-bit values from VSX registers to GPRs on
19292 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19293 bottom 64-bit value. */
19294 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19296 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19297 icode = reg_addr[mode].reload_gpr_vsx;
19301 else if (!TARGET_POWERPC64 && size == 8)
19303 /* Handle moving 64-bit values from GPRs to floating point registers on
19304 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19305 32-bit values back together. Altivec register classes must be handled
19306 specially since a different instruction is used, and the secondary
19307 reload support requires a single instruction class in the scratch
19308 register constraint. However, right now TFmode is not allowed in
19309 Altivec registers, so the pattern will never match. */
19310 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19312 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19313 icode = reg_addr[mode].reload_fpr_gpr;
19317 if (icode != CODE_FOR_nothing)
19319 ret = true;
19320 if (sri)
19322 sri->icode = icode;
19323 sri->extra_cost = cost;
19327 return ret;
19330 /* Return whether a move between two register classes can be done either
19331 directly (simple move) or via a pattern that uses a single extra temporary
19332 (using ISA 2.07's direct move in this case). */
19334 static bool
19335 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19336 enum rs6000_reg_type from_type,
19337 machine_mode mode,
19338 secondary_reload_info *sri,
19339 bool altivec_p)
19341 /* Fall back to load/store reloads if either type is not a register. */
19342 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19343 return false;
19345 /* If we haven't allocated registers yet, assume the move can be done for the
19346 standard register types. */
19347 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19348 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19349 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19350 return true;
19352 /* Moves within the same set of registers are simple moves for
19353 non-specialized registers. */
19354 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19355 return true;
19357 /* Check whether a simple move can be done directly. */
19358 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19360 if (sri)
19362 sri->icode = CODE_FOR_nothing;
19363 sri->extra_cost = 0;
19365 return true;
19368 /* Now check if we can do it in a few steps. */
19369 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19370 altivec_p);
19373 /* Inform reload about cases where moving X with a mode MODE to a register in
19374 RCLASS requires an extra scratch or immediate register. Return the class
19375 needed for the immediate register.
19377 For VSX and Altivec, we may need a register to convert sp+offset into
19378 reg+sp.
19380 For misaligned 64-bit gpr loads and stores we need a register to
19381 convert an offset address to indirect. */
19383 static reg_class_t
19384 rs6000_secondary_reload (bool in_p,
19385 rtx x,
19386 reg_class_t rclass_i,
19387 machine_mode mode,
19388 secondary_reload_info *sri)
19390 enum reg_class rclass = (enum reg_class) rclass_i;
19391 reg_class_t ret = ALL_REGS;
19392 enum insn_code icode;
19393 bool default_p = false;
19394 bool done_p = false;
19396 /* Allow subreg of memory before/during reload. */
19397 bool memory_p = (MEM_P (x)
19398 || (!reload_completed && GET_CODE (x) == SUBREG
19399 && MEM_P (SUBREG_REG (x))));
19401 sri->icode = CODE_FOR_nothing;
19402 sri->extra_cost = 0;
19403 icode = ((in_p)
19404 ? reg_addr[mode].reload_load
19405 : reg_addr[mode].reload_store);
19407 if (REG_P (x) || register_operand (x, mode))
19409 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19410 bool altivec_p = (rclass == ALTIVEC_REGS);
19411 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19413 if (!in_p)
19415 enum rs6000_reg_type exchange = to_type;
19416 to_type = from_type;
19417 from_type = exchange;
19420 /* Can we do a direct move of some sort? */
19421 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19422 altivec_p))
19424 icode = (enum insn_code)sri->icode;
19425 default_p = false;
19426 done_p = true;
19427 ret = NO_REGS;
19431 /* Make sure 0.0 is not reloaded or forced into memory. */
19432 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19434 ret = NO_REGS;
19435 default_p = false;
19436 done_p = true;
19439 /* If this is a scalar floating point value and we want to load it into the
19440 traditional Altivec registers, do it via a move via a traditional floating
19441 point register, unless we have D-form addressing. Also make sure that
19442 non-zero constants use a FPR. */
19443 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19444 && !mode_supports_vmx_dform (mode)
19445 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19446 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19448 ret = FLOAT_REGS;
19449 default_p = false;
19450 done_p = true;
19453 /* Handle reload of load/stores if we have reload helper functions. */
19454 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19456 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19457 mode);
19459 if (extra_cost >= 0)
19461 done_p = true;
19462 ret = NO_REGS;
19463 if (extra_cost > 0)
19465 sri->extra_cost = extra_cost;
19466 sri->icode = icode;
19471 /* Handle unaligned loads and stores of integer registers. */
19472 if (!done_p && TARGET_POWERPC64
19473 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19474 && memory_p
19475 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19477 rtx addr = XEXP (x, 0);
19478 rtx off = address_offset (addr);
19480 if (off != NULL_RTX)
19482 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19483 unsigned HOST_WIDE_INT offset = INTVAL (off);
19485 /* We need a secondary reload when our legitimate_address_p
19486 says the address is good (as otherwise the entire address
19487 will be reloaded), and the offset is not a multiple of
19488 four or we have an address wrap. Address wrap will only
19489 occur for LO_SUMs since legitimate_offset_address_p
19490 rejects addresses for 16-byte mems that will wrap. */
19491 if (GET_CODE (addr) == LO_SUM
19492 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19493 && ((offset & 3) != 0
19494 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19495 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19496 && (offset & 3) != 0))
19498 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19499 if (in_p)
19500 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19501 : CODE_FOR_reload_di_load);
19502 else
19503 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19504 : CODE_FOR_reload_di_store);
19505 sri->extra_cost = 2;
19506 ret = NO_REGS;
19507 done_p = true;
19509 else
19510 default_p = true;
19512 else
19513 default_p = true;
19516 if (!done_p && !TARGET_POWERPC64
19517 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19518 && memory_p
19519 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19521 rtx addr = XEXP (x, 0);
19522 rtx off = address_offset (addr);
19524 if (off != NULL_RTX)
19526 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19527 unsigned HOST_WIDE_INT offset = INTVAL (off);
19529 /* We need a secondary reload when our legitimate_address_p
19530 says the address is good (as otherwise the entire address
19531 will be reloaded), and we have a wrap.
19533 legitimate_lo_sum_address_p allows LO_SUM addresses to
19534 have any offset so test for wrap in the low 16 bits.
19536 legitimate_offset_address_p checks for the range
19537 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19538 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19539 [0x7ff4,0x7fff] respectively, so test for the
19540 intersection of these ranges, [0x7ffc,0x7fff] and
19541 [0x7ff4,0x7ff7] respectively.
19543 Note that the address we see here may have been
19544 manipulated by legitimize_reload_address. */
19545 if (GET_CODE (addr) == LO_SUM
19546 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19547 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19549 if (in_p)
19550 sri->icode = CODE_FOR_reload_si_load;
19551 else
19552 sri->icode = CODE_FOR_reload_si_store;
19553 sri->extra_cost = 2;
19554 ret = NO_REGS;
19555 done_p = true;
19557 else
19558 default_p = true;
19560 else
19561 default_p = true;
19564 if (!done_p)
19565 default_p = true;
19567 if (default_p)
19568 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19570 gcc_assert (ret != ALL_REGS);
19572 if (TARGET_DEBUG_ADDR)
19574 fprintf (stderr,
19575 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19576 "mode = %s",
19577 reg_class_names[ret],
19578 in_p ? "true" : "false",
19579 reg_class_names[rclass],
19580 GET_MODE_NAME (mode));
19582 if (reload_completed)
19583 fputs (", after reload", stderr);
19585 if (!done_p)
19586 fputs (", done_p not set", stderr);
19588 if (default_p)
19589 fputs (", default secondary reload", stderr);
19591 if (sri->icode != CODE_FOR_nothing)
19592 fprintf (stderr, ", reload func = %s, extra cost = %d",
19593 insn_data[sri->icode].name, sri->extra_cost);
19595 else if (sri->extra_cost > 0)
19596 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19598 fputs ("\n", stderr);
19599 debug_rtx (x);
19602 return ret;
19605 /* Better tracing for rs6000_secondary_reload_inner. */
19607 static void
19608 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19609 bool store_p)
19611 rtx set, clobber;
19613 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19615 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19616 store_p ? "store" : "load");
19618 if (store_p)
19619 set = gen_rtx_SET (mem, reg);
19620 else
19621 set = gen_rtx_SET (reg, mem);
19623 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19624 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19627 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19628 ATTRIBUTE_NORETURN;
19630 static void
19631 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19632 bool store_p)
19634 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19635 gcc_unreachable ();
19638 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19639 reload helper functions. These were identified in
19640 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19641 reload, it calls the insns:
19642 reload_<RELOAD:mode>_<P:mptrsize>_store
19643 reload_<RELOAD:mode>_<P:mptrsize>_load
19645 which in turn calls this function, to do whatever is necessary to create
19646 valid addresses. */
19648 void
19649 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19651 int regno = true_regnum (reg);
19652 machine_mode mode = GET_MODE (reg);
19653 addr_mask_type addr_mask;
19654 rtx addr;
19655 rtx new_addr;
19656 rtx op_reg, op0, op1;
19657 rtx and_op;
19658 rtx cc_clobber;
19659 rtvec rv;
19661 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
19662 || !base_reg_operand (scratch, GET_MODE (scratch)))
19663 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19665 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19666 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19668 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19669 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19671 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19672 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19674 else
19675 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19677 /* Make sure the mode is valid in this register class. */
19678 if ((addr_mask & RELOAD_REG_VALID) == 0)
19679 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19681 if (TARGET_DEBUG_ADDR)
19682 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19684 new_addr = addr = XEXP (mem, 0);
19685 switch (GET_CODE (addr))
19687 /* Does the register class support auto update forms for this mode? If
19688 not, do the update now. We don't need a scratch register, since the
19689 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19690 case PRE_INC:
19691 case PRE_DEC:
19692 op_reg = XEXP (addr, 0);
19693 if (!base_reg_operand (op_reg, Pmode))
19694 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19696 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19698 HOST_WIDE_INT delta = GET_MODE_SIZE (mode);
if (GET_CODE (addr) == PRE_DEC)
delta = -delta;
emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
19699 new_addr = op_reg;
19701 break;
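/* Editor's illustration: when the reload register class lacks update
   forms, a DFmode PRE_INC based on r9 is rewritten by the code above
   as "addi 9,9,8" followed by a plain (mem (reg 9)) access; a PRE_DEC
   likewise becomes "addi 9,9,-8".  */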
19703 case PRE_MODIFY:
19704 op0 = XEXP (addr, 0);
19705 op1 = XEXP (addr, 1);
19706 if (!base_reg_operand (op0, Pmode)
19707 || GET_CODE (op1) != PLUS
19708 || !rtx_equal_p (op0, XEXP (op1, 0)))
19709 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19711 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19713 emit_insn (gen_rtx_SET (op0, op1));
19714 new_addr = op0;
19716 break;
19718 /* Do we need to simulate AND -16 to clear the bottom address bits used
19719 in VMX load/stores? */
19720 case AND:
19721 op0 = XEXP (addr, 0);
19722 op1 = XEXP (addr, 1);
19723 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19725 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19726 op_reg = op0;
19728 else if (GET_CODE (op1) == PLUS)
19730 emit_insn (gen_rtx_SET (scratch, op1));
19731 op_reg = scratch;
19734 else
19735 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19737 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19738 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19739 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19740 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19741 new_addr = scratch;
19743 break;
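/* Editor's note: the AND with -16 form only arises for AltiVec-style
   addresses, where lvx/stvx ignore the low four address bits.  When
   the register class cannot use that form directly, the sum (if any)
   is first copied into the scratch register and the masking is then
   done explicitly; the PARALLEL with a scratch CCmode clobber is
   needed to match the machine's AND pattern, whose record forms set
   CR0.  */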
19745 /* If this is an indirect address, make sure it is a base register. */
19746 case REG:
19747 case SUBREG:
19748 if (!base_reg_operand (addr, GET_MODE (addr)))
19750 emit_insn (gen_rtx_SET (scratch, addr));
19751 new_addr = scratch;
19753 break;
19755 /* If this is an indexed address, make sure the register class can handle
19756 indexed addresses for this mode. */
19757 case PLUS:
19758 op0 = XEXP (addr, 0);
19759 op1 = XEXP (addr, 1);
19760 if (!base_reg_operand (op0, Pmode))
19761 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19763 else if (int_reg_operand (op1, Pmode))
19765 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19767 emit_insn (gen_rtx_SET (scratch, addr));
19768 new_addr = scratch;
19772 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
19774 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19775 || !quad_address_p (addr, mode, false))
19777 emit_insn (gen_rtx_SET (scratch, addr));
19778 new_addr = scratch;
19782 /* Make sure the register class can handle offset addresses. */
19783 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19785 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19787 emit_insn (gen_rtx_SET (scratch, addr));
19788 new_addr = scratch;
19792 else
19793 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19795 break;
19797 case LO_SUM:
19798 op0 = XEXP (addr, 0);
19799 op1 = XEXP (addr, 1);
19800 if (!base_reg_operand (op0, Pmode))
19801 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19803 else if (int_reg_operand (op1, Pmode))
19805 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19807 emit_insn (gen_rtx_SET (scratch, addr));
19808 new_addr = scratch;
19812 /* Quad offsets are restricted and can't handle normal addresses. */
19813 else if (mode_supports_vsx_dform_quad (mode))
19815 emit_insn (gen_rtx_SET (scratch, addr));
19816 new_addr = scratch;
19819 /* Make sure the register class can handle offset addresses. */
19820 else if (legitimate_lo_sum_address_p (mode, addr, false))
19822 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19824 emit_insn (gen_rtx_SET (scratch, addr));
19825 new_addr = scratch;
19829 else
19830 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19832 break;
19834 case SYMBOL_REF:
19835 case CONST:
19836 case LABEL_REF:
19837 rs6000_emit_move (scratch, addr, Pmode);
19838 new_addr = scratch;
19839 break;
19841 default:
19842 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19845 /* Adjust the address if it changed. */
19846 if (addr != new_addr)
19848 mem = replace_equiv_address_nv (mem, new_addr);
19849 if (TARGET_DEBUG_ADDR)
19850 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19853 /* Now create the move. */
19854 if (store_p)
19855 emit_insn (gen_rtx_SET (mem, reg));
19856 else
19857 emit_insn (gen_rtx_SET (reg, mem));
19859 return;
19862 /* Convert reloads involving 64-bit gprs and misaligned offset
19863 addressing, or multiple 32-bit gprs and offsets that are too large,
19864 to use indirect addressing. */
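/* Editor's illustration: on 64-bit, ld and std are DS-form
   instructions whose displacement must be a multiple of 4, so a
   64-bit gpr reload from an address with a misaligned offset cannot
   use the offset directly.  The code below instead moves the whole
   address into the scratch register and performs the access
   indirectly.  */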
19866 void
19867 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19869 int regno = true_regnum (reg);
19870 enum reg_class rclass;
19871 rtx addr;
19872 rtx scratch_or_premodify = scratch;
19874 if (TARGET_DEBUG_ADDR)
19876 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19877 store_p ? "store" : "load");
19878 fprintf (stderr, "reg:\n");
19879 debug_rtx (reg);
19880 fprintf (stderr, "mem:\n");
19881 debug_rtx (mem);
19882 fprintf (stderr, "scratch:\n");
19883 debug_rtx (scratch);
19886 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19887 gcc_assert (GET_CODE (mem) == MEM);
19888 rclass = REGNO_REG_CLASS (regno);
19889 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19890 addr = XEXP (mem, 0);
19892 if (GET_CODE (addr) == PRE_MODIFY)
19894 gcc_assert (REG_P (XEXP (addr, 0))
19895 && GET_CODE (XEXP (addr, 1)) == PLUS
19896 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19897 scratch_or_premodify = XEXP (addr, 0);
19898 if (!HARD_REGISTER_P (scratch_or_premodify))
19899 /* If we have a pseudo here then reload will have arranged
19900 to have it replaced, but only in the original insn.
19901 Use the replacement here too. */
19902 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19904 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19905 expressions from the original insn, without unsharing them.
19906 Any RTL that points into the original insn will of course
19907 have register replacements applied. That is why we don't
19908 need to look for replacements under the PLUS. */
19909 addr = XEXP (addr, 1);
19911 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19913 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19915 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19917 /* Now create the move. */
19918 if (store_p)
19919 emit_insn (gen_rtx_SET (mem, reg));
19920 else
19921 emit_insn (gen_rtx_SET (reg, mem));
19923 return;
19926 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
19927 this function has any SDmode references. If we are on a power7 or later, we
19928 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
19929 can load/store the value. */
19931 static void
19932 rs6000_alloc_sdmode_stack_slot (void)
19934 tree t;
19935 basic_block bb;
19936 gimple_stmt_iterator gsi;
19938 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
19939 /* We use a different approach for dealing with the secondary
19940 memory in LRA. */
19941 if (ira_use_lra_p)
19942 return;
19944 if (TARGET_NO_SDMODE_STACK)
19945 return;
19947 FOR_EACH_BB_FN (bb, cfun)
19948 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
19950 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
19951 if (ret)
19953 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19954 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19955 SDmode, 0);
19956 return;
19960 /* Check for any SDmode parameters of the function. */
19961 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
19963 if (TREE_TYPE (t) == error_mark_node)
19964 continue;
19966 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
19967 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
19969 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
19970 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
19971 SDmode, 0);
19972 return;
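/* Editor's note (an assumption about the mechanism, not upstream
   text): the slot is allocated in DDmode (8 bytes) and then
   re-addressed as SDmode because FPRs hold SDmode values widened to
   the DDmode image; copying through the 8-byte slot is what makes the
   GPR<->FPR transfer work before power7.  */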
19977 static void
19978 rs6000_instantiate_decls (void)
19980 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
19981 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
19984 /* Given an rtx X being reloaded into a reg required to be
19985 in class CLASS, return the class of reg to actually use.
19986 In general this is just CLASS; but on some machines
19987 in some cases it is preferable to use a more restrictive class.
19989 On the RS/6000, we have to return NO_REGS when we want to reload a
19990 floating-point CONST_DOUBLE to force it to be copied to memory.
19992 We also don't want to reload integer values into floating-point
19993 registers if we can at all help it. In fact, this can
19994 cause reload to die, if it tries to generate a reload of CTR
19995 into a FP register and discovers it doesn't have the memory location
19996 required.
19998 ??? Would it be a good idea to have reload do the converse, that is
19999 try to reload floating modes into FP registers if possible?  */
20002 static enum reg_class
20003 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20005 machine_mode mode = GET_MODE (x);
20006 bool is_constant = CONSTANT_P (x);
20008 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20009 reload class for it. */
20010 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20011 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20012 return NO_REGS;
20014 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20015 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20016 return NO_REGS;
20018 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20019 the reloading of address expressions using PLUS into floating point
20020 registers. */
20021 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20023 if (is_constant)
20025 /* Zero is always allowed in all VSX registers. */
20026 if (x == CONST0_RTX (mode))
20027 return rclass;
20029 /* If this is a vector constant that can be formed with a few Altivec
20030 instructions, we want altivec registers. */
20031 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20032 return ALTIVEC_REGS;
20034 /* Force constant to memory. */
20035 return NO_REGS;
20038 /* D-form addressing can easily reload the value. */
20039 if (mode_supports_vmx_dform (mode)
20040 || mode_supports_vsx_dform_quad (mode))
20041 return rclass;
20043 /* If this is a scalar floating point value and we don't have D-form
20044 addressing, prefer the traditional floating point registers so that we
20045 can use D-form (register+offset) addressing. */
20046 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
20047 return FLOAT_REGS;
20049 /* Prefer the Altivec registers if Altivec is handling the vector
20050 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20051 loads. */
20052 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20053 || mode == V1TImode)
20054 return ALTIVEC_REGS;
20056 return rclass;
20059 if (is_constant || GET_CODE (x) == PLUS)
20061 if (reg_class_subset_p (GENERAL_REGS, rclass))
20062 return GENERAL_REGS;
20063 if (reg_class_subset_p (BASE_REGS, rclass))
20064 return BASE_REGS;
20065 return NO_REGS;
20068 /* If we haven't picked a register class, and the type is a vector or
20069 floating point type, prefer to use the VSX, FPR, or Altivec register
20070 classes. */
20071 if (rclass == NO_REGS)
20073 if (TARGET_VSX && VECTOR_MEM_VSX_OR_P8_VECTOR_P (mode))
20074 return VSX_REGS;
20076 if (TARGET_ALTIVEC && VECTOR_MEM_ALTIVEC_P (mode))
20077 return ALTIVEC_REGS;
20079 if (DECIMAL_FLOAT_MODE_P (mode))
20080 return TARGET_DFP ? FLOAT_REGS : NO_REGS;
20082 if (TARGET_FPRS && TARGET_HARD_FLOAT && FLOAT_MODE_P (mode)
20083 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20084 return FLOAT_REGS;
20087 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20088 return GENERAL_REGS;
20090 return rclass;
20093 /* Debug version of rs6000_preferred_reload_class. */
20094 static enum reg_class
20095 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20097 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20099 fprintf (stderr,
20100 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20101 "mode = %s, x:\n",
20102 reg_class_names[ret], reg_class_names[rclass],
20103 GET_MODE_NAME (GET_MODE (x)));
20104 debug_rtx (x);
20106 return ret;
20109 /* If we are copying between FP or AltiVec registers and anything else, we need
20110 a memory location. The exception is when we are targeting ppc64 and the
20111 direct move instructions between fprs and gprs are available. Also, under VSX, you
20112 can copy vector registers from the FP register set to the Altivec register
20113 set and vice versa. */
20115 static bool
20116 rs6000_secondary_memory_needed (enum reg_class from_class,
20117 enum reg_class to_class,
20118 machine_mode mode)
20120 enum rs6000_reg_type from_type, to_type;
20121 bool altivec_p = ((from_class == ALTIVEC_REGS)
20122 || (to_class == ALTIVEC_REGS));
20124 /* If a simple/direct move is available, we don't need secondary memory. */
20125 from_type = reg_class_to_reg_type[(int)from_class];
20126 to_type = reg_class_to_reg_type[(int)to_class];
20128 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20129 (secondary_reload_info *)0, altivec_p))
20130 return false;
20132 /* If we have a floating point or vector register class, we need to use
20133 memory to transfer the data. */
20134 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20135 return true;
20137 return false;
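/* Editor's illustration: moving a DFmode value between a GPR pair and
   an FPR on a 32-bit target has no direct instruction, so this
   returns true and the value is bounced through a stack slot.  On
   power8 and later the direct-move check above succeeds for the same
   copy and no memory is needed.  */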
20140 /* Debug version of rs6000_secondary_memory_needed. */
20141 static bool
20142 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
20143 enum reg_class to_class,
20144 machine_mode mode)
20146 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
20148 fprintf (stderr,
20149 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20150 "to_class = %s, mode = %s\n",
20151 ret ? "true" : "false",
20152 reg_class_names[from_class],
20153 reg_class_names[to_class],
20154 GET_MODE_NAME (mode));
20156 return ret;
20159 /* Return the register class of a scratch register needed to copy IN into
20160 or out of a register in RCLASS in MODE. If it can be done directly,
20161 NO_REGS is returned. */
20163 static enum reg_class
20164 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20165 rtx in)
20167 int regno;
20169 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20170 #if TARGET_MACHO
20171 && MACHOPIC_INDIRECT
20172 #endif
20175 /* We cannot copy a symbolic operand directly into anything
20176 other than BASE_REGS for TARGET_ELF. So indicate that a
20177 register from BASE_REGS is needed as an intermediate
20178 register.
20180 On Darwin, pic addresses require a load from memory, which
20181 needs a base register. */
20182 if (rclass != BASE_REGS
20183 && (GET_CODE (in) == SYMBOL_REF
20184 || GET_CODE (in) == HIGH
20185 || GET_CODE (in) == LABEL_REF
20186 || GET_CODE (in) == CONST))
20187 return BASE_REGS;
20190 if (GET_CODE (in) == REG)
20192 regno = REGNO (in);
20193 if (regno >= FIRST_PSEUDO_REGISTER)
20195 regno = true_regnum (in);
20196 if (regno >= FIRST_PSEUDO_REGISTER)
20197 regno = -1;
20200 else if (GET_CODE (in) == SUBREG)
20202 regno = true_regnum (in);
20203 if (regno >= FIRST_PSEUDO_REGISTER)
20204 regno = -1;
20206 else
20207 regno = -1;
20209 /* If we have VSX register moves, prefer moving scalar values between
20210 Altivec registers and GPR by going via an FPR (and then via memory)
20211 instead of reloading the secondary memory address for Altivec moves. */
20212 if (TARGET_VSX
20213 && GET_MODE_SIZE (mode) < 16
20214 && !mode_supports_vmx_dform (mode)
20215 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20216 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20217 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20218 && (regno >= 0 && INT_REGNO_P (regno)))))
20219 return FLOAT_REGS;
20221 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20222 into anything. */
20223 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20224 || (regno >= 0 && INT_REGNO_P (regno)))
20225 return NO_REGS;
20227 /* Constants, memory, and VSX registers can go into VSX registers (both the
20228 traditional floating point and the altivec registers). */
20229 if (rclass == VSX_REGS
20230 && (regno == -1 || VSX_REGNO_P (regno)))
20231 return NO_REGS;
20233 /* Constants, memory, and FP registers can go into FP registers. */
20234 if ((regno == -1 || FP_REGNO_P (regno))
20235 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20236 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20238 /* Memory, and AltiVec registers can go into AltiVec registers. */
20239 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20240 && rclass == ALTIVEC_REGS)
20241 return NO_REGS;
20243 /* We can copy among the CR registers. */
20244 if ((rclass == CR_REGS || rclass == CR0_REGS)
20245 && regno >= 0 && CR_REGNO_P (regno))
20246 return NO_REGS;
20248 /* Otherwise, we need GENERAL_REGS. */
20249 return GENERAL_REGS;
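/* Editor's illustration: reloading (symbol_ref "x") into anything but
   a base register on ELF returns BASE_REGS above, so reload first
   materializes the address in a base register and only then performs
   the actual copy.  */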
20252 /* Debug version of rs6000_secondary_reload_class. */
20253 static enum reg_class
20254 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20255 machine_mode mode, rtx in)
20257 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20258 fprintf (stderr,
20259 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20260 "mode = %s, input rtx:\n",
20261 reg_class_names[ret], reg_class_names[rclass],
20262 GET_MODE_NAME (mode));
20263 debug_rtx (in);
20265 return ret;
20268 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
20270 static bool
20271 rs6000_cannot_change_mode_class (machine_mode from,
20272 machine_mode to,
20273 enum reg_class rclass)
20275 unsigned from_size = GET_MODE_SIZE (from);
20276 unsigned to_size = GET_MODE_SIZE (to);
20278 if (from_size != to_size)
20280 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20282 if (reg_classes_intersect_p (xclass, rclass))
20284 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
20285 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
20286 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20287 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20289 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20290 single register under VSX because the scalar part of the register
20291 is in the upper 64-bits, and not the lower 64-bits. Types like
20292 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20293 IEEE floating point can't overlap, and neither can small
20294 values. */
20296 if (to_float128_vector_p && from_float128_vector_p)
20297 return false;
20299 else if (to_float128_vector_p || from_float128_vector_p)
20300 return true;
20302 /* TDmode in floating-mode registers must always go into a register
20303 pair with the most significant word in the even-numbered register
20304 to match ISA requirements. In little-endian mode, this does not
20305 match subreg numbering, so we cannot allow subregs. */
20306 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20307 return true;
20309 if (from_size < 8 || to_size < 8)
20310 return true;
20312 if (from_size == 8 && (8 * to_nregs) != to_size)
20313 return true;
20315 if (to_size == 8 && (8 * from_nregs) != from_size)
20316 return true;
20318 return false;
20320 else
20321 return false;
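/* Editor's illustration: a DImode subreg of a KFmode value is
   rejected by the float128 test above, because the scalar sits in the
   upper 64 bits of a single VSX register; a DImode subreg of TFmode,
   which occupies two 64-bit registers, instead passes the to_nregs
   and from_nregs checks.  */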
20324 if (TARGET_E500_DOUBLE
20325 && ((((to) == DFmode) + ((from) == DFmode)) == 1
20326 || (((to) == TFmode) + ((from) == TFmode)) == 1
20327 || (((to) == IFmode) + ((from) == IFmode)) == 1
20328 || (((to) == KFmode) + ((from) == KFmode)) == 1
20329 || (((to) == DDmode) + ((from) == DDmode)) == 1
20330 || (((to) == TDmode) + ((from) == TDmode)) == 1
20331 || (((to) == DImode) + ((from) == DImode)) == 1))
20332 return true;
20334 /* Since the VSX register set includes traditional floating point registers
20335 and altivec registers, just check for the size being different instead of
20336 trying to check whether the modes are vector modes. Otherwise it won't
20337 allow say DF and DI to change classes. For types like TFmode and TDmode
20338 that take 2 64-bit registers, rather than a single 128-bit register, don't
20339 allow subregs of those types to other 128 bit types. */
20340 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20342 unsigned num_regs = (from_size + 15) / 16;
20343 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
20344 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
20345 return true;
20347 return (from_size != 8 && from_size != 16);
20350 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20351 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20352 return true;
20354 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
20355 && reg_classes_intersect_p (GENERAL_REGS, rclass))
20356 return true;
20358 return false;
20361 /* Debug version of rs6000_cannot_change_mode_class. */
20362 static bool
20363 rs6000_debug_cannot_change_mode_class (machine_mode from,
20364 machine_mode to,
20365 enum reg_class rclass)
20367 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
20369 fprintf (stderr,
20370 "rs6000_cannot_change_mode_class, return %s, from = %s, "
20371 "to = %s, rclass = %s\n",
20372 ret ? "true" : "false",
20373 GET_MODE_NAME (from), GET_MODE_NAME (to),
20374 reg_class_names[rclass]);
20376 return ret;
20379 /* Return a string to do a move operation of 128 bits of data. */
20381 const char *
20382 rs6000_output_move_128bit (rtx operands[])
20384 rtx dest = operands[0];
20385 rtx src = operands[1];
20386 machine_mode mode = GET_MODE (dest);
20387 int dest_regno;
20388 int src_regno;
20389 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20390 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20392 if (REG_P (dest))
20394 dest_regno = REGNO (dest);
20395 dest_gpr_p = INT_REGNO_P (dest_regno);
20396 dest_fp_p = FP_REGNO_P (dest_regno);
20397 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20398 dest_vsx_p = dest_fp_p | dest_vmx_p;
20400 else
20402 dest_regno = -1;
20403 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20406 if (REG_P (src))
20408 src_regno = REGNO (src);
20409 src_gpr_p = INT_REGNO_P (src_regno);
20410 src_fp_p = FP_REGNO_P (src_regno);
20411 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20412 src_vsx_p = src_fp_p | src_vmx_p;
20414 else
20416 src_regno = -1;
20417 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20420 /* Register moves. */
20421 if (dest_regno >= 0 && src_regno >= 0)
20423 if (dest_gpr_p)
20425 if (src_gpr_p)
20426 return "#";
20428 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20429 return (WORDS_BIG_ENDIAN
20430 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20431 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20433 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20434 return "#";
20437 else if (TARGET_VSX && dest_vsx_p)
20439 if (src_vsx_p)
20440 return "xxlor %x0,%x1,%x1";
20442 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20443 return (WORDS_BIG_ENDIAN
20444 ? "mtvsrdd %x0,%1,%L1"
20445 : "mtvsrdd %x0,%L1,%1");
20447 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20448 return "#";
20451 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20452 return "vor %0,%1,%1";
20454 else if (dest_fp_p && src_fp_p)
20455 return "#";
20458 /* Loads. */
20459 else if (dest_regno >= 0 && MEM_P (src))
20461 if (dest_gpr_p)
20463 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20464 return "lq %0,%1";
20465 else
20466 return "#";
20469 else if (TARGET_ALTIVEC && dest_vmx_p
20470 && altivec_indexed_or_indirect_operand (src, mode))
20471 return "lvx %0,%y1";
20473 else if (TARGET_VSX && dest_vsx_p)
20475 if (mode_supports_vsx_dform_quad (mode)
20476 && quad_address_p (XEXP (src, 0), mode, true))
20477 return "lxv %x0,%1";
20479 else if (TARGET_P9_VECTOR)
20480 return "lxvx %x0,%y1";
20482 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20483 return "lxvw4x %x0,%y1";
20485 else
20486 return "lxvd2x %x0,%y1";
20489 else if (TARGET_ALTIVEC && dest_vmx_p)
20490 return "lvx %0,%y1";
20492 else if (dest_fp_p)
20493 return "#";
20496 /* Stores. */
20497 else if (src_regno >= 0 && MEM_P (dest))
20499 if (src_gpr_p)
20501 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20502 return "stq %1,%0";
20503 else
20504 return "#";
20507 else if (TARGET_ALTIVEC && src_vmx_p
20508 && altivec_indexed_or_indirect_operand (src, mode))
20509 return "stvx %1,%y0";
20511 else if (TARGET_VSX && src_vsx_p)
20513 if (mode_supports_vsx_dform_quad (mode)
20514 && quad_address_p (XEXP (dest, 0), mode, true))
20515 return "stxv %x1,%0";
20517 else if (TARGET_P9_VECTOR)
20518 return "stxvx %x1,%y0";
20520 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20521 return "stxvw4x %x1,%y0";
20523 else
20524 return "stxvd2x %x1,%y0";
20527 else if (TARGET_ALTIVEC && src_vmx_p)
20528 return "stvx %1,%y0";
20530 else if (src_fp_p)
20531 return "#";
20534 /* Constants. */
20535 else if (dest_regno >= 0
20536 && (GET_CODE (src) == CONST_INT
20537 || GET_CODE (src) == CONST_WIDE_INT
20538 || GET_CODE (src) == CONST_DOUBLE
20539 || GET_CODE (src) == CONST_VECTOR))
20541 if (dest_gpr_p)
20542 return "#";
20544 else if ((dest_vmx_p && TARGET_ALTIVEC)
20545 || (dest_vsx_p && TARGET_VSX))
20546 return output_vec_const_move (operands);
20549 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
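/* Editor's illustration: a V2DF register-to-register copy comes out
   as "xxlor %x0,%x1,%x1" above, while a power8 GPR<->VSX move returns
   "#" so that the splitter can rebuild it from two 64-bit direct
   moves.  */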
20552 /* Validate a 128-bit move. */
20553 bool
20554 rs6000_move_128bit_ok_p (rtx operands[])
20556 machine_mode mode = GET_MODE (operands[0]);
20557 return (gpc_reg_operand (operands[0], mode)
20558 || gpc_reg_operand (operands[1], mode));
20561 /* Return true if a 128-bit move needs to be split. */
20562 bool
20563 rs6000_split_128bit_ok_p (rtx operands[])
20565 if (!reload_completed)
20566 return false;
20568 if (!gpr_or_gpr_p (operands[0], operands[1]))
20569 return false;
20571 if (quad_load_store_p (operands[0], operands[1]))
20572 return false;
20574 return true;
20578 /* Given a comparison operation, return the bit number in CCR to test. We
20579 know this is a valid comparison.
20581 SCC_P is 1 if this is for an scc. That means that %D will have been
20582 used instead of %C, so the bits will be in different places.
20584 Return -1 if OP isn't a valid comparison for some reason. */
int
20587 ccr_bit (rtx op, int scc_p)
20589 enum rtx_code code = GET_CODE (op);
20590 machine_mode cc_mode;
20591 int cc_regnum;
20592 int base_bit;
20593 rtx reg;
20595 if (!COMPARISON_P (op))
20596 return -1;
20598 reg = XEXP (op, 0);
20600 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
20602 cc_mode = GET_MODE (reg);
20603 cc_regnum = REGNO (reg);
20604 base_bit = 4 * (cc_regnum - CR0_REGNO);
20606 validate_condition_mode (code, cc_mode);
20608 /* When generating a sCOND operation, only positive conditions are
20609 allowed. */
20610 gcc_assert (!scc_p
20611 || code == EQ || code == GT || code == LT || code == UNORDERED
20612 || code == GTU || code == LTU);
20614 switch (code)
20616 case NE:
20617 return scc_p ? base_bit + 3 : base_bit + 2;
20618 case EQ:
20619 return base_bit + 2;
20620 case GT: case GTU: case UNLE:
20621 return base_bit + 1;
20622 case LT: case LTU: case UNGE:
20623 return base_bit;
20624 case ORDERED: case UNORDERED:
20625 return base_bit + 3;
20627 case GE: case GEU:
20628 /* If scc, we will have done a cror to put the bit in the
20629 unordered position. So test that bit. For integer, this is ! LT
20630 unless this is an scc insn. */
20631 return scc_p ? base_bit + 3 : base_bit;
20633 case LE: case LEU:
20634 return scc_p ? base_bit + 3 : base_bit + 1;
20636 default:
20637 gcc_unreachable ();
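/* Editor's illustration: for a comparison held in CR2, base_bit is 8,
   so GT tests CR bit 9 and EQ tests CR bit 10; in the scc case the
   cror result has been placed in the "unordered" slot, bit 11.  */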
20641 /* Return the GOT register. */
rtx
20644 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20646 /* The second flow pass currently (June 1999) can't update
20647 regs_ever_live without disturbing other parts of the compiler, so
20648 update it here to make the prolog/epilogue code happy. */
20649 if (!can_create_pseudo_p ()
20650 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20651 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20653 crtl->uses_pic_offset_table = 1;
20655 return pic_offset_table_rtx;
20658 static rs6000_stack_t stack_info;
20660 /* Function to init struct machine_function.
20661 This will be called, via a pointer variable,
20662 from push_function_context. */
20664 static struct machine_function *
20665 rs6000_init_machine_status (void)
20667 stack_info.reload_completed = 0;
20668 return ggc_cleared_alloc<machine_function> ();
20671 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
20673 /* Write out a function code label. */
20675 void
20676 rs6000_output_function_entry (FILE *file, const char *fname)
20678 if (fname[0] != '.')
20680 switch (DEFAULT_ABI)
20682 default:
20683 gcc_unreachable ();
20685 case ABI_AIX:
20686 if (DOT_SYMBOLS)
20687 putc ('.', file);
20688 else
20689 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20690 break;
20692 case ABI_ELFv2:
20693 case ABI_V4:
20694 case ABI_DARWIN:
20695 break;
20699 RS6000_OUTPUT_BASENAME (file, fname);
20702 /* Print an operand. Recognize special options, documented below. */
20704 #if TARGET_ELF
20705 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20706 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20707 #else
20708 #define SMALL_DATA_RELOC "sda21"
20709 #define SMALL_DATA_REG 0
20710 #endif
20712 void
20713 print_operand (FILE *file, rtx x, int code)
20715 int i;
20716 unsigned HOST_WIDE_INT uval;
20718 switch (code)
20720 /* %a is output_address. */
20722 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20723 output_operand. */
20725 case 'D':
20726 /* Like 'J' but get to the GT bit only. */
20727 gcc_assert (REG_P (x));
20729 /* Bit 1 is GT bit. */
20730 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20732 /* Add one for shift count in rlinm for scc. */
20733 fprintf (file, "%d", i + 1);
20734 return;
20736 case 'e':
20737 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20738 if (! INT_P (x))
20740 output_operand_lossage ("invalid %%e value");
20741 return;
20744 uval = INTVAL (x);
20745 if ((uval & 0xffff) == 0 && uval != 0)
20746 putc ('s', file);
20747 return;
20749 case 'E':
20750 /* X is a CR register. Print the number of the EQ bit of the CR. */
20751 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20752 output_operand_lossage ("invalid %%E value");
20753 else
20754 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20755 return;
20757 case 'f':
20758 /* X is a CR register. Print the shift count needed to move it
20759 to the high-order four bits. */
20760 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20761 output_operand_lossage ("invalid %%f value");
20762 else
20763 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20764 return;
20766 case 'F':
20767 /* Similar, but print the count for the rotate in the opposite
20768 direction. */
20769 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20770 output_operand_lossage ("invalid %%F value");
20771 else
20772 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20773 return;
20775 case 'G':
20776 /* X is a constant integer. If it is negative, print "m",
20777 otherwise print "z". This is to make an aze or ame insn. */
20778 if (GET_CODE (x) != CONST_INT)
20779 output_operand_lossage ("invalid %%G value");
20780 else if (INTVAL (x) >= 0)
20781 putc ('z', file);
20782 else
20783 putc ('m', file);
20784 return;
20786 case 'h':
20787 /* If constant, output low-order five bits. Otherwise, write
20788 normally. */
20789 if (INT_P (x))
20790 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20791 else
20792 print_operand (file, x, 0);
20793 return;
20795 case 'H':
20796 /* If constant, output low-order six bits. Otherwise, write
20797 normally. */
20798 if (INT_P (x))
20799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20800 else
20801 print_operand (file, x, 0);
20802 return;
20804 case 'I':
20805 /* Print `i' if this is a constant, else nothing. */
20806 if (INT_P (x))
20807 putc ('i', file);
20808 return;
20810 case 'j':
20811 /* Write the bit number in CCR for jump. */
20812 i = ccr_bit (x, 0);
20813 if (i == -1)
20814 output_operand_lossage ("invalid %%j code");
20815 else
20816 fprintf (file, "%d", i);
20817 return;
20819 case 'J':
20820 /* Similar, but add one for shift count in rlinm for scc and pass
20821 scc flag to `ccr_bit'. */
20822 i = ccr_bit (x, 1);
20823 if (i == -1)
20824 output_operand_lossage ("invalid %%J code");
20825 else
20826 /* If we want bit 31, write a shift count of zero, not 32. */
20827 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20828 return;
20830 case 'k':
20831 /* X must be a constant. Write the 1's complement of the
20832 constant. */
20833 if (! INT_P (x))
20834 output_operand_lossage ("invalid %%k value");
20835 else
20836 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20837 return;
20839 case 'K':
20840 /* X must be a symbolic constant on ELF. Write an
20841 expression suitable for an 'addi' that adds in the low 16
20842 bits of the MEM. */
20843 if (GET_CODE (x) == CONST)
20845 if (GET_CODE (XEXP (x, 0)) != PLUS
20846 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20847 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20848 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20849 output_operand_lossage ("invalid %%K value");
20851 print_operand_address (file, x);
20852 fputs ("@l", file);
20853 return;
20855 /* %l is output_asm_label. */
20857 case 'L':
20858 /* Write second word of DImode or DFmode reference. Works on register
20859 or non-indexed memory only. */
20860 if (REG_P (x))
20861 fputs (reg_names[REGNO (x) + 1], file);
20862 else if (MEM_P (x))
20864 machine_mode mode = GET_MODE (x);
20865 /* Handle possible auto-increment. Since it is pre-increment and
20866 we have already done it, we can just use an offset of one word. */
20867 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20868 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20869 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20870 UNITS_PER_WORD));
20871 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20872 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20873 UNITS_PER_WORD));
20874 else
20875 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20876 UNITS_PER_WORD),
20877 0));
20879 if (small_data_operand (x, GET_MODE (x)))
20880 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20881 reg_names[SMALL_DATA_REG]);
20883 return;
20885 case 'N':
20886 /* Write the number of elements in the vector times 4. */
20887 if (GET_CODE (x) != PARALLEL)
20888 output_operand_lossage ("invalid %%N value");
20889 else
20890 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20891 return;
20893 case 'O':
20894 /* Similar, but subtract 1 first. */
20895 if (GET_CODE (x) != PARALLEL)
20896 output_operand_lossage ("invalid %%O value");
20897 else
20898 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20899 return;
20901 case 'p':
20902 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20903 if (! INT_P (x)
20904 || INTVAL (x) < 0
20905 || (i = exact_log2 (INTVAL (x))) < 0)
20906 output_operand_lossage ("invalid %%p value");
20907 else
20908 fprintf (file, "%d", i);
20909 return;
20911 case 'P':
20912 /* The operand must be an indirect memory reference. The result
20913 is the register name. */
20914 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20915 || REGNO (XEXP (x, 0)) >= 32)
20916 output_operand_lossage ("invalid %%P value");
20917 else
20918 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20919 return;
20921 case 'q':
20922 /* This outputs the logical code corresponding to a boolean
20923 expression. The expression may have one or both operands
20924 negated (if one, only the first one). For condition register
20925 logical operations, it will also treat the negated
20926 CR codes as NOTs, but not handle NOTs of them. */
20928 const char *const *t = 0;
20929 const char *s;
20930 enum rtx_code code = GET_CODE (x);
20931 static const char * const tbl[3][3] = {
20932 { "and", "andc", "nor" },
20933 { "or", "orc", "nand" },
20934 { "xor", "eqv", "xor" } };
20936 if (code == AND)
20937 t = tbl[0];
20938 else if (code == IOR)
20939 t = tbl[1];
20940 else if (code == XOR)
20941 t = tbl[2];
20942 else
20943 output_operand_lossage ("invalid %%q value");
20945 if (GET_CODE (XEXP (x, 0)) != NOT)
20946 s = t[0];
20947 else
20949 if (GET_CODE (XEXP (x, 1)) == NOT)
20950 s = t[2];
20951 else
20952 s = t[1];
20955 fputs (s, file);
20957 return;
20959 case 'Q':
20960 if (! TARGET_MFCRF)
20961 return;
20962 fputc (',', file);
20963 /* FALLTHRU */
20965 case 'R':
20966 /* X is a CR register. Print the mask for `mtcrf'. */
20967 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20968 output_operand_lossage ("invalid %%R value");
20969 else
20970 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20971 return;
20973 case 's':
20974 /* Low 5 bits of 32 - value */
20975 if (! INT_P (x))
20976 output_operand_lossage ("invalid %%s value");
20977 else
20978 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20979 return;
20981 case 't':
20982 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20983 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20985 /* Bit 3 is OV bit. */
20986 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20988 /* If we want bit 31, write a shift count of zero, not 32. */
20989 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20990 return;
20992 case 'T':
20993 /* Print the symbolic name of a branch target register. */
20994 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20995 && REGNO (x) != CTR_REGNO))
20996 output_operand_lossage ("invalid %%T value");
20997 else if (REGNO (x) == LR_REGNO)
20998 fputs ("lr", file);
20999 else
21000 fputs ("ctr", file);
21001 return;
21003 case 'u':
21004 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21005 for use in unsigned operand. */
21006 if (! INT_P (x))
21008 output_operand_lossage ("invalid %%u value");
21009 return;
21012 uval = INTVAL (x);
21013 if ((uval & 0xffff) == 0)
21014 uval >>= 16;
21016 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21017 return;
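/* Editor's illustration: %u of 0x12340000 and %u of 0x1234 both print
   0x1234; whichever 16-bit half is non-zero is selected.  */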
21019 case 'v':
21020 /* High-order 16 bits of constant for use in signed operand. */
21021 if (! INT_P (x))
21022 output_operand_lossage ("invalid %%v value");
21023 else
21024 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21025 (INTVAL (x) >> 16) & 0xffff);
21026 return;
21028 case 'U':
21029 /* Print `u' if this has an auto-increment or auto-decrement. */
21030 if (MEM_P (x)
21031 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21032 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21033 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21034 putc ('u', file);
21035 return;
21037 case 'V':
21038 /* Print the trap code for this operand. */
21039 switch (GET_CODE (x))
21041 case EQ:
21042 fputs ("eq", file); /* 4 */
21043 break;
21044 case NE:
21045 fputs ("ne", file); /* 24 */
21046 break;
21047 case LT:
21048 fputs ("lt", file); /* 16 */
21049 break;
21050 case LE:
21051 fputs ("le", file); /* 20 */
21052 break;
21053 case GT:
21054 fputs ("gt", file); /* 8 */
21055 break;
21056 case GE:
21057 fputs ("ge", file); /* 12 */
21058 break;
21059 case LTU:
21060 fputs ("llt", file); /* 2 */
21061 break;
21062 case LEU:
21063 fputs ("lle", file); /* 6 */
21064 break;
21065 case GTU:
21066 fputs ("lgt", file); /* 1 */
21067 break;
21068 case GEU:
21069 fputs ("lge", file); /* 5 */
21070 break;
21071 default:
21072 gcc_unreachable ();
21074 break;
21076 case 'w':
21077 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21078 normally. */
21079 if (INT_P (x))
21080 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21081 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21082 else
21083 print_operand (file, x, 0);
21084 return;
21086 case 'x':
21087 /* X is a FPR or Altivec register used in a VSX context. */
21088 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21089 output_operand_lossage ("invalid %%x value");
21090 else
21092 int reg = REGNO (x);
21093 int vsx_reg = (FP_REGNO_P (reg)
21094 ? reg - 32
21095 : reg - FIRST_ALTIVEC_REGNO + 32);
21097 #ifdef TARGET_REGNAMES
21098 if (TARGET_REGNAMES)
21099 fprintf (file, "%%vs%d", vsx_reg);
21100 else
21101 #endif
21102 fprintf (file, "%d", vsx_reg);
21104 return;
21106 case 'X':
21107 if (MEM_P (x)
21108 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21109 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21110 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21111 putc ('x', file);
21112 return;
21114 case 'Y':
21115 /* Like 'L', for third word of TImode/PTImode */
21116 if (REG_P (x))
21117 fputs (reg_names[REGNO (x) + 2], file);
21118 else if (MEM_P (x))
21120 machine_mode mode = GET_MODE (x);
21121 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21122 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21123 output_address (mode, plus_constant (Pmode,
21124 XEXP (XEXP (x, 0), 0), 8));
21125 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21126 output_address (mode, plus_constant (Pmode,
21127 XEXP (XEXP (x, 0), 0), 8));
21128 else
21129 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21130 if (small_data_operand (x, GET_MODE (x)))
21131 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21132 reg_names[SMALL_DATA_REG]);
21134 return;
21136 case 'z':
21137 /* X is a SYMBOL_REF. Write out the name preceded by a
21138 period and without any trailing data in brackets. Used for function
21139 names. If we are configured for System V (or the embedded ABI) on
21140 the PowerPC, do not emit the period, since those systems do not use
21141 TOCs and the like. */
21142 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21144 /* For macho, check to see if we need a stub. */
21145 if (TARGET_MACHO)
21147 const char *name = XSTR (x, 0);
21148 #if TARGET_MACHO
21149 if (darwin_emit_branch_islands
21150 && MACHOPIC_INDIRECT
21151 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21152 name = machopic_indirection_name (x, /*stub_p=*/true);
21153 #endif
21154 assemble_name (file, name);
21156 else if (!DOT_SYMBOLS)
21157 assemble_name (file, XSTR (x, 0));
21158 else
21159 rs6000_output_function_entry (file, XSTR (x, 0));
21160 return;
21162 case 'Z':
21163 /* Like 'L', for last word of TImode/PTImode. */
21164 if (REG_P (x))
21165 fputs (reg_names[REGNO (x) + 3], file);
21166 else if (MEM_P (x))
21168 machine_mode mode = GET_MODE (x);
21169 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21170 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21171 output_address (mode, plus_constant (Pmode,
21172 XEXP (XEXP (x, 0), 0), 12));
21173 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21174 output_address (mode, plus_constant (Pmode,
21175 XEXP (XEXP (x, 0), 0), 12));
21176 else
21177 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21178 if (small_data_operand (x, GET_MODE (x)))
21179 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21180 reg_names[SMALL_DATA_REG]);
21182 return;
21184 /* Print AltiVec or SPE memory operand. */
21185 case 'y':
21187 rtx tmp;
21189 gcc_assert (MEM_P (x));
21191 tmp = XEXP (x, 0);
21193 /* Ugly hack because %y is overloaded. */
21194 if ((TARGET_SPE || TARGET_E500_DOUBLE)
21195 && (GET_MODE_SIZE (GET_MODE (x)) == 8
21196 || FLOAT128_2REG_P (GET_MODE (x))
21197 || GET_MODE (x) == TImode
21198 || GET_MODE (x) == PTImode))
21200 /* Handle [reg]. */
21201 if (REG_P (tmp))
21203 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
21204 break;
21206 /* Handle [reg+UIMM]. */
21207 else if (GET_CODE (tmp) == PLUS
21208 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
21210 int off;
21212 gcc_assert (REG_P (XEXP (tmp, 0)));
21214 off = INTVAL (XEXP (tmp, 1));
21215 fprintf (file, "%d(%s)", off, reg_names[REGNO (XEXP (tmp, 0))]);
21216 break;
21219 /* Fall through. Must be [reg+reg]. */
21221 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
21222 && GET_CODE (tmp) == AND
21223 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21224 && INTVAL (XEXP (tmp, 1)) == -16)
21225 tmp = XEXP (tmp, 0);
21226 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21227 && GET_CODE (tmp) == PRE_MODIFY)
21228 tmp = XEXP (tmp, 1);
21229 if (REG_P (tmp))
21230 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21231 else
21233 if (GET_CODE (tmp) != PLUS
21234 || !REG_P (XEXP (tmp, 0))
21235 || !REG_P (XEXP (tmp, 1)))
21237 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21238 break;
21241 if (REGNO (XEXP (tmp, 0)) == 0)
21242 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21243 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21244 else
21245 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21246 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21248 break;
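/* Editor's illustration: for an AltiVec-handled address such as
   (mem:V4SI (and (plus (reg 9) (reg 10)) -16)), the AND is peeled off
   above and %y prints "9,10", the indexed operand form that lvx and
   stvx expect.  */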
21251 case 0:
21252 if (REG_P (x))
21253 fprintf (file, "%s", reg_names[REGNO (x)]);
21254 else if (MEM_P (x))
21256 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21257 know the width from the mode. */
21258 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21259 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21260 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21261 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21262 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21263 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21264 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21265 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21266 else
21267 output_address (GET_MODE (x), XEXP (x, 0));
21269 else
21271 if (toc_relative_expr_p (x, false))
21272 /* This hack along with a corresponding hack in
21273 rs6000_output_addr_const_extra arranges to output addends
21274 where the assembler expects to find them. eg.
21275 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21276 without this hack would be output as "x@toc+4". We
21277 want "x+4@toc". */
21278 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
21279 else
21280 output_addr_const (file, x);
21282 return;
21284 case '&':
21285 if (const char *name = get_some_local_dynamic_name ())
21286 assemble_name (file, name);
21287 else
21288 output_operand_lossage ("'%%&' used without any "
21289 "local dynamic TLS references");
21290 return;
21292 default:
21293 output_operand_lossage ("invalid %%xn code");
21297 /* Print the address of an operand. */
21299 void
21300 print_operand_address (FILE *file, rtx x)
21302 if (REG_P (x))
21303 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21304 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21305 || GET_CODE (x) == LABEL_REF)
21307 output_addr_const (file, x);
21308 if (small_data_operand (x, GET_MODE (x)))
21309 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21310 reg_names[SMALL_DATA_REG]);
21311 else
21312 gcc_assert (!TARGET_TOC);
21314 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21315 && REG_P (XEXP (x, 1)))
21317 if (REGNO (XEXP (x, 0)) == 0)
21318 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21319 reg_names[ REGNO (XEXP (x, 0)) ]);
21320 else
21321 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21322 reg_names[ REGNO (XEXP (x, 1)) ]);
21324 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21325 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21326 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21327 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21328 #if TARGET_MACHO
21329 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21330 && CONSTANT_P (XEXP (x, 1)))
21332 fprintf (file, "lo16(");
21333 output_addr_const (file, XEXP (x, 1));
21334 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21336 #endif
21337 #if TARGET_ELF
21338 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21339 && CONSTANT_P (XEXP (x, 1)))
21341 output_addr_const (file, XEXP (x, 1));
21342 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21344 #endif
21345 else if (toc_relative_expr_p (x, false))
21347 /* This hack along with a corresponding hack in
21348 rs6000_output_addr_const_extra arranges to output addends
21349 where the assembler expects to find them. eg.
21350 (lo_sum (reg 9)
21351 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21352 without this hack would be output as "x@toc+8@l(9)". We
21353 want "x+8@toc@l(9)". */
21354 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
21355 if (GET_CODE (x) == LO_SUM)
21356 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21357 else
21358 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
21360 else
21361 gcc_unreachable ();
21364 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
21366 static bool
21367 rs6000_output_addr_const_extra (FILE *file, rtx x)
21369 if (GET_CODE (x) == UNSPEC)
21370 switch (XINT (x, 1))
21372 case UNSPEC_TOCREL:
21373 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21374 && REG_P (XVECEXP (x, 0, 1))
21375 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21376 output_addr_const (file, XVECEXP (x, 0, 0));
21377 if (x == tocrel_base && tocrel_offset != const0_rtx)
21379 if (INTVAL (tocrel_offset) >= 0)
21380 fprintf (file, "+");
21381 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
21383 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21385 putc ('-', file);
21386 assemble_name (file, toc_label_name);
21387 need_toc_init = 1;
21389 else if (TARGET_ELF)
21390 fputs ("@toc", file);
21391 return true;
21393 #if TARGET_MACHO
21394 case UNSPEC_MACHOPIC_OFFSET:
21395 output_addr_const (file, XVECEXP (x, 0, 0));
21396 putc ('-', file);
21397 machopic_output_function_base_name (file);
21398 return true;
21399 #endif
21401 return false;
21404 /* Target hook for assembling integer objects. The PowerPC version has
21405 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21406 is defined. It also needs to handle DI-mode objects on 64-bit
21407 targets. */
21409 static bool
21410 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21412 #ifdef RELOCATABLE_NEEDS_FIXUP
21413 /* Special handling for SI values. */
21414 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21416 static int recurse = 0;
21418 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21419 the .fixup section. Since the TOC section is already relocated, we
21420 don't need to mark it here. We used to skip the text section, but it
21421 should never be valid for relocated addresses to be placed in the text
21422 section. */
21423 if (DEFAULT_ABI == ABI_V4
21424 && (TARGET_RELOCATABLE || flag_pic > 1)
21425 && in_section != toc_section
21426 && !recurse
21427 && !CONST_SCALAR_INT_P (x)
21428 && CONSTANT_P (x))
21430 char buf[256];
21432 recurse = 1;
21433 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21434 fixuplabelno++;
21435 ASM_OUTPUT_LABEL (asm_out_file, buf);
21436 fprintf (asm_out_file, "\t.long\t(");
21437 output_addr_const (asm_out_file, x);
21438 fprintf (asm_out_file, ")@fixup\n");
21439 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21440 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21441 fprintf (asm_out_file, "\t.long\t");
21442 assemble_name (asm_out_file, buf);
21443 fprintf (asm_out_file, "\n\t.previous\n");
21444 recurse = 0;
21445 return true;
21447 /* Remove initial .'s to turn a -mcall-aixdesc function
21448 address into the address of the descriptor, not the function
21449 itself. */
21450 else if (GET_CODE (x) == SYMBOL_REF
21451 && XSTR (x, 0)[0] == '.'
21452 && DEFAULT_ABI == ABI_AIX)
21454 const char *name = XSTR (x, 0);
21455 while (*name == '.')
21456 name++;
21458 fprintf (asm_out_file, "\t.long\t%s\n", name);
21459 return true;
21462 #endif /* RELOCATABLE_NEEDS_FIXUP */
21463 return default_assemble_integer (x, size, aligned_p);
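/* Editor's sketch of the -mrelocatable path above for a constant
   address "sym" (label spelling and alignment directive as typically
   generated on ELF; an assumption, not verbatim compiler output):

	.LCP1:
		.long	(sym)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP1
		.previous
*/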
21466 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21467 /* Emit an assembler directive to set symbol visibility for DECL to
21468 VISIBILITY_TYPE. */
21470 static void
21471 rs6000_assemble_visibility (tree decl, int vis)
21473 if (TARGET_XCOFF)
21474 return;
21476 /* Functions need to have their entry point symbol visibility set as
21477 well as their descriptor symbol visibility. */
21478 if (DEFAULT_ABI == ABI_AIX
21479 && DOT_SYMBOLS
21480 && TREE_CODE (decl) == FUNCTION_DECL)
21482 static const char * const visibility_types[] = {
21483 NULL, "internal", "hidden", "protected"
21486 const char *name, *type;
21488 name = ((* targetm.strip_name_encoding)
21489 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21490 type = visibility_types[vis];
21492 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21493 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21495 else
21496 default_assemble_visibility (decl, vis);
21498 #endif
21500 enum rtx_code
21501 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21503 /* Reversal of FP compares takes care -- an ordered compare
21504 becomes an unordered compare and vice versa. */
21505 if (mode == CCFPmode
21506 && (!flag_finite_math_only
21507 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21508 || code == UNEQ || code == LTGT))
21509 return reverse_condition_maybe_unordered (code);
21510 else
21511 return reverse_condition (code);
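/* Editor's illustration: reversing LT in CCFPmode yields UNGE rather
   than GE, so the reversed test is still true for a NaN operand, just
   as !(LT) is.  The plain reversal is used only when
   flag_finite_math_only rules NaNs out and the code is not one of the
   explicitly unordered ones.  */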
21514 /* Generate a compare for CODE. Return a brand-new rtx that
21515 represents the result of the compare. */
21517 static rtx
21518 rs6000_generate_compare (rtx cmp, machine_mode mode)
21520 machine_mode comp_mode;
21521 rtx compare_result;
21522 enum rtx_code code = GET_CODE (cmp);
21523 rtx op0 = XEXP (cmp, 0);
21524 rtx op1 = XEXP (cmp, 1);
21526 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21527 comp_mode = CCmode;
21528 else if (FLOAT_MODE_P (mode))
21529 comp_mode = CCFPmode;
21530 else if (code == GTU || code == LTU
21531 || code == GEU || code == LEU)
21532 comp_mode = CCUNSmode;
21533 else if ((code == EQ || code == NE)
21534 && unsigned_reg_p (op0)
21535 && (unsigned_reg_p (op1)
21536 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21537 /* These are unsigned values, perhaps there will be a later
21538 ordering compare that can be shared with this one. */
21539 comp_mode = CCUNSmode;
21540 else
21541 comp_mode = CCmode;
21543 /* If we have an unsigned compare, make sure we don't have a signed value as
21544 an immediate. */
21545 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21546 && INTVAL (op1) < 0)
21548 op0 = copy_rtx_if_shared (op0);
21549 op1 = force_reg (GET_MODE (op0), op1);
21550 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
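/* Editor's note: cmplwi/cmpldi take an unsigned 16-bit immediate, so
   a comparison such as (gtu (reg) (const_int -1)) cannot keep the
   constant operand; the fixup above copies it into a register
   first.  */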
21553 /* First, the compare. */
21554 compare_result = gen_reg_rtx (comp_mode);
21556 /* E500 FP compare instructions on the GPRs. Yuck! */
21557 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
21558 && FLOAT_MODE_P (mode))
21560 rtx cmp, or_result, compare_result2;
21561 machine_mode op_mode = GET_MODE (op0);
21562 bool reverse_p;
21564 if (op_mode == VOIDmode)
21565 op_mode = GET_MODE (op1);
21567 /* First reverse the condition codes that aren't directly supported. */
21568 switch (code)
21570 case NE:
21571 case UNLT:
21572 case UNLE:
21573 case UNGT:
21574 case UNGE:
21575 code = reverse_condition_maybe_unordered (code);
21576 reverse_p = true;
21577 break;
21579 case EQ:
21580 case LT:
21581 case LE:
21582 case GT:
21583 case GE:
21584 reverse_p = false;
21585 break;
21587 default:
21588 gcc_unreachable ();
21591 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
21592 This explains the following mess. */
21594 switch (code)
21596 case EQ:
21597 switch (op_mode)
21599 case SFmode:
21600 cmp = (flag_finite_math_only && !flag_trapping_math)
21601 ? gen_tstsfeq_gpr (compare_result, op0, op1)
21602 : gen_cmpsfeq_gpr (compare_result, op0, op1);
21603 break;
21605 case DFmode:
21606 cmp = (flag_finite_math_only && !flag_trapping_math)
21607 ? gen_tstdfeq_gpr (compare_result, op0, op1)
21608 : gen_cmpdfeq_gpr (compare_result, op0, op1);
21609 break;
21611 case TFmode:
21612 case IFmode:
21613 case KFmode:
21614 cmp = (flag_finite_math_only && !flag_trapping_math)
21615 ? gen_tsttfeq_gpr (compare_result, op0, op1)
21616 : gen_cmptfeq_gpr (compare_result, op0, op1);
21617 break;
21619 default:
21620 gcc_unreachable ();
21622 break;
21624 case GT:
21625 case GE:
21626 switch (op_mode)
21628 case SFmode:
21629 cmp = (flag_finite_math_only && !flag_trapping_math)
21630 ? gen_tstsfgt_gpr (compare_result, op0, op1)
21631 : gen_cmpsfgt_gpr (compare_result, op0, op1);
21632 break;
21634 case DFmode:
21635 cmp = (flag_finite_math_only && !flag_trapping_math)
21636 ? gen_tstdfgt_gpr (compare_result, op0, op1)
21637 : gen_cmpdfgt_gpr (compare_result, op0, op1);
21638 break;
21640 case TFmode:
21641 case IFmode:
21642 case KFmode:
21643 cmp = (flag_finite_math_only && !flag_trapping_math)
21644 ? gen_tsttfgt_gpr (compare_result, op0, op1)
21645 : gen_cmptfgt_gpr (compare_result, op0, op1);
21646 break;
21648 default:
21649 gcc_unreachable ();
21651 break;
21653 case LT:
21654 case LE:
21655 switch (op_mode)
21657 case SFmode:
21658 cmp = (flag_finite_math_only && !flag_trapping_math)
21659 ? gen_tstsflt_gpr (compare_result, op0, op1)
21660 : gen_cmpsflt_gpr (compare_result, op0, op1);
21661 break;
21663 case DFmode:
21664 cmp = (flag_finite_math_only && !flag_trapping_math)
21665 ? gen_tstdflt_gpr (compare_result, op0, op1)
21666 : gen_cmpdflt_gpr (compare_result, op0, op1);
21667 break;
21669 case TFmode:
21670 case IFmode:
21671 case KFmode:
21672 cmp = (flag_finite_math_only && !flag_trapping_math)
21673 ? gen_tsttflt_gpr (compare_result, op0, op1)
21674 : gen_cmptflt_gpr (compare_result, op0, op1);
21675 break;
21677 default:
21678 gcc_unreachable ();
21680 break;
21682 default:
21683 gcc_unreachable ();
21686 /* Synthesize LE and GE from LT/GT || EQ. */
21687 if (code == LE || code == GE)
21689 emit_insn (cmp);
21691 compare_result2 = gen_reg_rtx (CCFPmode);
21693 /* Do the EQ. */
21694 switch (op_mode)
21696 case SFmode:
21697 cmp = (flag_finite_math_only && !flag_trapping_math)
21698 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
21699 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
21700 break;
21702 case DFmode:
21703 cmp = (flag_finite_math_only && !flag_trapping_math)
21704 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
21705 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
21706 break;
21708 case TFmode:
21709 case IFmode:
21710 case KFmode:
21711 cmp = (flag_finite_math_only && !flag_trapping_math)
21712 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
21713 : gen_cmptfeq_gpr (compare_result2, op0, op1);
21714 break;
21716 default:
21717 gcc_unreachable ();
21720 emit_insn (cmp);
21722 /* OR them together. */
21723 or_result = gen_reg_rtx (CCFPmode);
21724 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
21725 compare_result2);
21726 compare_result = or_result;
21729 code = reverse_p ? NE : EQ;
21731 emit_insn (cmp);
21734 /* IEEE 128-bit support in VSX registers when we do not have hardware
21735 support. */
21736 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21738 rtx libfunc = NULL_RTX;
21739 bool uneq_or_ltgt = false;
21740 rtx dest = gen_reg_rtx (SImode);
21742 switch (code)
21744 case EQ:
21745 case NE:
21746 libfunc = optab_libfunc (eq_optab, mode);
21747 break;
21749 case GT:
21750 case GE:
21751 libfunc = optab_libfunc (ge_optab, mode);
21752 break;
21754 case LT:
21755 case LE:
21756 libfunc = optab_libfunc (le_optab, mode);
21757 break;
21759 case UNORDERED:
21760 case ORDERED:
21761 libfunc = optab_libfunc (unord_optab, mode);
21762 code = (code == UNORDERED) ? NE : EQ;
21763 break;
21765 case UNGE:
21766 case UNGT:
21767 libfunc = optab_libfunc (le_optab, mode);
21768 code = (code == UNGE) ? GE : GT;
21769 break;
21771 case UNLE:
21772 case UNLT:
21773 libfunc = optab_libfunc (ge_optab, mode);
21774 code = (code == UNLE) ? LE : LT;
21775 break;
21777 case UNEQ:
21778 case LTGT:
21779 libfunc = optab_libfunc (le_optab, mode);
21780 uneq_or_ltgt = true;
21781 code = (code == UNEQ) ? NE : EQ;
21782 break;
21784 default:
21785 gcc_unreachable ();
21788 gcc_assert (libfunc);
21789 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21790 SImode, 2, op0, mode, op1, mode);
21792 /* If this is UNEQ or LTGT, we call __lekf2, which returns -1 for less
21793 than, 0 for equal, +1 for greater, and +2 for nan. We add 1 to give
21794 a value of 0..3, and then do an AND immediate of 1 to isolate whether
21795 it is equal/NaN (i.e. bottom bit is 1), or less than/greater than
21796 (i.e. bottom bit is 0). */
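/* A worked trace of the above, assuming the __lekf2 return values just
   described: results -1, 0, +1, +2 become 0, 1, 2, 3 after the add,
   and 0, 1, 0, 1 after the AND, so the bottom bit is set exactly for
   equal/NaN (the UNEQ case) and clear for less/greater (LTGT). */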
21797 if (uneq_or_ltgt)
21799 rtx add_result = gen_reg_rtx (SImode);
21800 rtx and_result = gen_reg_rtx (SImode);
21801 emit_insn (gen_addsi3 (add_result, dest, GEN_INT (1)));
21802 emit_insn (gen_andsi3 (and_result, add_result, GEN_INT (1)));
21803 dest = and_result;
21806 emit_insn (gen_rtx_SET (compare_result,
21807 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21810 else
21812 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21813 CLOBBERs to match cmptf_internal2 pattern. */
21814 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21815 && FLOAT128_IBM_P (GET_MODE (op0))
21816 && TARGET_HARD_FLOAT && TARGET_FPRS)
21817 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21818 gen_rtvec (10,
21819 gen_rtx_SET (compare_result,
21820 gen_rtx_COMPARE (comp_mode, op0, op1)),
21821 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21822 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21823 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21824 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21825 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21826 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21827 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21828 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21829 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21830 else if (GET_CODE (op1) == UNSPEC
21831 && XINT (op1, 1) == UNSPEC_SP_TEST)
21833 rtx op1b = XVECEXP (op1, 0, 0);
21834 comp_mode = CCEQmode;
21835 compare_result = gen_reg_rtx (CCEQmode);
21836 if (TARGET_64BIT)
21837 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21838 else
21839 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21841 else
21842 emit_insn (gen_rtx_SET (compare_result,
21843 gen_rtx_COMPARE (comp_mode, op0, op1)));
21846 /* Some kinds of FP comparisons need an OR operation;
21847 under flag_finite_math_only we don't bother. */
21848 if (FLOAT_MODE_P (mode)
21849 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21850 && !flag_finite_math_only
21851 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
21852 && (code == LE || code == GE
21853 || code == UNEQ || code == LTGT
21854 || code == UNGT || code == UNLT))
21856 enum rtx_code or1, or2;
21857 rtx or1_rtx, or2_rtx, compare2_rtx;
21858 rtx or_result = gen_reg_rtx (CCEQmode);
21860 switch (code)
21862 case LE: or1 = LT; or2 = EQ; break;
21863 case GE: or1 = GT; or2 = EQ; break;
21864 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21865 case LTGT: or1 = LT; or2 = GT; break;
21866 case UNGT: or1 = UNORDERED; or2 = GT; break;
21867 case UNLT: or1 = UNORDERED; or2 = LT; break;
21868 default: gcc_unreachable ();
21870 validate_condition_mode (or1, comp_mode);
21871 validate_condition_mode (or2, comp_mode);
21872 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21873 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21874 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21875 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21876 const_true_rtx);
21877 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21879 compare_result = or_result;
21880 code = EQ;
21883 validate_condition_mode (code, GET_MODE (compare_result));
21885 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21889 /* Return the diagnostic message string if the binary operation OP is
21890 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21892 static const char*
21893 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21894 const_tree type1,
21895 const_tree type2)
21897 enum machine_mode mode1 = TYPE_MODE (type1);
21898 enum machine_mode mode2 = TYPE_MODE (type2);
21900 /* For complex modes, use the inner type. */
21901 if (COMPLEX_MODE_P (mode1))
21902 mode1 = GET_MODE_INNER (mode1);
21904 if (COMPLEX_MODE_P (mode2))
21905 mode2 = GET_MODE_INNER (mode2);
21907 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21908 double to intermix unless -mfloat128-convert. */
21909 if (mode1 == mode2)
21910 return NULL;
21912 if (!TARGET_FLOAT128_CVT)
21914 if ((mode1 == KFmode && mode2 == IFmode)
21915 || (mode1 == IFmode && mode2 == KFmode))
21916 return N_("__float128 and __ibm128 cannot be used in the same "
21917 "expression");
21919 if (TARGET_IEEEQUAD
21920 && ((mode1 == IFmode && mode2 == TFmode)
21921 || (mode1 == TFmode && mode2 == IFmode)))
21922 return N_("__ibm128 and long double cannot be used in the same "
21923 "expression");
21925 if (!TARGET_IEEEQUAD
21926 && ((mode1 == KFmode && mode2 == TFmode)
21927 || (mode1 == TFmode && mode2 == KFmode)))
21928 return N_("__float128 and long double cannot be used in the same "
21929 "expression");
21932 return NULL;
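/* Illustrative case: given '__float128 x; __ibm128 y;', the expression
   'x + y' arrives here with KFmode and IFmode operands, so without
   -mfloat128-convert the first message above is returned and the
   front end rejects the mix. */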
21936 /* Expand floating point conversion to/from __float128 and __ibm128. */
21938 void
21939 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21941 machine_mode dest_mode = GET_MODE (dest);
21942 machine_mode src_mode = GET_MODE (src);
21943 convert_optab cvt = unknown_optab;
21944 bool do_move = false;
21945 rtx libfunc = NULL_RTX;
21946 rtx dest2;
21947 typedef rtx (*rtx_2func_t) (rtx, rtx);
21948 rtx_2func_t hw_convert = (rtx_2func_t)0;
21949 size_t kf_or_tf;
21951 struct hw_conv_t {
21952 rtx_2func_t from_df;
21953 rtx_2func_t from_sf;
21954 rtx_2func_t from_si_sign;
21955 rtx_2func_t from_si_uns;
21956 rtx_2func_t from_di_sign;
21957 rtx_2func_t from_di_uns;
21958 rtx_2func_t to_df;
21959 rtx_2func_t to_sf;
21960 rtx_2func_t to_si_sign;
21961 rtx_2func_t to_si_uns;
21962 rtx_2func_t to_di_sign;
21963 rtx_2func_t to_di_uns;
21964 } hw_conversions[2] = {
21965 /* conversions to/from KFmode */
21967 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21968 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21969 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21970 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21971 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21972 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21973 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21974 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21975 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21976 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21977 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21978 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21981 /* conversions to/from TFmode */
21983 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21984 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21985 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21986 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21987 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21988 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21989 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21990 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21991 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21992 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21993 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21994 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21998 if (dest_mode == src_mode)
21999 gcc_unreachable ();
22001 /* Eliminate memory operations. */
22002 if (MEM_P (src))
22003 src = force_reg (src_mode, src);
22005 if (MEM_P (dest))
22007 rtx tmp = gen_reg_rtx (dest_mode);
22008 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22009 rs6000_emit_move (dest, tmp, dest_mode);
22010 return;
22013 /* Convert to IEEE 128-bit floating point. */
22014 if (FLOAT128_IEEE_P (dest_mode))
22016 if (dest_mode == KFmode)
22017 kf_or_tf = 0;
22018 else if (dest_mode == TFmode)
22019 kf_or_tf = 1;
22020 else
22021 gcc_unreachable ();
22023 switch (src_mode)
22025 case DFmode:
22026 cvt = sext_optab;
22027 hw_convert = hw_conversions[kf_or_tf].from_df;
22028 break;
22030 case SFmode:
22031 cvt = sext_optab;
22032 hw_convert = hw_conversions[kf_or_tf].from_sf;
22033 break;
22035 case KFmode:
22036 case IFmode:
22037 case TFmode:
22038 if (FLOAT128_IBM_P (src_mode))
22039 cvt = sext_optab;
22040 else
22041 do_move = true;
22042 break;
22044 case SImode:
22045 if (unsigned_p)
22047 cvt = ufloat_optab;
22048 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22050 else
22052 cvt = sfloat_optab;
22053 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22055 break;
22057 case DImode:
22058 if (unsigned_p)
22060 cvt = ufloat_optab;
22061 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22063 else
22065 cvt = sfloat_optab;
22066 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22068 break;
22070 default:
22071 gcc_unreachable ();
22075 /* Convert from IEEE 128-bit floating point. */
22076 else if (FLOAT128_IEEE_P (src_mode))
22078 if (src_mode == KFmode)
22079 kf_or_tf = 0;
22080 else if (src_mode == TFmode)
22081 kf_or_tf = 1;
22082 else
22083 gcc_unreachable ();
22085 switch (dest_mode)
22087 case DFmode:
22088 cvt = trunc_optab;
22089 hw_convert = hw_conversions[kf_or_tf].to_df;
22090 break;
22092 case SFmode:
22093 cvt = trunc_optab;
22094 hw_convert = hw_conversions[kf_or_tf].to_sf;
22095 break;
22097 case KFmode:
22098 case IFmode:
22099 case TFmode:
22100 if (FLOAT128_IBM_P (dest_mode))
22101 cvt = trunc_optab;
22102 else
22103 do_move = true;
22104 break;
22106 case SImode:
22107 if (unsigned_p)
22109 cvt = ufix_optab;
22110 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22112 else
22114 cvt = sfix_optab;
22115 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22117 break;
22119 case DImode:
22120 if (unsigned_p)
22122 cvt = ufix_optab;
22123 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22125 else
22127 cvt = sfix_optab;
22128 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22130 break;
22132 default:
22133 gcc_unreachable ();
22137 /* Both IBM format. */
22138 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22139 do_move = true;
22141 else
22142 gcc_unreachable ();
22144 /* Handle conversion between TFmode/KFmode. */
22145 if (do_move)
22146 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22148 /* Handle conversion if we have hardware support. */
22149 else if (TARGET_FLOAT128_HW && hw_convert)
22150 emit_insn ((hw_convert) (dest, src));
22152 /* Call an external function to do the conversion. */
22153 else if (cvt != unknown_optab)
22155 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22156 gcc_assert (libfunc != NULL_RTX);
22158 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
22159 src_mode);
22161 gcc_assert (dest2 != NULL_RTX);
22162 if (!rtx_equal_p (dest, dest2))
22163 emit_move_insn (dest, dest2);
22166 else
22167 gcc_unreachable ();
22169 return;
22172 /* Split a conversion from __float128 to an integer type into separate insns.
22173 OPERANDS points to the destination, source, and V2DI temporary
22174 register. CODE is either FIX or UNSIGNED_FIX. */
22176 void
22177 convert_float128_to_int (rtx *operands, enum rtx_code code)
22179 rtx dest = operands[0];
22180 rtx src = operands[1];
22181 rtx tmp = operands[2];
22182 rtx cvt;
22183 rtvec cvt_vec;
22184 rtx cvt_unspec;
22185 rtvec move_vec;
22186 rtx move_unspec;
22188 if (GET_CODE (tmp) == SCRATCH)
22189 tmp = gen_reg_rtx (V2DImode);
22191 if (MEM_P (dest))
22192 dest = rs6000_address_for_fpconvert (dest);
22194 /* Generate the actual convert insn of the form:
22195 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
22196 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
22197 cvt_vec = gen_rtvec (1, cvt);
22198 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
22199 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
22201 /* Generate the move insn of the form:
22202 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
22203 move_vec = gen_rtvec (1, tmp);
22204 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
22205 emit_insn (gen_rtx_SET (dest, move_unspec));
22208 /* Split a conversion from an integer type to __float128 into separate insns.
22209 OPERANDS points to the destination, source, and V2DI temporary
22210 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
22212 void
22213 convert_int_to_float128 (rtx *operands, enum rtx_code code)
22215 rtx dest = operands[0];
22216 rtx src = operands[1];
22217 rtx tmp = operands[2];
22218 rtx cvt;
22219 rtvec cvt_vec;
22220 rtx cvt_unspec;
22221 rtvec move_vec;
22222 rtx move_unspec;
22223 rtx unsigned_flag;
22225 if (GET_CODE (tmp) == SCRATCH)
22226 tmp = gen_reg_rtx (V2DImode);
22228 if (MEM_P (src))
22229 src = rs6000_address_for_fpconvert (src);
22231 /* Generate the move of the integer into the Altivec register of the form:
22232 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
22233 (const_int 0)] UNSPEC_IEEE128_MOVE)).
22235 or:
22236 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
22238 if (GET_MODE (src) == SImode)
22240 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
22241 move_vec = gen_rtvec (2, src, unsigned_flag);
22243 else
22244 move_vec = gen_rtvec (1, src);
22246 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
22247 emit_insn (gen_rtx_SET (tmp, move_unspec));
22249 /* Generate the actual convert insn of the form:
22250 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
22251 UNSPEC_IEEE128_CONVERT))). */
22252 cvt_vec = gen_rtvec (1, tmp);
22253 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
22254 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
22255 emit_insn (gen_rtx_SET (dest, cvt));
22259 /* Emit the RTL for an sISEL pattern. */
22261 void
22262 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
22264 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
22267 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22268 can be used as that dest register. Return the dest register. */
22270 rtx
22271 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22273 if (op2 == const0_rtx)
22274 return op1;
22276 if (GET_CODE (scratch) == SCRATCH)
22277 scratch = gen_reg_rtx (mode);
22279 if (logical_operand (op2, mode))
22280 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22281 else
22282 emit_insn (gen_rtx_SET (scratch,
22283 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22285 return scratch;
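/* Note on the above: both forms leave SCRATCH equal to zero exactly
   when OP1 == OP2, since (op1 ^ op2) == 0 and (op1 - op2) == 0 are
   each equivalent to equality; XOR is used when OP2 is directly usable
   by a logical instruction, PLUS of the negation otherwise. */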
22288 void
22289 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22291 rtx condition_rtx;
22292 machine_mode op_mode;
22293 enum rtx_code cond_code;
22294 rtx result = operands[0];
22296 condition_rtx = rs6000_generate_compare (operands[1], mode);
22297 cond_code = GET_CODE (condition_rtx);
22299 if (FLOAT_MODE_P (mode)
22300 && !TARGET_FPRS && TARGET_HARD_FLOAT)
22302 rtx t;
22304 PUT_MODE (condition_rtx, SImode);
22305 t = XEXP (condition_rtx, 0);
22307 gcc_assert (cond_code == NE || cond_code == EQ);
22309 if (cond_code == NE)
22310 emit_insn (gen_e500_flip_gt_bit (t, t));
22312 emit_insn (gen_move_from_CR_gt_bit (result, t));
22313 return;
22316 if (cond_code == NE
22317 || cond_code == GE || cond_code == LE
22318 || cond_code == GEU || cond_code == LEU
22319 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22321 rtx not_result = gen_reg_rtx (CCEQmode);
22322 rtx not_op, rev_cond_rtx;
22323 machine_mode cc_mode;
22325 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22327 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22328 SImode, XEXP (condition_rtx, 0), const0_rtx);
22329 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22330 emit_insn (gen_rtx_SET (not_result, not_op));
22331 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22334 op_mode = GET_MODE (XEXP (operands[1], 0));
22335 if (op_mode == VOIDmode)
22336 op_mode = GET_MODE (XEXP (operands[1], 1));
22338 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22340 PUT_MODE (condition_rtx, DImode);
22341 convert_move (result, condition_rtx, 0);
22343 else
22345 PUT_MODE (condition_rtx, SImode);
22346 emit_insn (gen_rtx_SET (result, condition_rtx));
22350 /* Emit a conditional branch: OPERANDS[0] holds the comparison and OPERANDS[3] the target label. */
22352 void
22353 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22355 rtx condition_rtx, loc_ref;
22357 condition_rtx = rs6000_generate_compare (operands[0], mode);
22358 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22359 emit_jump_insn (gen_rtx_SET (pc_rtx,
22360 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22361 loc_ref, pc_rtx)));
22364 /* Return the string to output a conditional branch to LABEL, which is
22365 the operand template of the label, or NULL if the branch is really a
22366 conditional return.
22368 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22369 condition code register and its mode specifies what kind of
22370 comparison we made.
22372 REVERSED is nonzero if we should reverse the sense of the comparison.
22374 INSN is the insn. */
22376 char *
22377 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22379 static char string[64];
22380 enum rtx_code code = GET_CODE (op);
22381 rtx cc_reg = XEXP (op, 0);
22382 machine_mode mode = GET_MODE (cc_reg);
22383 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22384 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22385 int really_reversed = reversed ^ need_longbranch;
22386 char *s = string;
22387 const char *ccode;
22388 const char *pred;
22389 rtx note;
22391 validate_condition_mode (code, mode);
22393 /* Work out which way this really branches. We could use
22394 reverse_condition_maybe_unordered here always but this
22395 makes the resulting assembler clearer. */
22396 if (really_reversed)
22398 /* Reversal of FP compares takes some care -- an ordered compare
22399 becomes an unordered compare and vice versa. */
22400 if (mode == CCFPmode)
22401 code = reverse_condition_maybe_unordered (code);
22402 else
22403 code = reverse_condition (code);
22406 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
22408 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
22409 to the GT bit. */
22410 switch (code)
22412 case EQ:
22413 /* Opposite of GT. */
22414 code = GT;
22415 break;
22417 case NE:
22418 code = UNLE;
22419 break;
22421 default:
22422 gcc_unreachable ();
22426 switch (code)
22428 /* Not all of these are actually distinct opcodes, but
22429 we distinguish them for clarity of the resulting assembler. */
22430 case NE: case LTGT:
22431 ccode = "ne"; break;
22432 case EQ: case UNEQ:
22433 ccode = "eq"; break;
22434 case GE: case GEU:
22435 ccode = "ge"; break;
22436 case GT: case GTU: case UNGT:
22437 ccode = "gt"; break;
22438 case LE: case LEU:
22439 ccode = "le"; break;
22440 case LT: case LTU: case UNLT:
22441 ccode = "lt"; break;
22442 case UNORDERED: ccode = "un"; break;
22443 case ORDERED: ccode = "nu"; break;
22444 case UNGE: ccode = "nl"; break;
22445 case UNLE: ccode = "ng"; break;
22446 default:
22447 gcc_unreachable ();
22450 /* Maybe we have a guess as to how likely the branch is. */
22451 pred = "";
22452 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22453 if (note != NULL_RTX)
22455 /* PROB is the difference from 50%. */
22456 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
22458 /* Only hint for highly probable/improbable branches on newer cpus when
22459 we have real profile data, as static prediction overrides processor
22460 dynamic prediction. For older cpus we may as well always hint, but
22461 assume not taken for branches that are very close to 50% as a
22462 mispredicted taken branch is more expensive than a
22463 mispredicted not-taken branch. */
22464 if (rs6000_always_hint
22465 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22466 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22467 && br_prob_note_reliable_p (note)))
22469 if (abs (prob) > REG_BR_PROB_BASE / 20
22470 && ((prob > 0) ^ need_longbranch))
22471 pred = "+";
22472 else
22473 pred = "-";
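/* Illustrative numbers: with REG_BR_PROB_BASE == 10000, a REG_BR_PROB
   note of 9900 gives prob == 4900, which clears the 48% threshold
   (4800); since 4900 also exceeds 10000/20, the branch is hinted "+"
   (predicted taken) unless a long branch reverses the sense. */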
22477 if (label == NULL)
22478 s += sprintf (s, "b%slr%s ", ccode, pred);
22479 else
22480 s += sprintf (s, "b%s%s ", ccode, pred);
22482 /* We need to escape any '%' characters in the reg_names string.
22483 Assume they'd only be the first character.... */
22484 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22485 *s++ = '%';
22486 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22488 if (label != NULL)
22490 /* If the branch distance was too far, we may have to use an
22491 unconditional branch to go the distance. */
22492 if (need_longbranch)
22493 s += sprintf (s, ",$+8\n\tb %s", label);
22494 else
22495 s += sprintf (s, ",%s", label);
22498 return string;
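/* Example output (assuming "cr0" register naming): for (eq cr0 0) and
   label "L25" this returns "beq cr0,L25"; if the target is out of range
   for a conditional branch, the sense is reversed and the result is
   "bne cr0,$+8" followed by "b L25". */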
22501 /* Return the string to flip the GT bit on a CR. */
22502 char *
22503 output_e500_flip_gt_bit (rtx dst, rtx src)
22505 static char string[64];
22506 int a, b;
22508 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
22509 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
22511 /* GT bit. */
22512 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
22513 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
22515 sprintf (string, "crnot %d,%d", a, b);
22516 return string;
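/* Example (illustrative): for DST == SRC == cr1 this returns
   "crnot 5,5", since the GT bit of CR field n is CR bit 4*n + 1. */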
22519 /* Return insn for VSX or Altivec comparisons. */
22521 static rtx
22522 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22524 rtx mask;
22525 machine_mode mode = GET_MODE (op0);
22527 switch (code)
22529 default:
22530 break;
22532 case GE:
22533 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22534 return NULL_RTX;
22536 case EQ:
22537 case GT:
22538 case GTU:
22539 case ORDERED:
22540 case UNORDERED:
22541 case UNEQ:
22542 case LTGT:
22543 mask = gen_reg_rtx (mode);
22544 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22545 return mask;
22548 return NULL_RTX;
22551 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22552 DMODE is expected destination mode. This is a recursive function. */
22554 static rtx
22555 rs6000_emit_vector_compare (enum rtx_code rcode,
22556 rtx op0, rtx op1,
22557 machine_mode dmode)
22559 rtx mask;
22560 bool swap_operands = false;
22561 bool try_again = false;
22563 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22564 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22566 /* See if the comparison works as is. */
22567 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22568 if (mask)
22569 return mask;
22571 switch (rcode)
22573 case LT:
22574 rcode = GT;
22575 swap_operands = true;
22576 try_again = true;
22577 break;
22578 case LTU:
22579 rcode = GTU;
22580 swap_operands = true;
22581 try_again = true;
22582 break;
22583 case NE:
22584 case UNLE:
22585 case UNLT:
22586 case UNGE:
22587 case UNGT:
22588 /* Invert condition and try again.
22589 e.g., A != B becomes ~(A==B). */
22591 enum rtx_code rev_code;
22592 enum insn_code nor_code;
22593 rtx mask2;
22595 rev_code = reverse_condition_maybe_unordered (rcode);
22596 if (rev_code == UNKNOWN)
22597 return NULL_RTX;
22599 nor_code = optab_handler (one_cmpl_optab, dmode);
22600 if (nor_code == CODE_FOR_nothing)
22601 return NULL_RTX;
22603 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22604 if (!mask2)
22605 return NULL_RTX;
22607 mask = gen_reg_rtx (dmode);
22608 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22609 return mask;
22611 break;
22612 case GE:
22613 case GEU:
22614 case LE:
22615 case LEU:
22616 /* Try GT/GTU/LT/LTU OR EQ */
22618 rtx c_rtx, eq_rtx;
22619 enum insn_code ior_code;
22620 enum rtx_code new_code;
22622 switch (rcode)
22624 case GE:
22625 new_code = GT;
22626 break;
22628 case GEU:
22629 new_code = GTU;
22630 break;
22632 case LE:
22633 new_code = LT;
22634 break;
22636 case LEU:
22637 new_code = LTU;
22638 break;
22640 default:
22641 gcc_unreachable ();
22644 ior_code = optab_handler (ior_optab, dmode);
22645 if (ior_code == CODE_FOR_nothing)
22646 return NULL_RTX;
22648 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22649 if (!c_rtx)
22650 return NULL_RTX;
22652 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22653 if (!eq_rtx)
22654 return NULL_RTX;
22656 mask = gen_reg_rtx (dmode);
22657 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22658 return mask;
22660 break;
22661 default:
22662 return NULL_RTX;
22665 if (try_again)
22667 if (swap_operands)
22668 std::swap (op0, op1);
22670 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22671 if (mask)
22672 return mask;
22675 /* You only get two chances. */
22676 return NULL_RTX;
22679 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22680 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22681 operands for the relation operation COND. */
22683 int
22684 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22685 rtx cond, rtx cc_op0, rtx cc_op1)
22687 machine_mode dest_mode = GET_MODE (dest);
22688 machine_mode mask_mode = GET_MODE (cc_op0);
22689 enum rtx_code rcode = GET_CODE (cond);
22690 machine_mode cc_mode = CCmode;
22691 rtx mask;
22692 rtx cond2;
22693 rtx tmp;
22694 bool invert_move = false;
22696 if (VECTOR_UNIT_NONE_P (dest_mode))
22697 return 0;
22699 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22700 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22702 switch (rcode)
22704 /* Swap operands if we can, and fall back to doing the operation as
22705 specified, and doing a NOR to invert the test. */
22706 case NE:
22707 case UNLE:
22708 case UNLT:
22709 case UNGE:
22710 case UNGT:
22711 /* Invert condition and try again.
22712 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22713 invert_move = true;
22714 rcode = reverse_condition_maybe_unordered (rcode);
22715 if (rcode == UNKNOWN)
22716 return 0;
22717 break;
22719 /* Mark unsigned tests with CCUNSmode. */
22720 case GTU:
22721 case GEU:
22722 case LTU:
22723 case LEU:
22724 cc_mode = CCUNSmode;
22725 break;
22727 default:
22728 break;
22731 /* Get the vector mask for the given relational operations. */
22732 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22734 if (!mask)
22735 return 0;
22737 if (invert_move)
22739 tmp = op_true;
22740 op_true = op_false;
22741 op_false = tmp;
22744 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22745 CONST0_RTX (dest_mode));
22746 emit_insn (gen_rtx_SET (dest,
22747 gen_rtx_IF_THEN_ELSE (dest_mode,
22748 cond2,
22749 op_true,
22750 op_false)));
22751 return 1;
22754 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
22755 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
22756 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
22757 0 if the hardware has no such operation. */
22759 static int
22760 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22762 enum rtx_code code = GET_CODE (op);
22763 rtx op0 = XEXP (op, 0);
22764 rtx op1 = XEXP (op, 1);
22765 machine_mode compare_mode = GET_MODE (op0);
22766 machine_mode result_mode = GET_MODE (dest);
22767 bool max_p = false;
22769 if (result_mode != compare_mode)
22770 return 0;
22772 if (code == GE || code == GT)
22773 max_p = true;
22774 else if (code == LE || code == LT)
22775 max_p = false;
22776 else
22777 return 0;
22779 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22780 ;
22782 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22783 max_p = !max_p;
22785 else
22786 return 0;
22788 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22789 return 1;
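/* Illustrative mapping: (a >= b ? a : b) reaches here with max_p set
   and becomes SMAX (xsmaxcdp); (a >= b ? b : a) flips max_p and
   becomes SMIN (xsmincdp) instead. */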
22792 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22793 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
22794 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
22795 zero/false. Return 0 if the hardware has no such operation. */
22797 static int
22798 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22800 enum rtx_code code = GET_CODE (op);
22801 rtx op0 = XEXP (op, 0);
22802 rtx op1 = XEXP (op, 1);
22803 machine_mode result_mode = GET_MODE (dest);
22804 rtx compare_rtx;
22805 rtx cmove_rtx;
22806 rtx clobber_rtx;
22808 if (!can_create_pseudo_p ())
22809 return 0;
22811 switch (code)
22813 case EQ:
22814 case GE:
22815 case GT:
22816 break;
22818 case NE:
22819 case LT:
22820 case LE:
22821 code = swap_condition (code);
22822 std::swap (op0, op1);
22823 break;
22825 default:
22826 return 0;
22829 /* Generate: [(parallel [(set (dest)
22830 (if_then_else (op (cmp1) (cmp2))
22831 (true)
22832 (false)))
22833 (clobber (scratch))])]. */
22835 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22836 cmove_rtx = gen_rtx_SET (dest,
22837 gen_rtx_IF_THEN_ELSE (result_mode,
22838 compare_rtx,
22839 true_cond,
22840 false_cond));
22842 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22843 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22844 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22846 return 1;
22849 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
22850 operands of the last comparison is nonzero/true, FALSE_COND if it
22851 is zero/false. Return 0 if the hardware has no such operation. */
22853 int
22854 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22856 enum rtx_code code = GET_CODE (op);
22857 rtx op0 = XEXP (op, 0);
22858 rtx op1 = XEXP (op, 1);
22859 machine_mode compare_mode = GET_MODE (op0);
22860 machine_mode result_mode = GET_MODE (dest);
22861 rtx temp;
22862 bool is_against_zero;
22864 /* These modes should always match. */
22865 if (GET_MODE (op1) != compare_mode
22866 /* In the isel case however, we can use a compare immediate, so
22867 op1 may be a small constant. */
22868 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22869 return 0;
22870 if (GET_MODE (true_cond) != result_mode)
22871 return 0;
22872 if (GET_MODE (false_cond) != result_mode)
22873 return 0;
22875 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22876 if (TARGET_P9_MINMAX
22877 && (compare_mode == SFmode || compare_mode == DFmode)
22878 && (result_mode == SFmode || result_mode == DFmode))
22880 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22881 return 1;
22883 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22884 return 1;
22887 /* Don't allow using floating point comparisons for integer results for
22888 now. */
22889 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22890 return 0;
22892 /* First, work out if the hardware can do this at all, or
22893 if it's too slow.... */
22894 if (!FLOAT_MODE_P (compare_mode))
22896 if (TARGET_ISEL)
22897 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22898 return 0;
22900 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
22901 && SCALAR_FLOAT_MODE_P (compare_mode))
22902 return 0;
22904 is_against_zero = op1 == CONST0_RTX (compare_mode);
22906 /* A floating-point subtract might overflow, underflow, or produce
22907 an inexact result, thus changing the floating-point flags, so it
22908 can't be generated if we care about that. It's safe if one side
22909 of the construct is zero, since then no subtract will be
22910 generated. */
22911 if (SCALAR_FLOAT_MODE_P (compare_mode)
22912 && flag_trapping_math && ! is_against_zero)
22913 return 0;
22915 /* Eliminate half of the comparisons by switching operands; this
22916 makes the remaining code simpler. */
22917 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22918 || code == LTGT || code == LT || code == UNLE)
22920 code = reverse_condition_maybe_unordered (code);
22921 temp = true_cond;
22922 true_cond = false_cond;
22923 false_cond = temp;
22926 /* UNEQ and LTGT take four instructions for a comparison with zero,
22927 so it'll probably be faster to use a branch here too. */
22928 if (code == UNEQ && HONOR_NANS (compare_mode))
22929 return 0;
22931 /* We're going to try to implement comparisons by performing
22932 a subtract, then comparing against zero. Unfortunately,
22933 Inf - Inf is NaN which is not zero, and so if we don't
22934 know that the operand is finite and the comparison
22935 would treat EQ differently from UNORDERED, we can't do it. */
22936 if (HONOR_INFINITIES (compare_mode)
22937 && code != GT && code != UNGE
22938 && (GET_CODE (op1) != CONST_DOUBLE
22939 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22940 /* Constructs of the form (a OP b ? a : b) are safe. */
22941 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22942 || (! rtx_equal_p (op0, true_cond)
22943 && ! rtx_equal_p (op1, true_cond))))
22944 return 0;
22946 /* At this point we know we can use fsel. */
22948 /* Reduce the comparison to a comparison against zero. */
22949 if (! is_against_zero)
22951 temp = gen_reg_rtx (compare_mode);
22952 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22953 op0 = temp;
22954 op1 = CONST0_RTX (compare_mode);
22957 /* If we don't care about NaNs we can reduce some of the comparisons
22958 down to faster ones. */
22959 if (! HONOR_NANS (compare_mode))
22960 switch (code)
22962 case GT:
22963 code = LE;
22964 temp = true_cond;
22965 true_cond = false_cond;
22966 false_cond = temp;
22967 break;
22968 case UNGE:
22969 code = GE;
22970 break;
22971 case UNEQ:
22972 code = EQ;
22973 break;
22974 default:
22975 break;
22978 /* Now, reduce everything down to a GE. */
22979 switch (code)
22981 case GE:
22982 break;
22984 case LE:
22985 temp = gen_reg_rtx (compare_mode);
22986 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22987 op0 = temp;
22988 break;
22990 case ORDERED:
22991 temp = gen_reg_rtx (compare_mode);
22992 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22993 op0 = temp;
22994 break;
22996 case EQ:
22997 temp = gen_reg_rtx (compare_mode);
22998 emit_insn (gen_rtx_SET (temp,
22999 gen_rtx_NEG (compare_mode,
23000 gen_rtx_ABS (compare_mode, op0))));
23001 op0 = temp;
23002 break;
23004 case UNGE:
23005 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23006 temp = gen_reg_rtx (result_mode);
23007 emit_insn (gen_rtx_SET (temp,
23008 gen_rtx_IF_THEN_ELSE (result_mode,
23009 gen_rtx_GE (VOIDmode,
23010 op0, op1),
23011 true_cond, false_cond)));
23012 false_cond = true_cond;
23013 true_cond = temp;
23015 temp = gen_reg_rtx (compare_mode);
23016 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23017 op0 = temp;
23018 break;
23020 case GT:
23021 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23022 temp = gen_reg_rtx (result_mode);
23023 emit_insn (gen_rtx_SET (temp,
23024 gen_rtx_IF_THEN_ELSE (result_mode,
23025 gen_rtx_GE (VOIDmode,
23026 op0, op1),
23027 true_cond, false_cond)));
23028 true_cond = false_cond;
23029 false_cond = temp;
23031 temp = gen_reg_rtx (compare_mode);
23032 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23033 op0 = temp;
23034 break;
23036 default:
23037 gcc_unreachable ();
23040 emit_insn (gen_rtx_SET (dest,
23041 gen_rtx_IF_THEN_ELSE (result_mode,
23042 gen_rtx_GE (VOIDmode,
23043 op0, op1),
23044 true_cond, false_cond)));
23045 return 1;
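/* Note on the final form above: (set dest (if_then_else (ge op0 0)
   true_cond false_cond)) is exactly the shape of the fsel pattern;
   fsel FRT,FRA,FRC,FRB selects FRC when FRA >= 0.0 and FRB
   otherwise. */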
23048 /* Same as above, but for ints (isel). */
23050 static int
23051 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23053 rtx condition_rtx, cr;
23054 machine_mode mode = GET_MODE (dest);
23055 enum rtx_code cond_code;
23056 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23057 bool signedp;
23059 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23060 return 0;
23062 /* We still have to do the compare, because isel doesn't do a
23063 compare; it just looks at the CRx bits set by a previous compare
23064 instruction. */
23065 condition_rtx = rs6000_generate_compare (op, mode);
23066 cond_code = GET_CODE (condition_rtx);
23067 cr = XEXP (condition_rtx, 0);
23068 signedp = GET_MODE (cr) == CCmode;
23070 isel_func = (mode == SImode
23071 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23072 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23074 switch (cond_code)
23076 case LT: case GT: case LTU: case GTU: case EQ:
23077 /* isel handles these directly. */
23078 break;
23080 default:
23081 /* We need to swap the sense of the comparison. */
23083 std::swap (false_cond, true_cond);
23084 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23086 break;
23089 false_cond = force_reg (mode, false_cond);
23090 if (true_cond != const0_rtx)
23091 true_cond = force_reg (mode, true_cond);
23093 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23095 return 1;
23098 const char *
23099 output_isel (rtx *operands)
23101 enum rtx_code code;
23103 code = GET_CODE (operands[1]);
23105 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
23107 gcc_assert (GET_CODE (operands[2]) == REG
23108 && GET_CODE (operands[3]) == REG);
23109 PUT_CODE (operands[1], reverse_condition (code));
23110 return "isel %0,%3,%2,%j1";
23113 return "isel %0,%2,%3,%j1";
23116 void
23117 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23119 machine_mode mode = GET_MODE (op0);
23120 enum rtx_code c;
23121 rtx target;
23123 /* VSX/altivec have direct min/max insns. */
23124 if ((code == SMAX || code == SMIN)
23125 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23126 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23128 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23129 return;
23132 if (code == SMAX || code == SMIN)
23133 c = GE;
23134 else
23135 c = GEU;
23137 if (code == SMAX || code == UMAX)
23138 target = emit_conditional_move (dest, c, op0, op1, mode,
23139 op0, op1, mode, 0);
23140 else
23141 target = emit_conditional_move (dest, c, op0, op1, mode,
23142 op1, op0, mode, 0);
23143 gcc_assert (target);
23144 if (target != dest)
23145 emit_move_insn (dest, target);
23148 /* Split a signbit operation on 64-bit machines with direct move. The
23149 value may come from memory or may already be loaded into a GPR. */
23151 void
23152 rs6000_split_signbit (rtx dest, rtx src)
23154 machine_mode d_mode = GET_MODE (dest);
23155 machine_mode s_mode = GET_MODE (src);
23156 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
23157 rtx shift_reg = dest_di;
23159 gcc_assert (REG_P (dest));
23160 gcc_assert (REG_P (src) || MEM_P (src));
23161 gcc_assert (s_mode == KFmode || s_mode == TFmode);
23163 if (MEM_P (src))
23165 rtx mem = (WORDS_BIG_ENDIAN
23166 ? adjust_address (src, DImode, 0)
23167 : adjust_address (src, DImode, 8));
23168 emit_insn (gen_rtx_SET (dest_di, mem));
23171 else
23173 unsigned int r = REGNO (src);
23175 /* If this is a VSX register, generate the special mfvsrd instruction
23176 to get it in a GPR. Until we support SF and DF modes, that will
23177 always be true. */
23178 gcc_assert (VSX_REGNO_P (r));
23180 if (s_mode == KFmode)
23181 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
23182 else
23183 emit_insn (gen_signbittf2_dm2 (dest_di, src));
23186 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
23187 return;
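/* A sketch of the result for the memory case (illustrative,
   big-endian): the doubleword holding the sign bit is loaded and then
   shifted right by 63, e.g.
       ld dest,0(src)
       srdi dest,dest,63  */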
23190 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23191 COND is true. Mark the jump as unlikely to be taken. */
23193 static void
23194 emit_unlikely_jump (rtx cond, rtx label)
23196 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
23197 rtx x;
23199 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23200 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23201 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
23204 /* A subroutine of the atomic operation splitters. Emit a load-locked
23205 instruction in MODE. For QI/HImode, possibly use a pattern that includes
23206 the zero_extend operation. */
23208 static void
23209 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23211 rtx (*fn) (rtx, rtx) = NULL;
23213 switch (mode)
23215 case QImode:
23216 fn = gen_load_lockedqi;
23217 break;
23218 case HImode:
23219 fn = gen_load_lockedhi;
23220 break;
23221 case SImode:
23222 if (GET_MODE (mem) == QImode)
23223 fn = gen_load_lockedqi_si;
23224 else if (GET_MODE (mem) == HImode)
23225 fn = gen_load_lockedhi_si;
23226 else
23227 fn = gen_load_lockedsi;
23228 break;
23229 case DImode:
23230 fn = gen_load_lockeddi;
23231 break;
23232 case TImode:
23233 fn = gen_load_lockedti;
23234 break;
23235 default:
23236 gcc_unreachable ();
23238 emit_insn (fn (reg, mem));
23241 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23242 instruction in MODE. */
23244 static void
23245 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23247 rtx (*fn) (rtx, rtx, rtx) = NULL;
23249 switch (mode)
23251 case QImode:
23252 fn = gen_store_conditionalqi;
23253 break;
23254 case HImode:
23255 fn = gen_store_conditionalhi;
23256 break;
23257 case SImode:
23258 fn = gen_store_conditionalsi;
23259 break;
23260 case DImode:
23261 fn = gen_store_conditionaldi;
23262 break;
23263 case TImode:
23264 fn = gen_store_conditionalti;
23265 break;
23266 default:
23267 gcc_unreachable ();
23270 /* Emit sync before stwcx. to address PPC405 Erratum. */
23271 if (PPC405_ERRATUM77)
23272 emit_insn (gen_hwsync ());
23274 emit_insn (fn (res, mem, val));
23277 /* Expand barriers before and after a load_locked/store_cond sequence. */
23279 static rtx
23280 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23282 rtx addr = XEXP (mem, 0);
23283 int strict_p = (reload_in_progress || reload_completed);
23285 if (!legitimate_indirect_address_p (addr, strict_p)
23286 && !legitimate_indexed_address_p (addr, strict_p))
23288 addr = force_reg (Pmode, addr);
23289 mem = replace_equiv_address_nv (mem, addr);
23292 switch (model)
23294 case MEMMODEL_RELAXED:
23295 case MEMMODEL_CONSUME:
23296 case MEMMODEL_ACQUIRE:
23297 break;
23298 case MEMMODEL_RELEASE:
23299 case MEMMODEL_ACQ_REL:
23300 emit_insn (gen_lwsync ());
23301 break;
23302 case MEMMODEL_SEQ_CST:
23303 emit_insn (gen_hwsync ());
23304 break;
23305 default:
23306 gcc_unreachable ();
23308 return mem;
23311 static void
23312 rs6000_post_atomic_barrier (enum memmodel model)
23314 switch (model)
23316 case MEMMODEL_RELAXED:
23317 case MEMMODEL_CONSUME:
23318 case MEMMODEL_RELEASE:
23319 break;
23320 case MEMMODEL_ACQUIRE:
23321 case MEMMODEL_ACQ_REL:
23322 case MEMMODEL_SEQ_CST:
23323 emit_insn (gen_isync ());
23324 break;
23325 default:
23326 gcc_unreachable ();
23330 /* A subroutine of the various atomic expanders. For sub-word operations,
23331 we must adjust things to operate on SImode. Given the original MEM,
23332 return a new aligned memory. Also build and return the quantities by
23333 which to shift and mask. */
23335 static rtx
23336 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23338 rtx addr, align, shift, mask, mem;
23339 HOST_WIDE_INT shift_mask;
23340 machine_mode mode = GET_MODE (orig_mem);
23342 /* For smaller modes, we have to implement this via SImode. */
23343 shift_mask = (mode == QImode ? 0x18 : 0x10);
23345 addr = XEXP (orig_mem, 0);
23346 addr = force_reg (GET_MODE (addr), addr);
23348 /* Aligned memory containing subword. Generate a new memory. We
23349 do not want any of the existing MEM_ATTR data, as we're now
23350 accessing memory outside the original object. */
23351 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23352 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23353 mem = gen_rtx_MEM (SImode, align);
23354 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23355 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23356 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23358 /* Shift amount for subword relative to aligned word. */
23359 shift = gen_reg_rtx (SImode);
23360 addr = gen_lowpart (SImode, addr);
23361 rtx tmp = gen_reg_rtx (SImode);
23362 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23363 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23364 if (BYTES_BIG_ENDIAN)
23365 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23366 shift, 1, OPTAB_LIB_WIDEN);
23367 *pshift = shift;
23369 /* Mask for insertion. */
23370 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23371 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23372 *pmask = mask;
23374 return mem;
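/* A worked example (illustrative): for a QImode access at an address A
   with A % 4 == 2, ALIGN is A & -4; on little-endian SHIFT is
   (A & 3) * 8 == 16 and MASK is 0xFF << 16, while on big-endian the
   XOR with 0x18 gives SHIFT == 8 and MASK == 0xFF00. */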
23377 /* A subroutine of the various atomic expanders. For sub-word operands,
23378 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23380 static rtx
23381 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23383 rtx x;
23385 x = gen_reg_rtx (SImode);
23386 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23387 gen_rtx_NOT (SImode, mask),
23388 oldval)));
23390 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23392 return x;
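/* That is, the result above is (OLDVAL & ~MASK) | NEWVAL: NEWVAL,
   already shifted into position, replaces the bits of the word
   selected by MASK. */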
23395 /* A subroutine of the various atomic expanders. For sub-word operands,
23396 extract WIDE to NARROW via SHIFT. */
23398 static void
23399 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23401 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23402 wide, 1, OPTAB_LIB_WIDEN);
23403 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23406 /* Expand an atomic compare and swap operation. */
23408 void
23409 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23411 rtx boolval, retval, mem, oldval, newval, cond;
23412 rtx label1, label2, x, mask, shift;
23413 machine_mode mode, orig_mode;
23414 enum memmodel mod_s, mod_f;
23415 bool is_weak;
23417 boolval = operands[0];
23418 retval = operands[1];
23419 mem = operands[2];
23420 oldval = operands[3];
23421 newval = operands[4];
23422 is_weak = (INTVAL (operands[5]) != 0);
23423 mod_s = memmodel_base (INTVAL (operands[6]));
23424 mod_f = memmodel_base (INTVAL (operands[7]));
23425 orig_mode = mode = GET_MODE (mem);
23427 mask = shift = NULL_RTX;
23428 if (mode == QImode || mode == HImode)
23430 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23431 lwarx and shift/mask operations. With power8, we need to do the
23432 comparison in SImode, but the store is still done in QI/HImode. */
23433 oldval = convert_modes (SImode, mode, oldval, 1);
23435 if (!TARGET_SYNC_HI_QI)
23437 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23439 /* Shift and mask OLDVAL into position within the word. */
23440 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23441 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23443 /* Shift and mask NEWVAL into position within the word. */
23444 newval = convert_modes (SImode, mode, newval, 1);
23445 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23446 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23449 /* Prepare to adjust the return value. */
23450 retval = gen_reg_rtx (SImode);
23451 mode = SImode;
23453 else if (reg_overlap_mentioned_p (retval, oldval))
23454 oldval = copy_to_reg (oldval);
23456 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23457 oldval = copy_to_mode_reg (mode, oldval);
23459 if (reg_overlap_mentioned_p (retval, newval))
23460 newval = copy_to_reg (newval);
23462 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23464 label1 = NULL_RTX;
23465 if (!is_weak)
23467 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23468 emit_label (XEXP (label1, 0));
23470 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23472 emit_load_locked (mode, retval, mem);
23474 x = retval;
23475 if (mask)
23476 x = expand_simple_binop (SImode, AND, retval, mask,
23477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23479 cond = gen_reg_rtx (CCmode);
23480 /* If we have TImode, synthesize a comparison. */
23481 if (mode != TImode)
23482 x = gen_rtx_COMPARE (CCmode, x, oldval);
23483 else
23485 rtx xor1_result = gen_reg_rtx (DImode);
23486 rtx xor2_result = gen_reg_rtx (DImode);
23487 rtx or_result = gen_reg_rtx (DImode);
23488 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23489 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23490 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23491 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23493 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23494 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23495 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23496 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23499 emit_insn (gen_rtx_SET (cond, x));
23501 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23502 emit_unlikely_jump (x, label2);
23504 x = newval;
23505 if (mask)
23506 x = rs6000_mask_atomic_subword (retval, newval, mask);
23508 emit_store_conditional (orig_mode, cond, mem, x);
23510 if (!is_weak)
23512 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23513 emit_unlikely_jump (x, label1);
23516 if (!is_mm_relaxed (mod_f))
23517 emit_label (XEXP (label2, 0));
23519 rs6000_post_atomic_barrier (mod_s);
23521 if (is_mm_relaxed (mod_f))
23522 emit_label (XEXP (label2, 0));
23524 if (shift)
23525 rs6000_finish_atomic_subword (operands[1], retval, shift);
23526 else if (mode != GET_MODE (operands[1]))
23527 convert_move (operands[1], retval, 1);
23529 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23530 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23531 emit_insn (gen_rtx_SET (boolval, x));
23534 /* Expand an atomic exchange operation. */
23536 void
23537 rs6000_expand_atomic_exchange (rtx operands[])
23539 rtx retval, mem, val, cond;
23540 machine_mode mode;
23541 enum memmodel model;
23542 rtx label, x, mask, shift;
23544 retval = operands[0];
23545 mem = operands[1];
23546 val = operands[2];
23547 model = memmodel_base (INTVAL (operands[3]));
23548 mode = GET_MODE (mem);
23550 mask = shift = NULL_RTX;
23551 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23553 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23555 /* Shift and mask VAL into position within the word. */
23556 val = convert_modes (SImode, mode, val, 1);
23557 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23558 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23560 /* Prepare to adjust the return value. */
23561 retval = gen_reg_rtx (SImode);
23562 mode = SImode;
23565 mem = rs6000_pre_atomic_barrier (mem, model);
23567 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23568 emit_label (XEXP (label, 0));
23570 emit_load_locked (mode, retval, mem);
23572 x = val;
23573 if (mask)
23574 x = rs6000_mask_atomic_subword (retval, val, mask);
23576 cond = gen_reg_rtx (CCmode);
23577 emit_store_conditional (mode, cond, mem, x);
23579 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23580 emit_unlikely_jump (x, label);
23582 rs6000_post_atomic_barrier (model);
23584 if (shift)
23585 rs6000_finish_atomic_subword (operands[0], retval, shift);
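/* The core of the exchange above is the usual load-locked /
   store-conditional retry loop, roughly (SImode, no subword case):
       loop: lwarx retval,0,mem
             stwcx. val,0,mem
             bne- loop
   with the barriers required by MODEL emitted before and after. */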
23588 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23589 to perform. MEM is the memory on which to operate. VAL is the second
23590 operand of the binary operator. BEFORE and AFTER are optional locations to
23591 return the value of MEM either before or after the operation. MODEL_RTX
23592 is a CONST_INT containing the memory model to use. */
23594 void
23595 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23596 rtx orig_before, rtx orig_after, rtx model_rtx)
23598 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23599 machine_mode mode = GET_MODE (mem);
23600 machine_mode store_mode = mode;
23601 rtx label, x, cond, mask, shift;
23602 rtx before = orig_before, after = orig_after;
23604 mask = shift = NULL_RTX;
23605 /* On power8, we want to use SImode for the operation. On previous systems,
23606 use the operation in a subword and shift/mask to get the proper byte or
23607 halfword. */
23608 if (mode == QImode || mode == HImode)
23610 if (TARGET_SYNC_HI_QI)
23612 val = convert_modes (SImode, mode, val, 1);
23614 /* Prepare to adjust the return value. */
23615 before = gen_reg_rtx (SImode);
23616 if (after)
23617 after = gen_reg_rtx (SImode);
23618 mode = SImode;
23620 else
23622 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23624 /* Shift and mask VAL into position within the word. */
23625 val = convert_modes (SImode, mode, val, 1);
23626 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23629 switch (code)
23631 case IOR:
23632 case XOR:
23633 /* We've already zero-extended VAL. That is sufficient to
23634 make certain that it does not affect other bits. */
23635 mask = NULL;
23636 break;
23638 case AND:
23639 /* If we make certain that all of the other bits in VAL are
23640 set, that will be sufficient to not affect other bits. */
23641 x = gen_rtx_NOT (SImode, mask);
23642 x = gen_rtx_IOR (SImode, x, val);
23643 emit_insn (gen_rtx_SET (val, x));
23644 mask = NULL;
23645 break;
23647 case NOT:
23648 case PLUS:
23649 case MINUS:
23650 /* These will all affect bits outside the field and need
23651 adjustment via MASK within the loop. */
23652 break;
23654 default:
23655 gcc_unreachable ();
23658 /* Prepare to adjust the return value. */
23659 before = gen_reg_rtx (SImode);
23660 if (after)
23661 after = gen_reg_rtx (SImode);
23662 store_mode = mode = SImode;
23666 mem = rs6000_pre_atomic_barrier (mem, model);
23668 label = gen_label_rtx ();
23669 emit_label (label);
23670 label = gen_rtx_LABEL_REF (VOIDmode, label);
23672 if (before == NULL_RTX)
23673 before = gen_reg_rtx (mode);
23675 emit_load_locked (mode, before, mem);
23677 if (code == NOT)
23679 x = expand_simple_binop (mode, AND, before, val,
23680 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23681 after = expand_simple_unop (mode, NOT, x, after, 1);
23683 else
23685 after = expand_simple_binop (mode, code, before, val,
23686 after, 1, OPTAB_LIB_WIDEN);
23689 x = after;
23690 if (mask)
23692 x = expand_simple_binop (SImode, AND, after, mask,
23693 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23694 x = rs6000_mask_atomic_subword (before, x, mask);
23696 else if (store_mode != mode)
23697 x = convert_modes (store_mode, mode, x, 1);
23699 cond = gen_reg_rtx (CCmode);
23700 emit_store_conditional (store_mode, cond, mem, x);
23702 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23703 emit_unlikely_jump (x, label);
23705 rs6000_post_atomic_barrier (model);
23707 if (shift)
23709 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23710 then do the calculations in an SImode register. */
23711 if (orig_before)
23712 rs6000_finish_atomic_subword (orig_before, before, shift);
23713 if (orig_after)
23714 rs6000_finish_atomic_subword (orig_after, after, shift);
23716 else if (store_mode != mode)
23718 /* QImode/HImode on machines with lbarx/lharx where we do the native
23719 operation and then do the calculations in an SImode register. */
23720 if (orig_before)
23721 convert_move (orig_before, before, 1);
23722 if (orig_after)
23723 convert_move (orig_after, after, 1);
23725 else if (orig_after && after != orig_after)
23726 emit_move_insn (orig_after, after);
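/* Editor's sketch (hypothetical, not part of the original file): the
   subword merge that rs6000_mask_atomic_subword performs inside the
   loop above, written as plain C.  OLDVAL is the full word from the
   load-locked, NEWVAL holds the updated subword already shifted into
   position, and MASK selects the bits of the subword.  */
#if 0
static unsigned int
merge_subword (unsigned int oldval, unsigned int newval, unsigned int mask)
{
  /* Bits outside the field come from OLDVAL; the field itself comes
     from NEWVAL.  */
  return (oldval & ~mask) | (newval & mask);
}
#endif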
23729 /* Emit instructions to move SRC to DST. Called by splitters for
23730 multi-register moves. It will emit at most one instruction for
23731 each register that is accessed; that is, it won't emit li/lis pairs
23732 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23733 register. */
23735 void
23736 rs6000_split_multireg_move (rtx dst, rtx src)
23738 /* The register number of the first register being moved. */
23739 int reg;
23740 /* The mode that is to be moved. */
23741 machine_mode mode;
23742 /* The mode that the move is being done in, and its size. */
23743 machine_mode reg_mode;
23744 int reg_mode_size;
23745 /* The number of registers that will be moved. */
23746 int nregs;
23748 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23749 mode = GET_MODE (dst);
23750 nregs = hard_regno_nregs[reg][mode];
23751 if (FP_REGNO_P (reg))
23752 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23753 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
23754 else if (ALTIVEC_REGNO_P (reg))
23755 reg_mode = V16QImode;
23756 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
23757 reg_mode = DFmode;
23758 else
23759 reg_mode = word_mode;
23760 reg_mode_size = GET_MODE_SIZE (reg_mode);
23762 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23764 /* TDmode residing in FP registers is special, since the ISA requires that
23765 the lower-numbered word of a register pair is always the most significant
23766 word, even in little-endian mode. This does not match the usual subreg
23767 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23768 the appropriate constituent registers "by hand" in little-endian mode.
23770 Note we do not need to check for destructive overlap here since TDmode
23771 can only reside in even/odd register pairs. */
23772 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23774 rtx p_src, p_dst;
23775 int i;
23777 for (i = 0; i < nregs; i++)
23779 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23780 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23781 else
23782 p_src = simplify_gen_subreg (reg_mode, src, mode,
23783 i * reg_mode_size);
23785 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23786 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23787 else
23788 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23789 i * reg_mode_size);
23791 emit_insn (gen_rtx_SET (p_dst, p_src));
23794 return;
23797 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23799 /* Move register range backwards, if we might have destructive
23800 overlap. */
23801 int i;
23802 for (i = nregs - 1; i >= 0; i--)
23803 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23804 i * reg_mode_size),
23805 simplify_gen_subreg (reg_mode, src, mode,
23806 i * reg_mode_size)));
23808 else
23810 int i;
23811 int j = -1;
23812 bool used_update = false;
23813 rtx restore_basereg = NULL_RTX;
23815 if (MEM_P (src) && INT_REGNO_P (reg))
23817 rtx breg;
23819 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23820 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23822 rtx delta_rtx;
23823 breg = XEXP (XEXP (src, 0), 0);
23824 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23825 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23826 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23827 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23828 src = replace_equiv_address (src, breg);
23830 else if (! rs6000_offsettable_memref_p (src, reg_mode))
23832 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23834 rtx basereg = XEXP (XEXP (src, 0), 0);
23835 if (TARGET_UPDATE)
23837 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23838 emit_insn (gen_rtx_SET (ndst,
23839 gen_rtx_MEM (reg_mode,
23840 XEXP (src, 0))));
23841 used_update = true;
23843 else
23844 emit_insn (gen_rtx_SET (basereg,
23845 XEXP (XEXP (src, 0), 1)));
23846 src = replace_equiv_address (src, basereg);
23848 else
23850 rtx basereg = gen_rtx_REG (Pmode, reg);
23851 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23852 src = replace_equiv_address (src, basereg);
23856 breg = XEXP (src, 0);
23857 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23858 breg = XEXP (breg, 0);
23860 /* If the base register we are using to address memory is
23861 also a destination reg, then change that register last. */
23862 if (REG_P (breg)
23863 && REGNO (breg) >= REGNO (dst)
23864 && REGNO (breg) < REGNO (dst) + nregs)
23865 j = REGNO (breg) - REGNO (dst);
23867 else if (MEM_P (dst) && INT_REGNO_P (reg))
23869 rtx breg;
23871 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23872 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23874 rtx delta_rtx;
23875 breg = XEXP (XEXP (dst, 0), 0);
23876 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23877 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23878 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23880 /* We have to update the breg before doing the store.
23881 Use store with update, if available. */
23883 if (TARGET_UPDATE)
23885 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23886 emit_insn (TARGET_32BIT
23887 ? (TARGET_POWERPC64
23888 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23889 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
23890 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23891 used_update = true;
23893 else
23894 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23895 dst = replace_equiv_address (dst, breg);
23897 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
23898 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23900 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23902 rtx basereg = XEXP (XEXP (dst, 0), 0);
23903 if (TARGET_UPDATE)
23905 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23906 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23907 XEXP (dst, 0)),
23908 nsrc));
23909 used_update = true;
23911 else
23912 emit_insn (gen_rtx_SET (basereg,
23913 XEXP (XEXP (dst, 0), 1)));
23914 dst = replace_equiv_address (dst, basereg);
23916 else
23918 rtx basereg = XEXP (XEXP (dst, 0), 0);
23919 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23920 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23921 && REG_P (basereg)
23922 && REG_P (offsetreg)
23923 && REGNO (basereg) != REGNO (offsetreg));
23924 if (REGNO (basereg) == 0)
23926 rtx tmp = offsetreg;
23927 offsetreg = basereg;
23928 basereg = tmp;
23930 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23931 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23932 dst = replace_equiv_address (dst, basereg);
23935 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23936 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
23939 for (i = 0; i < nregs; i++)
23941 /* Calculate index to next subword. */
23942 ++j;
23943 if (j == nregs)
23944 j = 0;
23946 /* If compiler already emitted move of first word by
23947 store with update, no need to do anything. */
23948 if (j == 0 && used_update)
23949 continue;
23951 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23952 j * reg_mode_size),
23953 simplify_gen_subreg (reg_mode, src, mode,
23954 j * reg_mode_size)));
23956 if (restore_basereg != NULL_RTX)
23957 emit_insn (restore_basereg);
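/* Editor's sketch (hypothetical, not part of the original file): why
   the REGNO (src) < REGNO (dst) case above copies highest register
   first.  The array stands in for one contiguous register file, so
   index arithmetic is well defined here.  */
#if 0
static void
copy_overlapping (unsigned long *regfile, int dst, int src, int nregs)
{
  /* A forward copy would overwrite source words before reading them
     when the destination range starts above the source range.  */
  if (dst > src)
    for (int i = nregs - 1; i >= 0; i--)
      regfile[dst + i] = regfile[src + i];
  else
    for (int i = 0; i < nregs; i++)
      regfile[dst + i] = regfile[src + i];
}
#endif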
23962 /* This page contains routines that are used to determine what the
23963 function prologue and epilogue code will do and write them out. */
23965 static inline bool
23966 save_reg_p (int r)
23968 return !call_used_regs[r] && df_regs_ever_live_p (r);
23971 /* Determine whether the gp REG is really used. */
23973 static bool
23974 rs6000_reg_live_or_pic_offset_p (int reg)
23976 /* We need to mark the PIC offset register live for the same conditions
23977 as it is set up, or otherwise it won't be saved before we clobber it. */
23979 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23981 if (TARGET_TOC && TARGET_MINIMAL_TOC
23982 && (crtl->calls_eh_return
23983 || df_regs_ever_live_p (reg)
23984 || get_pool_size ()))
23985 return true;
23987 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23988 && flag_pic)
23989 return true;
23992 /* If the function calls eh_return, treat as used all the registers
23993 that would otherwise be checked for liveness. */
23995 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23996 && !call_used_regs[reg]);
23999 /* Return the first fixed-point register that is required to be
24000 saved. 32 if none. */
24002 static int
24003 first_reg_to_save (void)
24005 int first_reg;
24007 /* Find lowest numbered live register. */
24008 for (first_reg = 13; first_reg <= 31; first_reg++)
24009 if (save_reg_p (first_reg))
24010 break;
24012 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
24013 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
24014 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24015 || (TARGET_TOC && TARGET_MINIMAL_TOC))
24016 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
24017 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
24019 #if TARGET_MACHO
24020 if (flag_pic
24021 && crtl->uses_pic_offset_table
24022 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24023 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24024 #endif
24026 return first_reg;
24029 /* Similar, for FP regs. */
24031 static int
24032 first_fp_reg_to_save (void)
24034 int first_reg;
24036 /* Find lowest numbered live register. */
24037 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24038 if (save_reg_p (first_reg))
24039 break;
24041 return first_reg;
24044 /* Similar, for AltiVec regs. */
24046 static int
24047 first_altivec_reg_to_save (void)
24049 int i;
24051 /* Stack frame remains as is unless we are in AltiVec ABI. */
24052 if (! TARGET_ALTIVEC_ABI)
24053 return LAST_ALTIVEC_REGNO + 1;
24055 /* On Darwin, the unwind routines are compiled without
24056 TARGET_ALTIVEC, and use save_world to save/restore the
24057 altivec registers when necessary. */
24058 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24059 && ! TARGET_ALTIVEC)
24060 return FIRST_ALTIVEC_REGNO + 20;
24062 /* Find lowest numbered live register. */
24063 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24064 if (save_reg_p (i))
24065 break;
24067 return i;
24070 /* Return a 32-bit mask of the AltiVec registers we need to set in
24071 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
24072 the 32-bit word is 0. */
24074 static unsigned int
24075 compute_vrsave_mask (void)
24077 unsigned int i, mask = 0;
24079 /* On Darwin, the unwind routines are compiled without
24080 TARGET_ALTIVEC, and use save_world to save/restore the
24081 call-saved altivec registers when necessary. */
24082 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24083 && ! TARGET_ALTIVEC)
24084 mask |= 0xFFF;
24086 /* First, find out if we use _any_ altivec registers. */
24087 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24088 if (df_regs_ever_live_p (i))
24089 mask |= ALTIVEC_REG_BIT (i);
24091 if (mask == 0)
24092 return mask;
24094 /* Next, remove the argument registers from the set. These must
24095 be in the VRSAVE mask set by the caller, so we don't need to add
24096 them in again. More importantly, the mask we compute here is
24097 used to generate CLOBBERs in the set_vrsave insn, and we do not
24098 wish the argument registers to die. */
24099 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24100 mask &= ~ALTIVEC_REG_BIT (i);
24102 /* Similarly, remove the return value from the set. */
24104 bool yes = false;
24105 diddle_return_value (is_altivec_return_reg, &yes);
24106 if (yes)
24107 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24110 return mask;
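/* Editor's sketch (hypothetical, not part of the original file): the
   VRSAVE bit numbering described above, with V0 occupying the most
   significant bit.  N is the AltiVec register number 0..31; the
   FIRST_ALTIVEC_REGNO bias applied by ALTIVEC_REG_BIT is elided.  */
#if 0
static unsigned int
vrsave_bit (int n)
{
  return 0x80000000u >> n;	/* Bit for Vn; MSB is V0.  */
}
#endif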
24113 /* For a very restricted set of circumstances, we can cut down the
24114 size of prologues/epilogues by calling our own save/restore-the-world
24115 routines. */
24117 static void
24118 compute_save_world_info (rs6000_stack_t *info)
24120 info->world_save_p = 1;
24121 info->world_save_p
24122 = (WORLD_SAVE_P (info)
24123 && DEFAULT_ABI == ABI_DARWIN
24124 && !cfun->has_nonlocal_label
24125 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24126 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24127 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24128 && info->cr_save_p);
24130 /* This will not work in conjunction with sibcalls. Make sure there
24131 are none. (This check is expensive, but seldom executed.) */
24132 if (WORLD_SAVE_P (info))
24134 rtx_insn *insn;
24135 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24136 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24138 info->world_save_p = 0;
24139 break;
24143 if (WORLD_SAVE_P (info))
24145 /* Even if we're not touching VRsave, make sure there's room on the
24146 stack for it, if it looks like we're calling SAVE_WORLD, which
24147 will attempt to save it. */
24148 info->vrsave_size = 4;
24150 /* If we are going to save the world, we need to save the link register too. */
24151 info->lr_save_p = 1;
24153 /* "Save" the VRsave register too if we're saving the world. */
24154 if (info->vrsave_mask == 0)
24155 info->vrsave_mask = compute_vrsave_mask ();
24157 /* Because the Darwin register save/restore routines only handle
24158 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24159 check. */
24160 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24161 && (info->first_altivec_reg_save
24162 >= FIRST_SAVED_ALTIVEC_REGNO));
24165 return;
24169 static void
24170 is_altivec_return_reg (rtx reg, void *xyes)
24172 bool *yes = (bool *) xyes;
24173 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24174 *yes = true;
24178 /* Return whether REG is a global user reg or has been specified by
24179 -ffixed-REG. We should not restore these, and so cannot use
24180 lmw or out-of-line restore functions if there are any. We also
24181 can't save them (well, emit frame notes for them), because frame
24182 unwinding during exception handling will restore saved registers. */
24184 static bool
24185 fixed_reg_p (int reg)
24187 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24188 backend sets it, overriding anything the user might have given. */
24189 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24190 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24191 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24192 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24193 return false;
24195 return fixed_regs[reg];
24198 /* Determine the strategy for saving/restoring registers. */
24200 enum {
24201 SAVE_MULTIPLE = 0x1,
24202 SAVE_INLINE_GPRS = 0x2,
24203 SAVE_INLINE_FPRS = 0x4,
24204 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24205 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24206 SAVE_INLINE_VRS = 0x20,
24207 REST_MULTIPLE = 0x100,
24208 REST_INLINE_GPRS = 0x200,
24209 REST_INLINE_FPRS = 0x400,
24210 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24211 REST_INLINE_VRS = 0x1000
24214 static int
24215 rs6000_savres_strategy (rs6000_stack_t *info,
24216 bool using_static_chain_p)
24218 int strategy = 0;
24220 /* Select between in-line and out-of-line save and restore of regs.
24221 First, all the obvious cases where we don't use out-of-line. */
24222 if (crtl->calls_eh_return
24223 || cfun->machine->ra_need_lr)
24224 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24225 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24226 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24228 if (info->first_gp_reg_save == 32)
24229 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24231 if (info->first_fp_reg_save == 64
24232 /* The out-of-line FP routines use double-precision stores;
24233 we can't use those routines if we don't have such stores. */
24234 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24235 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24237 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24238 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24240 /* Define cutoff for using out-of-line functions to save registers. */
24241 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24243 if (!optimize_size)
24245 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24246 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24247 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24249 else
24251 /* Prefer out-of-line restore if it will exit. */
24252 if (info->first_fp_reg_save > 61)
24253 strategy |= SAVE_INLINE_FPRS;
24254 if (info->first_gp_reg_save > 29)
24256 if (info->first_fp_reg_save == 64)
24257 strategy |= SAVE_INLINE_GPRS;
24258 else
24259 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24261 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24262 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24265 else if (DEFAULT_ABI == ABI_DARWIN)
24267 if (info->first_fp_reg_save > 60)
24268 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24269 if (info->first_gp_reg_save > 29)
24270 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24271 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24273 else
24275 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24276 if (info->first_fp_reg_save > 61)
24277 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24278 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24279 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24282 /* Don't bother to try to save things out-of-line if r11 is occupied
24283 by the static chain. It would require too much fiddling and the
24284 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
24285 pointer on Darwin, and AIX uses r1 or r12. */
24286 if (using_static_chain_p
24287 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24288 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24289 | SAVE_INLINE_GPRS
24290 | SAVE_INLINE_VRS);
24292 /* Saving CR interferes with the exit routines used on the SPE, so
24293 just punt here. */
24294 if (TARGET_SPE_ABI
24295 && info->spe_64bit_regs_used
24296 && info->cr_save_p)
24297 strategy |= REST_INLINE_GPRS;
24299 /* We can only use the out-of-line routines to restore fprs if we've
24300 saved all the registers from first_fp_reg_save in the prologue.
24301 Otherwise, we risk loading garbage. Of course, if we have saved
24302 out-of-line then we know we haven't skipped any fprs. */
24303 if ((strategy & SAVE_INLINE_FPRS)
24304 && !(strategy & REST_INLINE_FPRS))
24306 int i;
24308 for (i = info->first_fp_reg_save; i < 64; i++)
24309 if (fixed_regs[i] || !save_reg_p (i))
24311 strategy |= REST_INLINE_FPRS;
24312 break;
24316 /* Similarly, for altivec regs. */
24317 if ((strategy & SAVE_INLINE_VRS)
24318 && !(strategy & REST_INLINE_VRS))
24320 int i;
24322 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24323 if (fixed_regs[i] || !save_reg_p (i))
24325 strategy |= REST_INLINE_VRS;
24326 break;
24330 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24331 saved is an out-of-line save or restore. Set up the value for
24332 the next test (excluding out-of-line gprs). */
24333 bool lr_save_p = (info->lr_save_p
24334 || !(strategy & SAVE_INLINE_FPRS)
24335 || !(strategy & SAVE_INLINE_VRS)
24336 || !(strategy & REST_INLINE_FPRS)
24337 || !(strategy & REST_INLINE_VRS));
24339 if (TARGET_MULTIPLE
24340 && !TARGET_POWERPC64
24341 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
24342 && info->first_gp_reg_save < 31)
24344 /* Prefer store multiple for saves over out-of-line routines,
24345 since the store-multiple instruction will always be smaller. */
24346 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24348 /* The situation is more complicated with load multiple. We'd
24349 prefer to use the out-of-line routines for restores, since the
24350 "exit" out-of-line routines can handle the restore of LR and the
24351 frame teardown. However, it doesn't make sense to use the
24352 out-of-line routine if that is the only reason we'd need to save
24353 LR, and we can't use the "exit" out-of-line gpr restore if we
24354 have saved some fprs; in those cases it is advantageous to use
24355 load multiple when available. */
24356 if (info->first_fp_reg_save != 64 || !lr_save_p)
24357 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24360 /* Using the "exit" out-of-line routine does not improve code size
24361 if using it would require lr to be saved and if only saving one
24362 or two gprs. */
24363 else if (!lr_save_p && info->first_gp_reg_save > 29)
24364 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24366 /* We can only use load multiple or the out-of-line routines to
24367 restore gprs if we've saved all the registers from
24368 first_gp_reg_save. Otherwise, we risk loading garbage.
24369 Of course, if we have saved out-of-line or used stmw then we know
24370 we haven't skipped any gprs. */
24371 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24372 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24374 int i;
24376 for (i = info->first_gp_reg_save; i < 32; i++)
24377 if (fixed_reg_p (i) || !save_reg_p (i))
24379 strategy |= REST_INLINE_GPRS;
24380 strategy &= ~REST_MULTIPLE;
24381 break;
24385 if (TARGET_ELF && TARGET_64BIT)
24387 if (!(strategy & SAVE_INLINE_FPRS))
24388 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24389 else if (!(strategy & SAVE_INLINE_GPRS)
24390 && info->first_fp_reg_save == 64)
24391 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24393 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24394 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24396 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24397 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24399 return strategy;
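/* Editor's sketch (hypothetical, not part of the original file): how
   the returned bitmask is decoded.  The test shown is the same one
   used above before the "loading garbage" GPR scan: inline saves
   without store-multiple may have skipped registers, so a multiple
   or out-of-line restore would be unsafe.  */
#if 0
static int
gprs_saved_inline_without_stmw (int strategy)
{
  return ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE))
	  == SAVE_INLINE_GPRS);
}
#endif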
24402 /* Calculate the stack information for the current function. This is
24403 complicated by having two separate calling sequences, the AIX calling
24404 sequence and the V.4 calling sequence.
24406 AIX (and Darwin/Mac OS X) stack frames look like:
24407 32-bit 64-bit
24408 SP----> +---------------------------------------+
24409 | back chain to caller | 0 0
24410 +---------------------------------------+
24411 | saved CR | 4 8 (8-11)
24412 +---------------------------------------+
24413 | saved LR | 8 16
24414 +---------------------------------------+
24415 | reserved for compilers | 12 24
24416 +---------------------------------------+
24417 | reserved for binders | 16 32
24418 +---------------------------------------+
24419 | saved TOC pointer | 20 40
24420 +---------------------------------------+
24421 | Parameter save area (P) | 24 48
24422 +---------------------------------------+
24423 | Alloca space (A) | 24+P etc.
24424 +---------------------------------------+
24425 | Local variable space (L) | 24+P+A
24426 +---------------------------------------+
24427 | Float/int conversion temporary (X) | 24+P+A+L
24428 +---------------------------------------+
24429 | Save area for AltiVec registers (W) | 24+P+A+L+X
24430 +---------------------------------------+
24431 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24432 +---------------------------------------+
24433 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24434 +---------------------------------------+
24435 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24436 +---------------------------------------+
24437 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24438 +---------------------------------------+
24439 old SP->| back chain to caller's caller |
24440 +---------------------------------------+
24442 The required alignment for AIX configurations is two words (i.e., 8
24443 or 16 bytes).
24445 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24447 SP----> +---------------------------------------+
24448 | Back chain to caller | 0
24449 +---------------------------------------+
24450 | Save area for CR | 8
24451 +---------------------------------------+
24452 | Saved LR | 16
24453 +---------------------------------------+
24454 | Saved TOC pointer | 24
24455 +---------------------------------------+
24456 | Parameter save area (P) | 32
24457 +---------------------------------------+
24458 | Alloca space (A) | 32+P
24459 +---------------------------------------+
24460 | Local variable space (L) | 32+P+A
24461 +---------------------------------------+
24462 | Save area for AltiVec registers (W) | 32+P+A+L
24463 +---------------------------------------+
24464 | AltiVec alignment padding (Y) | 32+P+A+L+W
24465 +---------------------------------------+
24466 | Save area for GP registers (G) | 32+P+A+L+W+Y
24467 +---------------------------------------+
24468 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24469 +---------------------------------------+
24470 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24471 +---------------------------------------+
24474 V.4 stack frames look like:
24476 SP----> +---------------------------------------+
24477 | back chain to caller | 0
24478 +---------------------------------------+
24479 | caller's saved LR | 4
24480 +---------------------------------------+
24481 | Parameter save area (P) | 8
24482 +---------------------------------------+
24483 | Alloca space (A) | 8+P
24484 +---------------------------------------+
24485 | Varargs save area (V) | 8+P+A
24486 +---------------------------------------+
24487 | Local variable space (L) | 8+P+A+V
24488 +---------------------------------------+
24489 | Float/int conversion temporary (X) | 8+P+A+V+L
24490 +---------------------------------------+
24491 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24492 +---------------------------------------+
24493 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24494 +---------------------------------------+
24495 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24496 +---------------------------------------+
24497 | SPE: area for 64-bit GP registers |
24498 +---------------------------------------+
24499 | SPE alignment padding |
24500 +---------------------------------------+
24501 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24502 +---------------------------------------+
24503 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24504 +---------------------------------------+
24505 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24506 +---------------------------------------+
24507 old SP->| back chain to caller's caller |
24508 +---------------------------------------+
24510 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24511 given. (But note below and in sysv4.h that we require only 8 and
24512 may round up the size of our stack frame anyway. The historical
24513 reason is early versions of powerpc-linux which didn't properly
24514 align the stack at program startup. A happy side-effect is that
24515 -mno-eabi libraries can be used with -meabi programs.)
24517 The EABI configuration defaults to the V.4 layout. However,
24518 the stack alignment requirements may differ. If -mno-eabi is not
24519 given, the required stack alignment is 8 bytes; if -mno-eabi is
24520 given, the required alignment is 16 bytes. (But see V.4 comment
24521 above.) */
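/* Editor's sketch (hypothetical, not part of the original file): the
   ELFv2 diagram above expressed as cumulative sums.  P, A, L, W, Y
   and G are the component sizes; the fixed header is 32 bytes.  All
   names here are illustrative only.  */
#if 0
struct elfv2_offsets
{
  long parm, alloca_space, vars, altivec, padding, gprs, fprs;
};

static struct elfv2_offsets
elfv2_frame_offsets (long P, long A, long L, long W, long Y, long G)
{
  struct elfv2_offsets o;
  o.parm = 32;				/* Parameter save area.  */
  o.alloca_space = 32 + P;		/* Alloca space.  */
  o.vars = 32 + P + A;			/* Local variable space.  */
  o.altivec = 32 + P + A + L;		/* AltiVec save area.  */
  o.padding = 32 + P + A + L + W;	/* Alignment padding.  */
  o.gprs = 32 + P + A + L + W + Y;	/* GP register save area.  */
  o.fprs = 32 + P + A + L + W + Y + G;	/* FP register save area.  */
  return o;
}
#endif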
24523 #ifndef ABI_STACK_BOUNDARY
24524 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24525 #endif
24527 static rs6000_stack_t *
24528 rs6000_stack_info (void)
24530 /* We should never be called for thunks; we are not set up for that. */
24531 gcc_assert (!cfun->is_thunk);
24533 rs6000_stack_t *info = &stack_info;
24534 int reg_size = TARGET_32BIT ? 4 : 8;
24535 int ehrd_size;
24536 int ehcr_size;
24537 int save_align;
24538 int first_gp;
24539 HOST_WIDE_INT non_fixed_size;
24540 bool using_static_chain_p;
24542 if (reload_completed && info->reload_completed)
24543 return info;
24545 memset (info, 0, sizeof (*info));
24546 info->reload_completed = reload_completed;
24548 if (TARGET_SPE)
24550 /* Cache value so we don't rescan instruction chain over and over. */
24551 if (cfun->machine->spe_insn_chain_scanned_p == 0)
24552 cfun->machine->spe_insn_chain_scanned_p
24553 = spe_func_has_64bit_regs_p () + 1;
24554 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
24557 /* Select which calling sequence. */
24558 info->abi = DEFAULT_ABI;
24560 /* Calculate which registers need to be saved & save area size. */
24561 info->first_gp_reg_save = first_reg_to_save ();
24562 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24563 even if it currently looks like we won't. Reload may need it to
24564 get at a constant; if so, it will have already created a constant
24565 pool entry for it. */
24566 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24567 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24568 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24569 && crtl->uses_const_pool
24570 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24571 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24572 else
24573 first_gp = info->first_gp_reg_save;
24575 info->gp_size = reg_size * (32 - first_gp);
24577 /* For the SPE, we have an additional upper 32-bits on each GPR.
24578 Ideally we should save the entire 64-bits only when the upper
24579 half is used in SIMD instructions. Since we only record
24580 registers live (not the size they are used in), this proves
24581 difficult because we'd have to traverse the instruction chain at
24582 the right time, taking reload into account. This is a real pain,
24583 so we opt to always save the GPRs in 64-bits if even one register
24584 gets used in 64-bits. Otherwise, all the registers in the frame
24585 get saved in 32-bits.
24587 So... when we save all GPRs (except the SP) in 64-bits, the
24588 traditional GP save area will be empty. */
24589 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24590 info->gp_size = 0;
24592 info->first_fp_reg_save = first_fp_reg_to_save ();
24593 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24595 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24596 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24597 - info->first_altivec_reg_save);
24599 /* Does this function call anything? */
24600 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24602 /* Determine if we need to save the condition code registers. */
24603 if (save_reg_p (CR2_REGNO)
24604 || save_reg_p (CR3_REGNO)
24605 || save_reg_p (CR4_REGNO))
24607 info->cr_save_p = 1;
24608 if (DEFAULT_ABI == ABI_V4)
24609 info->cr_size = reg_size;
24612 /* If the current function calls __builtin_eh_return, then we need
24613 to allocate stack space for registers that will hold data for
24614 the exception handler. */
24615 if (crtl->calls_eh_return)
24617 unsigned int i;
24618 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24619 continue;
24621 /* SPE saves EH registers in 64-bits. */
24622 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
24623 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
24625 else
24626 ehrd_size = 0;
24628 /* In the ELFv2 ABI, we also need to allocate space for separate
24629 CR field save areas if the function calls __builtin_eh_return. */
24630 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24632 /* This hard-codes that we have three call-saved CR fields. */
24633 ehcr_size = 3 * reg_size;
24634 /* We do *not* use the regular CR save mechanism. */
24635 info->cr_save_p = 0;
24637 else
24638 ehcr_size = 0;
24640 /* Determine various sizes. */
24641 info->reg_size = reg_size;
24642 info->fixed_size = RS6000_SAVE_AREA;
24643 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24644 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24645 TARGET_ALTIVEC ? 16 : 8);
24646 if (FRAME_GROWS_DOWNWARD)
24647 info->vars_size
24648 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24649 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24650 - (info->fixed_size + info->vars_size + info->parm_size);
24652 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24653 info->spe_gp_size = 8 * (32 - first_gp);
24655 if (TARGET_ALTIVEC_ABI)
24656 info->vrsave_mask = compute_vrsave_mask ();
24658 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24659 info->vrsave_size = 4;
24661 compute_save_world_info (info);
24663 /* Calculate the offsets. */
24664 switch (DEFAULT_ABI)
24666 case ABI_NONE:
24667 default:
24668 gcc_unreachable ();
24670 case ABI_AIX:
24671 case ABI_ELFv2:
24672 case ABI_DARWIN:
24673 info->fp_save_offset = -info->fp_size;
24674 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24676 if (TARGET_ALTIVEC_ABI)
24678 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24680 /* Align stack so vector save area is on a quadword boundary.
24681 The padding goes above the vectors. */
24682 if (info->altivec_size != 0)
24683 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24685 info->altivec_save_offset = info->vrsave_save_offset
24686 - info->altivec_padding_size
24687 - info->altivec_size;
24688 gcc_assert (info->altivec_size == 0
24689 || info->altivec_save_offset % 16 == 0);
24691 /* Adjust for AltiVec case. */
24692 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24694 else
24695 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24697 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24698 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24699 info->lr_save_offset = 2*reg_size;
24700 break;
24702 case ABI_V4:
24703 info->fp_save_offset = -info->fp_size;
24704 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24705 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24707 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24709 /* Align stack so SPE GPR save area is aligned on a
24710 double-word boundary. */
24711 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
24712 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
24713 else
24714 info->spe_padding_size = 0;
24716 info->spe_gp_save_offset = info->cr_save_offset
24717 - info->spe_padding_size
24718 - info->spe_gp_size;
24720 /* Adjust for SPE case. */
24721 info->ehrd_offset = info->spe_gp_save_offset;
24723 else if (TARGET_ALTIVEC_ABI)
24725 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24727 /* Align stack so vector save area is on a quadword boundary. */
24728 if (info->altivec_size != 0)
24729 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24731 info->altivec_save_offset = info->vrsave_save_offset
24732 - info->altivec_padding_size
24733 - info->altivec_size;
24735 /* Adjust for AltiVec case. */
24736 info->ehrd_offset = info->altivec_save_offset;
24738 else
24739 info->ehrd_offset = info->cr_save_offset;
24741 info->ehrd_offset -= ehrd_size;
24742 info->lr_save_offset = reg_size;
24745 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24746 info->save_size = RS6000_ALIGN (info->fp_size
24747 + info->gp_size
24748 + info->altivec_size
24749 + info->altivec_padding_size
24750 + info->spe_gp_size
24751 + info->spe_padding_size
24752 + ehrd_size
24753 + ehcr_size
24754 + info->cr_size
24755 + info->vrsave_size,
24756 save_align);
24758 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24760 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24761 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24763 /* Determine if we need to save the link register. */
24764 if (info->calls_p
24765 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24766 && crtl->profile
24767 && !TARGET_PROFILE_KERNEL)
24768 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24769 #ifdef TARGET_RELOCATABLE
24770 || (DEFAULT_ABI == ABI_V4
24771 && (TARGET_RELOCATABLE || flag_pic > 1)
24772 && get_pool_size () != 0)
24773 #endif
24774 || rs6000_ra_ever_killed ())
24775 info->lr_save_p = 1;
24777 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24778 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24779 && call_used_regs[STATIC_CHAIN_REGNUM]);
24780 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24782 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24783 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24784 || !(info->savres_strategy & SAVE_INLINE_VRS)
24785 || !(info->savres_strategy & REST_INLINE_GPRS)
24786 || !(info->savres_strategy & REST_INLINE_FPRS)
24787 || !(info->savres_strategy & REST_INLINE_VRS))
24788 info->lr_save_p = 1;
24790 if (info->lr_save_p)
24791 df_set_regs_ever_live (LR_REGNO, true);
24793 /* Determine if we need to allocate any stack frame:
24795 For AIX we need to push the stack if a frame pointer is needed
24796 (because the stack might be dynamically adjusted), if we are
24797 debugging, if we make calls, or if the sum of fp_save, gp_save,
24798 and local variables is more than the space needed to save all
24799 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24800 + 18*8 = 288 (GPR13 reserved).
24802 For V.4 we don't have the stack cushion that AIX uses, but assume
24803 that the debugger can handle stackless frames. */
24805 if (info->calls_p)
24806 info->push_p = 1;
24808 else if (DEFAULT_ABI == ABI_V4)
24809 info->push_p = non_fixed_size != 0;
24811 else if (frame_pointer_needed)
24812 info->push_p = 1;
24814 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24815 info->push_p = 1;
24817 else
24818 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
24820 return info;
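/* Editor's sketch (hypothetical, not part of the original file): the
   round-up performed by the RS6000_ALIGN uses above, assuming ALIGN
   is a power of two (the real macro is defined in rs6000.h).  */
#if 0
static long
align_up (long n, long align)
{
  return (n + align - 1) & -align;	/* Same as & ~(align - 1).  */
}
#endif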
24823 /* Return true if the current function uses any GPRs in 64-bit SIMD
24824 mode. */
24826 static bool
24827 spe_func_has_64bit_regs_p (void)
24829 rtx_insn *insns, *insn;
24831 /* Functions that save and restore all the call-saved registers will
24832 need to save/restore the registers in 64-bits. */
24833 if (crtl->calls_eh_return
24834 || cfun->calls_setjmp
24835 || crtl->has_nonlocal_goto)
24836 return true;
24838 insns = get_insns ();
24840 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
24842 if (INSN_P (insn))
24844 rtx i;
24846 /* FIXME: This should be implemented with attributes...
24848 (set_attr "spe64" "true")....then,
24849 if (get_spe64(insn)) return true;
24851 It's the only reliable way to do the stuff below. */
24853 i = PATTERN (insn);
24854 if (GET_CODE (i) == SET)
24856 machine_mode mode = GET_MODE (SET_SRC (i));
24858 if (SPE_VECTOR_MODE (mode))
24859 return true;
24860 if (TARGET_E500_DOUBLE
24861 && (mode == DFmode || FLOAT128_2REG_P (mode)))
24862 return true;
24867 return false;
24870 static void
24871 debug_stack_info (rs6000_stack_t *info)
24873 const char *abi_string;
24875 if (! info)
24876 info = rs6000_stack_info ();
24878 fprintf (stderr, "\nStack information for function %s:\n",
24879 ((current_function_decl && DECL_NAME (current_function_decl))
24880 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24881 : "<unknown>"));
24883 switch (info->abi)
24885 default: abi_string = "Unknown"; break;
24886 case ABI_NONE: abi_string = "NONE"; break;
24887 case ABI_AIX: abi_string = "AIX"; break;
24888 case ABI_ELFv2: abi_string = "ELFv2"; break;
24889 case ABI_DARWIN: abi_string = "Darwin"; break;
24890 case ABI_V4: abi_string = "V.4"; break;
24893 fprintf (stderr, "\tABI = %5s\n", abi_string);
24895 if (TARGET_ALTIVEC_ABI)
24896 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24898 if (TARGET_SPE_ABI)
24899 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
24901 if (info->first_gp_reg_save != 32)
24902 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24904 if (info->first_fp_reg_save != 64)
24905 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24907 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24908 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24909 info->first_altivec_reg_save);
24911 if (info->lr_save_p)
24912 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24914 if (info->cr_save_p)
24915 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24917 if (info->vrsave_mask)
24918 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24920 if (info->push_p)
24921 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24923 if (info->calls_p)
24924 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
24926 if (info->gp_size)
24927 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24929 if (info->fp_size)
24930 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24932 if (info->altivec_size)
24933 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24934 info->altivec_save_offset);
24936 if (info->spe_gp_size)
24937 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
24938 info->spe_gp_save_offset);
24940 if (info->vrsave_size)
24941 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24942 info->vrsave_save_offset);
24944 if (info->lr_save_p)
24945 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24947 if (info->cr_save_p)
24948 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24950 if (info->varargs_save_offset)
24951 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24953 if (info->total_size)
24954 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24955 info->total_size);
24957 if (info->vars_size)
24958 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24959 info->vars_size);
24961 if (info->parm_size)
24962 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24964 if (info->fixed_size)
24965 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24967 if (info->gp_size)
24968 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24970 if (info->spe_gp_size)
24971 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
24973 if (info->fp_size)
24974 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24976 if (info->altivec_size)
24977 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24979 if (info->vrsave_size)
24980 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24982 if (info->altivec_padding_size)
24983 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24984 info->altivec_padding_size);
24986 if (info->spe_padding_size)
24987 fprintf (stderr, "\tspe_padding_size = %5d\n",
24988 info->spe_padding_size);
24990 if (info->cr_size)
24991 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24993 if (info->save_size)
24994 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24996 if (info->reg_size != 4)
24997 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24999 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25001 fprintf (stderr, "\n");
25004 rtx
25005 rs6000_return_addr (int count, rtx frame)
25007 /* Currently we don't optimize very well between prologue and body
25008 code, and for PIC code the generated code can actually be quite
25009 bad, so don't try to be too clever here. */
25010 if (count != 0
25011 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25013 cfun->machine->ra_needs_full_frame = 1;
25015 return
25016 gen_rtx_MEM
25017 (Pmode,
25018 memory_address
25019 (Pmode,
25020 plus_constant (Pmode,
25021 copy_to_reg
25022 (gen_rtx_MEM (Pmode,
25023 memory_address (Pmode, frame))),
25024 RETURN_ADDRESS_OFFSET)));
25027 cfun->machine->ra_need_lr = 1;
25028 return get_hard_reg_initial_val (Pmode, LR_REGNO);
25031 /* Say whether a function is a candidate for sibcall handling or not. */
25033 static bool
25034 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25036 tree fntype;
25038 if (decl)
25039 fntype = TREE_TYPE (decl);
25040 else
25041 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25043 /* We can't do it if the called function has more vector parameters
25044 than the current function; there's nowhere to put the VRsave code. */
25045 if (TARGET_ALTIVEC_ABI
25046 && TARGET_ALTIVEC_VRSAVE
25047 && !(decl && decl == current_function_decl))
25049 function_args_iterator args_iter;
25050 tree type;
25051 int nvreg = 0;
25053 /* Functions with vector parameters are required to have a
25054 prototype, so the argument type info must be available
25055 here. */
25056 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25057 if (TREE_CODE (type) == VECTOR_TYPE
25058 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25059 nvreg++;
25061 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25062 if (TREE_CODE (type) == VECTOR_TYPE
25063 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25064 nvreg--;
25066 if (nvreg > 0)
25067 return false;
25070 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25071 functions, because the callee may have a different TOC pointer from
25072 the caller and there's no way to ensure we restore the TOC when
25073 we return. With the secure-plt SYSV ABI we can't make non-local
25074 calls when -fpic/PIC because the plt call stubs use r30. */
25075 if (DEFAULT_ABI == ABI_DARWIN
25076 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25077 && decl
25078 && !DECL_EXTERNAL (decl)
25079 && !DECL_WEAK (decl)
25080 && (*targetm.binds_local_p) (decl))
25081 || (DEFAULT_ABI == ABI_V4
25082 && (!TARGET_SECURE_PLT
25083 || !flag_pic
25084 || (decl
25085 && (*targetm.binds_local_p) (decl)))))
25087 tree attr_list = TYPE_ATTRIBUTES (fntype);
25089 if (!lookup_attribute ("longcall", attr_list)
25090 || lookup_attribute ("shortcall", attr_list))
25091 return true;
25094 return false;
25097 static int
25098 rs6000_ra_ever_killed (void)
25100 rtx_insn *top;
25101 rtx reg;
25102 rtx_insn *insn;
25104 if (cfun->is_thunk)
25105 return 0;
25107 if (cfun->machine->lr_save_state)
25108 return cfun->machine->lr_save_state - 1;
25110 /* regs_ever_live has LR marked as used if any sibcalls are present,
25111 but this should not force saving and restoring in the
25112 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25113 clobbers LR, so that is inappropriate. */
25115 /* Also, the prologue can generate a store into LR that
25116 doesn't really count, like this:
25118 move LR->R0
25119 bcl to set PIC register
25120 move LR->R31
25121 move R0->LR
25123 When we're called from the epilogue, we need to avoid counting
25124 this as a store. */
25126 push_topmost_sequence ();
25127 top = get_insns ();
25128 pop_topmost_sequence ();
25129 reg = gen_rtx_REG (Pmode, LR_REGNO);
25131 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25133 if (INSN_P (insn))
25135 if (CALL_P (insn))
25137 if (!SIBLING_CALL_P (insn))
25138 return 1;
25140 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25141 return 1;
25142 else if (set_of (reg, insn) != NULL_RTX
25143 && !prologue_epilogue_contains (insn))
25144 return 1;
25147 return 0;
25150 /* Emit instructions needed to load the TOC register.
25151 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
25152 a constant pool; or for SVR4 -fpic. */
25154 void
25155 rs6000_emit_load_toc_table (int fromprolog)
25157 rtx dest;
25158 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25160 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25162 char buf[30];
25163 rtx lab, tmp1, tmp2, got;
25165 lab = gen_label_rtx ();
25166 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25167 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25168 if (flag_pic == 2)
25170 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25171 need_toc_init = 1;
25173 else
25174 got = rs6000_got_sym ();
25175 tmp1 = tmp2 = dest;
25176 if (!fromprolog)
25178 tmp1 = gen_reg_rtx (Pmode);
25179 tmp2 = gen_reg_rtx (Pmode);
25181 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25182 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25183 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25184 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25186 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25188 emit_insn (gen_load_toc_v4_pic_si ());
25189 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25191 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25193 char buf[30];
25194 rtx temp0 = (fromprolog
25195 ? gen_rtx_REG (Pmode, 0)
25196 : gen_reg_rtx (Pmode));
25198 if (fromprolog)
25200 rtx symF, symL;
25202 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25203 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25205 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25206 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25208 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25209 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25210 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25212 else
25214 rtx tocsym, lab;
25216 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25217 need_toc_init = 1;
25218 lab = gen_label_rtx ();
25219 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25220 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25221 if (TARGET_LINK_STACK)
25222 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25223 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25225 emit_insn (gen_addsi3 (dest, temp0, dest));
25227 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25229 /* This is for AIX code running in non-PIC ELF32. */
25230 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25232 need_toc_init = 1;
25233 emit_insn (gen_elf_high (dest, realsym));
25234 emit_insn (gen_elf_low (dest, dest, realsym));
25236 else
25238 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25240 if (TARGET_32BIT)
25241 emit_insn (gen_load_toc_aix_si (dest));
25242 else
25243 emit_insn (gen_load_toc_aix_di (dest));
25247 /* Emit instructions to restore the link register after determining where
25248 its value has been stored. */
25250 void
25251 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25253 rs6000_stack_t *info = rs6000_stack_info ();
25254 rtx operands[2];
25256 operands[0] = source;
25257 operands[1] = scratch;
25259 if (info->lr_save_p)
25261 rtx frame_rtx = stack_pointer_rtx;
25262 HOST_WIDE_INT sp_offset = 0;
25263 rtx tmp;
25265 if (frame_pointer_needed
25266 || cfun->calls_alloca
25267 || info->total_size > 32767)
25269 tmp = gen_frame_mem (Pmode, frame_rtx);
25270 emit_move_insn (operands[1], tmp);
25271 frame_rtx = operands[1];
25273 else if (info->push_p)
25274 sp_offset = info->total_size;
25276 tmp = plus_constant (Pmode, frame_rtx,
25277 info->lr_save_offset + sp_offset);
25278 tmp = gen_frame_mem (Pmode, tmp);
25279 emit_move_insn (tmp, operands[0]);
25281 else
25282 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25284 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25285 state of lr_save_p so any change from here on would be a bug. In
25286 particular, stop rs6000_ra_ever_killed from considering the SET
25287 of lr we may have added just above. */
25288 cfun->machine->lr_save_state = info->lr_save_p + 1;
25291 static GTY(()) alias_set_type set = -1;
25293 alias_set_type
25294 get_TOC_alias_set (void)
25296 if (set == -1)
25297 set = new_alias_set ();
25298 return set;
25301 /* This returns nonzero if the current function uses the TOC. This is
25302 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25303 is generated by the ABI_V4 load_toc_* patterns. */
25304 #if TARGET_ELF
25305 static int
25306 uses_TOC (void)
25308 rtx_insn *insn;
25310 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25311 if (INSN_P (insn))
25313 rtx pat = PATTERN (insn);
25314 int i;
25316 if (GET_CODE (pat) == PARALLEL)
25317 for (i = 0; i < XVECLEN (pat, 0); i++)
25319 rtx sub = XVECEXP (pat, 0, i);
25320 if (GET_CODE (sub) == USE)
25322 sub = XEXP (sub, 0);
25323 if (GET_CODE (sub) == UNSPEC
25324 && XINT (sub, 1) == UNSPEC_TOC)
25325 return 1;
25329 return 0;
25331 #endif
25333 rtx
25334 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25336 rtx tocrel, tocreg, hi;
25338 if (TARGET_DEBUG_ADDR)
25340 if (GET_CODE (symbol) == SYMBOL_REF)
25341 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25342 XSTR (symbol, 0));
25343 else
25345 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25346 GET_RTX_NAME (GET_CODE (symbol)));
25347 debug_rtx (symbol);
25351 if (!can_create_pseudo_p ())
25352 df_set_regs_ever_live (TOC_REGISTER, true);
25354 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25355 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25356 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25357 return tocrel;
25359 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25360 if (largetoc_reg != NULL)
25362 emit_move_insn (largetoc_reg, hi);
25363 hi = largetoc_reg;
25365 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
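/* Editor's sketch (hypothetical, not part of the original file): the
   HIGH/LO_SUM arithmetic behind the large-model path above.  The
   "high adjusted" half pre-compensates for the sign extension of the
   low 16 bits, so the two halves always recombine exactly; this is
   the usual @ha/@l relocation arithmetic.  */
#if 0
static int
ha_lo_recombines (unsigned int addr)
{
  unsigned int ha = (addr + 0x8000u) >> 16;		    /* @ha part.  */
  unsigned int lo = (unsigned int) (short) (addr & 0xffff); /* @l part.  */
  return (ha << 16) + lo == addr;			    /* Always 1.  */
}
#endif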
25368 /* Issue assembly directives that create a reference to the given DWARF
25369 FRAME_TABLE_LABEL from the current function section. */
25370 void
25371 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25373 fprintf (asm_out_file, "\t.ref %s\n",
25374 (* targetm.strip_name_encoding) (frame_table_label));
25377 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25378 and the change to the stack pointer. */
25380 static void
25381 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25383 rtvec p;
25384 int i;
25385 rtx regs[3];
25387 i = 0;
25388 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25389 if (hard_frame_needed)
25390 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25391 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25392 || (hard_frame_needed
25393 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25394 regs[i++] = fp;
25396 p = rtvec_alloc (i);
25397 while (--i >= 0)
25399 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25400 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25403 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25406 /* Emit the correct code for allocating stack space, as insns.
25407    If COPY_REG, leave a copy of the old frame top, offset by COPY_OFF,
25408    in COPY_REG.  The generated code may use hard register 0 as a temporary.  */
25410 static rtx_insn *
25411 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25412 {
25413   rtx_insn *insn;
25414   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25415   rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25416   rtx todec = gen_int_mode (-size, Pmode);
25417   rtx par, set, mem;
25419   if (INTVAL (todec) != -size)
25420     {
25421       warning (0, "stack frame too large");
25422       emit_insn (gen_trap ());
25423       return 0;
25424     }
25426   if (crtl->limit_stack)
25427     {
25428       if (REG_P (stack_limit_rtx)
25429           && REGNO (stack_limit_rtx) > 1
25430           && REGNO (stack_limit_rtx) <= 31)
25431         {
25432           emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
25433           emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25434                                     const0_rtx));
25435         }
25436       else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25437                && TARGET_32BIT
25438                && DEFAULT_ABI == ABI_V4)
25439         {
25440           rtx toload = gen_rtx_CONST (VOIDmode,
25441                                       gen_rtx_PLUS (Pmode,
25442                                                     stack_limit_rtx,
25443                                                     GEN_INT (size)));
25445           emit_insn (gen_elf_high (tmp_reg, toload));
25446           emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25447           emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25448                                     const0_rtx));
25449         }
25450       else
25451         warning (0, "stack limit expression is not supported");
25452     }
25454   if (copy_reg)
25455     {
25456       if (copy_off != 0)
25457         emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25458       else
25459         emit_move_insn (copy_reg, stack_reg);
25460     }
25462   if (size > 32767)
25463     {
25464       /* Need a note here so that try_split doesn't get confused.  */
25465       if (get_last_insn () == NULL_RTX)
25466         emit_note (NOTE_INSN_DELETED);
25467       insn = emit_move_insn (tmp_reg, todec);
25468       try_split (PATTERN (insn), insn, 0);
25469       todec = tmp_reg;
25470     }
25472   insn = emit_insn (TARGET_32BIT
25473                     ? gen_movsi_update_stack (stack_reg, stack_reg,
25474                                               todec, stack_reg)
25475                     : gen_movdi_di_update_stack (stack_reg, stack_reg,
25476                                                  todec, stack_reg));
25477   /* Since we didn't use gen_frame_mem to generate the MEM, grab
25478      it now and set the alias set/attributes.  The above gen_*_update
25479      calls will generate a PARALLEL with the MEM set being the first
25480      operation.  */
25481   par = PATTERN (insn);
25482   gcc_assert (GET_CODE (par) == PARALLEL);
25483   set = XVECEXP (par, 0, 0);
25484   gcc_assert (GET_CODE (set) == SET);
25485   mem = SET_DEST (set);
25486   gcc_assert (MEM_P (mem));
25487   MEM_NOTRAP_P (mem) = 1;
25488   set_mem_alias_set (mem, get_frame_alias_set ());
25490   RTX_FRAME_RELATED_P (insn) = 1;
25491   add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25492                 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
25493                                                       GEN_INT (-size))));
25494   return insn;
25495 }
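/* Rough sketch of what this emits on a 32-bit target: for SIZE <= 32767
   a single "stwu r1,-SIZE(r1)"; otherwise the negated size is first
   materialized in r0 and the update becomes "stwux r1,r1,r0" (stdu /
   stdux for 64-bit), so the back-chain store and the stack pointer
   decrement stay one atomic instruction.  */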
25497 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25499 #if PROBE_INTERVAL > 32768
25500 #error Cannot use indexed addressing mode for stack probing
25501 #endif
25503 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25504 inclusive. These are offsets from the current stack pointer. */
25506 static void
25507 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25508 {
25509   /* See if we have a constant small number of probes to generate.  If so,
25510      that's the easy case.  */
25511   if (first + size <= 32768)
25512     {
25513       HOST_WIDE_INT i;
25515       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25516          it exceeds SIZE.  If only one probe is needed, this will not
25517          generate any code.  Then probe at FIRST + SIZE.  */
25518       for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25519         emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25520                                          -(first + i)));
25522       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25523                                        -(first + size)));
25524     }
25526   /* Otherwise, do the same as above, but in a loop.  Note that we must be
25527      extra careful with variables wrapping around because we might be at
25528      the very top (or the very bottom) of the address space and we have
25529      to be able to handle this case properly; in particular, we use an
25530      equality test for the loop condition.  */
25531   else
25532     {
25533       HOST_WIDE_INT rounded_size;
25534       rtx r12 = gen_rtx_REG (Pmode, 12);
25535       rtx r0 = gen_rtx_REG (Pmode, 0);
25537       /* Sanity check for the addressing mode we're going to use.  */
25538       gcc_assert (first <= 32768);
25540       /* Step 1: round SIZE to the previous multiple of the interval.  */
25542       rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25545       /* Step 2: compute initial and final value of the loop counter.  */
25547       /* TEST_ADDR = SP + FIRST.  */
25548       emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25549                                                   -first)));
25551       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
25552       if (rounded_size > 32768)
25553         {
25554           emit_move_insn (r0, GEN_INT (-rounded_size));
25555           emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25556         }
25557       else
25558         emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25559                                                    -rounded_size)));
25562       /* Step 3: the loop
25564            do
25565              {
25566                TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25567                probe at TEST_ADDR
25568              }
25569            while (TEST_ADDR != LAST_ADDR)
25571          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25572          until it is equal to ROUNDED_SIZE.  */
25574       if (TARGET_64BIT)
25575         emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
25576       else
25577         emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
25580       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25581          that SIZE is equal to ROUNDED_SIZE.  */
25583       if (size != rounded_size)
25584         emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
25585     }
25586 }
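/* Worked example (assuming PROBE_INTERVAL == 4096): for FIRST == 16384
   and SIZE == 10000 the small-count path above probes at sp-20480,
   sp-24576 and finally sp-26384; a larger SIZE takes the loop path,
   with r12 and r0 serving as TEST_ADDR and LAST_ADDR.  */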
25588 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25589 absolute addresses. */
25591 const char *
25592 output_probe_stack_range (rtx reg1, rtx reg2)
25593 {
25594   static int labelno = 0;
25595   char loop_lab[32];
25596   rtx xops[2];
25598   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25600   /* Loop.  */
25601   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25603   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
25604   xops[0] = reg1;
25605   xops[1] = GEN_INT (-PROBE_INTERVAL);
25606   output_asm_insn ("addi %0,%0,%1", xops);
25608   /* Probe at TEST_ADDR.  */
25609   xops[1] = gen_rtx_REG (Pmode, 0);
25610   output_asm_insn ("stw %1,0(%0)", xops);
25612   /* Test if TEST_ADDR == LAST_ADDR.  */
25613   xops[1] = reg2;
25614   if (TARGET_64BIT)
25615     output_asm_insn ("cmpd 0,%0,%1", xops);
25616   else
25617     output_asm_insn ("cmpw 0,%0,%1", xops);
25619   /* Branch.  */
25620   fputs ("\tbne 0,", asm_out_file);
25621   assemble_name_raw (asm_out_file, loop_lab);
25622   fputc ('\n', asm_out_file);
25624   return "";
25625 }
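/* The loop emitted here looks roughly like this (64-bit case, with the
   caller's r12 as TEST_ADDR and r0 holding LAST_ADDR, 4096-byte
   interval):
       .LPSRL0:
               addi 12,12,-4096
               stw 0,0(12)
               cmpd 0,12,0
               bne 0,.LPSRL0
   The stw is only a probe, so a word store suffices even on 64-bit.  */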
25627 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25628    with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25629    is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
25630    deduce these equivalences by itself so it wasn't necessary to hold
25631    its hand so much.  Don't be tempted to always supply d2_f_d_e with
25632    the actual CFA register, i.e. r31 when we are using a hard frame
25633    pointer.  That fails when saving regs off r1, and sched moves the
25634    r31 setup past the reg saves.  */
25636 static rtx
25637 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
25638                       rtx reg2, rtx repl2)
25639 {
25640   rtx repl;
25642   if (REGNO (reg) == STACK_POINTER_REGNUM)
25643     {
25644       gcc_checking_assert (val == 0);
25645       repl = NULL_RTX;
25646     }
25647   else
25648     repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25649                          GEN_INT (val));
25651   rtx pat = PATTERN (insn);
25652   if (!repl && !reg2)
25653     {
25654       /* No need for any replacement.  Just set RTX_FRAME_RELATED_P.  */
25655       if (GET_CODE (pat) == PARALLEL)
25656         for (int i = 0; i < XVECLEN (pat, 0); i++)
25657           if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25658             {
25659               rtx set = XVECEXP (pat, 0, i);
25661               /* If this PARALLEL has been emitted for out-of-line
25662                  register save functions, or store multiple, then omit
25663                  eh_frame info for any user-defined global regs.  If
25664                  eh_frame info is supplied, frame unwinding will
25665                  restore a user reg.  */
25666               if (!REG_P (SET_SRC (set))
25667                   || !fixed_reg_p (REGNO (SET_SRC (set))))
25668                 RTX_FRAME_RELATED_P (set) = 1;
25669             }
25670       RTX_FRAME_RELATED_P (insn) = 1;
25671       return insn;
25672     }
25674   /* We expect that 'pat' is either a SET or a PARALLEL containing
25675      SETs (and possibly other stuff).  In a PARALLEL, all the SETs
25676      are important so they all have to be marked RTX_FRAME_RELATED_P.
25677      Call simplify_replace_rtx on the SETs rather than the whole insn
25678      so as to leave the other stuff alone (for example USE of r12).  */
25680   if (GET_CODE (pat) == SET)
25681     {
25682       if (repl)
25683         pat = simplify_replace_rtx (pat, reg, repl);
25684       if (reg2)
25685         pat = simplify_replace_rtx (pat, reg2, repl2);
25686     }
25687   else if (GET_CODE (pat) == PARALLEL)
25688     {
25689       pat = shallow_copy_rtx (pat);
25690       XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25692       for (int i = 0; i < XVECLEN (pat, 0); i++)
25693         if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25694           {
25695             rtx set = XVECEXP (pat, 0, i);
25697             if (repl)
25698               set = simplify_replace_rtx (set, reg, repl);
25699             if (reg2)
25700               set = simplify_replace_rtx (set, reg2, repl2);
25701             XVECEXP (pat, 0, i) = set;
25703             /* Omit eh_frame info for any user-defined global regs.  */
25704             if (!REG_P (SET_SRC (set))
25705                 || !fixed_reg_p (REGNO (SET_SRC (set))))
25706               RTX_FRAME_RELATED_P (set) = 1;
25707           }
25708     }
25709   else
25710     gcc_unreachable ();
25712   RTX_FRAME_RELATED_P (insn) = 1;
25713   if (repl || reg2)
25714     add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
25716   return insn;
25717 }
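/* Illustrative shape only: if registers are saved off a frame register
   r11 that was set to r1+OFF, the REG_FRAME_RELATED_EXPR note attached
   here rewrites each
       (set (mem (plus (reg 11) (const_int N))) (reg X))
   into the simplified
       (set (mem (plus (reg 1) (const_int OFF+N))) (reg X))
   so the unwinder always sees stack-pointer-relative saves.  */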
25719 /* Returns an insn that has a vrsave set operation with the
25720 appropriate CLOBBERs. */
25722 static rtx
25723 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25724 {
25725   int nclobs, i;
25726   rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25727   rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25729   clobs[0]
25730     = gen_rtx_SET (vrsave,
25731                    gen_rtx_UNSPEC_VOLATILE (SImode,
25732                                             gen_rtvec (2, reg, vrsave),
25733                                             UNSPECV_SET_VRSAVE));
25735   nclobs = 1;
25737   /* We need to clobber the registers in the mask so the scheduler
25738      does not move sets to VRSAVE before sets of AltiVec registers.
25740      However, if the function receives nonlocal gotos, reload will set
25741      all call-saved registers live.  We will end up with:
25743          (set (reg 999) (mem))
25744          (parallel [ (set (reg vrsave) (unspec blah))
25745                      (clobber (reg 999))])
25747      The clobber will cause the store into reg 999 to be dead, and
25748      flow will attempt to delete an epilogue insn.  In this case, we
25749      need an unspec use/set of the register.  */
25751   for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25752     if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25753       {
25754         if (!epiloguep || call_used_regs [i])
25755           clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25756                                              gen_rtx_REG (V4SImode, i));
25757         else
25758           {
25759             rtx reg = gen_rtx_REG (V4SImode, i);
25761             clobs[nclobs++]
25762               = gen_rtx_SET (reg,
25763                              gen_rtx_UNSPEC (V4SImode,
25764                                              gen_rtvec (1, reg), 27));
25765           }
25766       }
25768   insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25770   for (i = 0; i < nclobs; ++i)
25771     XVECEXP (insn, 0, i) = clobs[i];
25773   return insn;
25774 }
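/* Sketch of the result for a prologue (EPILOGUEP == 0) whose mask
   covers only v20:
       (parallel [(set (reg VRSAVE)
                       (unspec_volatile [(reg R) (reg VRSAVE)]
                                        UNSPECV_SET_VRSAVE))
                  (clobber (reg:V4SI v20))])
   In an epilogue, live call-saved vector registers in the mask get the
   UNSPEC set form instead of a plain clobber, as explained above.  */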
25776 static rtx
25777 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25778 {
25779   rtx addr, mem;
25781   addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25782   mem = gen_frame_mem (GET_MODE (reg), addr);
25783   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
25784 }
25786 static rtx
25787 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25788 {
25789   return gen_frame_set (reg, frame_reg, offset, false);
25790 }
25792 static rtx
25793 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25794 {
25795   return gen_frame_set (reg, frame_reg, offset, true);
25796 }
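/* Example: gen_frame_store (gen_rtx_REG (Pmode, 0), sp, 16), where sp
   is the stack pointer rtx, builds
       (set (mem (plus (reg 1) (const_int 16))) (reg 0))
   with the frame alias set; gen_frame_load simply swaps the two SET
   operands.  */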
25798 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25799 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
25801 static rtx
25802 emit_frame_save (rtx frame_reg, machine_mode mode,
25803                  unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25804 {
25805   rtx reg, insn;
25807   /* Some cases that need register indexed addressing.  */
25808   gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25809                          || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
25810                          || (TARGET_E500_DOUBLE && mode == DFmode)
25811                          || (TARGET_SPE_ABI
25812                              && SPE_VECTOR_MODE (mode)
25813                              && !SPE_CONST_OFFSET_OK (offset))));
25815   reg = gen_rtx_REG (mode, regno);
25816   insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25817   return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25818                                NULL_RTX, NULL_RTX);
25819 }
25821 /* Emit an offset memory reference suitable for a frame store, while
25822 converting to a valid addressing mode. */
25824 static rtx
25825 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25826 {
25827   rtx int_rtx, offset_rtx;
25829   int_rtx = GEN_INT (offset);
25831   if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
25832       || (TARGET_E500_DOUBLE && mode == DFmode))
25833     {
25834       offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
25835       emit_move_insn (offset_rtx, int_rtx);
25836     }
25837   else
25838     offset_rtx = int_rtx;
25840   return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
25841 }
25843 #ifndef TARGET_FIX_AND_CONTINUE
25844 #define TARGET_FIX_AND_CONTINUE 0
25845 #endif
25847 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; FIRST_SAVRES_REGISTER must be the smallest of these.  */
25848 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25849 #define LAST_SAVRES_REGISTER 31
25850 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
25852 enum {
25853 SAVRES_LR = 0x1,
25854 SAVRES_SAVE = 0x2,
25855 SAVRES_REG = 0x0c,
25856 SAVRES_GPR = 0,
25857 SAVRES_FPR = 4,
25858   SAVRES_VR = 8
25859 };
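/* Example encodings: SAVRES_SAVE | SAVRES_FPR | SAVRES_LR (== 0x7)
   selects the out-of-line FPR save that also stores the link register,
   while plain SAVRES_GPR (== 0) is the GPR restore without LR.  */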
25861 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
25863 /* Temporary holding space for an out-of-line register save/restore
25864 routine name. */
25865 static char savres_routine_name[30];
25867 /* Return the name for an out-of-line register save/restore routine.
25868 We are saving/restoring GPRs if GPR is true. */
25870 static char *
25871 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
25873 const char *prefix = "";
25874 const char *suffix = "";
25876 /* Different targets are supposed to define
25877 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25878 routine name could be defined with:
25880 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25882 This is a nice idea in theory, but in reality things are
25883 complicated in several ways:
25885 - ELF targets have save/restore routines for GPRs.
25887 - SPE targets use different prefixes for 32/64-bit registers, and
25888 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
25890 - PPC64 ELF targets have routines for save/restore of GPRs that
25891 differ in what they do with the link register, so having a set
25892 prefix doesn't work. (We only use one of the save routines at
25893 the moment, though.)
25895 - PPC32 ELF targets have "exit" versions of the restore routines
25896 that restore the link register and can save some extra space.
25897 These require an extra suffix. (There are also "tail" versions
25898 of the restore routines and "GOT" versions of the save routines,
25899 but we don't generate those at present. Same problems apply,
25900 though.)
25902 We deal with all this by synthesizing our own prefix/suffix and
25903 using that for the simple sprintf call shown above. */
25904 if (TARGET_SPE)
25906 /* No floating point saves on the SPE. */
25907 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
25909 if ((sel & SAVRES_SAVE))
25910 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
25911 else
25912 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
25914 if ((sel & SAVRES_LR))
25915 suffix = "_x";
25917 else if (DEFAULT_ABI == ABI_V4)
25919 if (TARGET_64BIT)
25920 goto aix_names;
25922 if ((sel & SAVRES_REG) == SAVRES_GPR)
25923 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25924 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25925 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25926 else if ((sel & SAVRES_REG) == SAVRES_VR)
25927 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25928 else
25929 abort ();
25931 if ((sel & SAVRES_LR))
25932 suffix = "_x";
25934 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25936 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25937 /* No out-of-line save/restore routines for GPRs on AIX. */
25938 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25939 #endif
25941 aix_names:
25942 if ((sel & SAVRES_REG) == SAVRES_GPR)
25943 prefix = ((sel & SAVRES_SAVE)
25944 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25945 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25946 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25948 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25949 if ((sel & SAVRES_LR))
25950 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25951 else
25952 #endif
25954 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25955 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25958 else if ((sel & SAVRES_REG) == SAVRES_VR)
25959 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25960 else
25961 abort ();
25964 if (DEFAULT_ABI == ABI_DARWIN)
25966 /* The Darwin approach is (slightly) different, in order to be
25967 compatible with code generated by the system toolchain. There is a
25968 single symbol for the start of save sequence, and the code here
25969 embeds an offset into that code on the basis of the first register
25970 to be saved. */
25971 prefix = (sel & SAVRES_SAVE) ? "save" : "rest";
25972 if ((sel & SAVRES_REG) == SAVRES_GPR)
25973 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25974 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25975 (regno - 13) * 4, prefix, regno);
25976 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25977 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25978 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25979 else if ((sel & SAVRES_REG) == SAVRES_VR)
25980 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25981 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25982 else
25983 abort ();
25985 else
25986 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25988   return savres_routine_name;
25989 }
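/* Illustrative names this produces: on 32-bit ELF/V.4, saving GPRs
   from r28 up gives "_savegpr_28" (or "_savegpr_28_x" when the routine
   also handles LR); the equivalent AIX/ELFv2 save with LR is
   "_savegpr0_28".  */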
25991 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25992 We are saving/restoring GPRs if GPR is true. */
25994 static rtx
25995 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25997 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25998 ? info->first_gp_reg_save
25999 : (sel & SAVRES_REG) == SAVRES_FPR
26000 ? info->first_fp_reg_save - 32
26001 : (sel & SAVRES_REG) == SAVRES_VR
26002 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26003 : -1);
26004 rtx sym;
26005 int select = sel;
26007 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
26008 versions of the gpr routines. */
26009 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
26010 && info->spe_64bit_regs_used)
26011 select ^= SAVRES_FPR ^ SAVRES_GPR;
26013 /* Don't generate bogus routine names. */
26014 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26015 && regno <= LAST_SAVRES_REGISTER
26016 && select >= 0 && select <= 12);
26018 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26020 if (sym == NULL)
26022 char *name;
26024 name = rs6000_savres_routine_name (info, regno, sel);
26026 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26027 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26028 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26031 return sym;
26034 /* Emit a sequence of insns, including a stack tie if needed, for
26035 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26036 reset the stack pointer, but move the base of the frame into
26037 reg UPDT_REGNO for use by out-of-line register restore routines. */
26039 static rtx
26040 rs6000_emit_stack_reset (rs6000_stack_t *info,
26041                          rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26042                          unsigned updt_regno)
26043 {
26044   rtx updt_reg_rtx;
26046   /* This blockage is needed so that sched doesn't decide to move
26047      the sp change before the register restores.  */
26048   if (DEFAULT_ABI == ABI_V4
26049       || (TARGET_SPE_ABI
26050           && info->spe_64bit_regs_used != 0
26051           && info->first_gp_reg_save != 32))
26052     rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
26054   /* If we are restoring registers out-of-line, we will be using the
26055      "exit" variants of the restore routines, which will reset the
26056      stack for us.  But we do need to point updt_reg into the
26057      right place for those routines.  */
26058   updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26060   if (frame_off != 0)
26061     return emit_insn (gen_add3_insn (updt_reg_rtx,
26062                                      frame_reg_rtx, GEN_INT (frame_off)));
26063   else if (REGNO (frame_reg_rtx) != updt_regno)
26064     return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26066   return NULL_RTX;
26067 }
26069 /* Return the register number used as a pointer by out-of-line
26070 save/restore functions. */
26072 static inline unsigned
26073 ptr_regno_for_savres (int sel)
26074 {
26075   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26076     return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26077   return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26078 }
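/* Summarizing the logic above: AIX/ELFv2 routines use r1 for FPR
   save/restore and for anything touching LR, r12 otherwise; Darwin FPR
   routines use r1, and everything else (including 32-bit ELF/V.4)
   uses r11.  */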
26080 /* Construct a parallel rtx describing the effect of a call to an
26081 out-of-line register save/restore routine, and emit the insn
26082 or jump_insn as appropriate. */
26084 static rtx
26085 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26086 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26087 machine_mode reg_mode, int sel)
26089 int i;
26090 int offset, start_reg, end_reg, n_regs, use_reg;
26091 int reg_size = GET_MODE_SIZE (reg_mode);
26092 rtx sym;
26093 rtvec p;
26094 rtx par, insn;
26096 offset = 0;
26097 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26098 ? info->first_gp_reg_save
26099 : (sel & SAVRES_REG) == SAVRES_FPR
26100 ? info->first_fp_reg_save
26101 : (sel & SAVRES_REG) == SAVRES_VR
26102 ? info->first_altivec_reg_save
26103 : -1);
26104 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26105 ? 32
26106 : (sel & SAVRES_REG) == SAVRES_FPR
26107 ? 64
26108 : (sel & SAVRES_REG) == SAVRES_VR
26109 ? LAST_ALTIVEC_REGNO + 1
26110 : -1);
26111 n_regs = end_reg - start_reg;
26112 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26113 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26114 + n_regs);
26116 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26117 RTVEC_ELT (p, offset++) = ret_rtx;
26119 RTVEC_ELT (p, offset++)
26120 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26122 sym = rs6000_savres_routine_sym (info, sel);
26123 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26125 use_reg = ptr_regno_for_savres (sel);
26126 if ((sel & SAVRES_REG) == SAVRES_VR)
26128 /* Vector regs are saved/restored using [reg+reg] addressing. */
26129 RTVEC_ELT (p, offset++)
26130 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26131 RTVEC_ELT (p, offset++)
26132 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26134 else
26135 RTVEC_ELT (p, offset++)
26136 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26138 for (i = 0; i < end_reg - start_reg; i++)
26139 RTVEC_ELT (p, i + offset)
26140 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26141 frame_reg_rtx, save_area_offset + reg_size * i,
26142 (sel & SAVRES_SAVE) != 0);
26144 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26145 RTVEC_ELT (p, i + offset)
26146 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26148 par = gen_rtx_PARALLEL (VOIDmode, p);
26150 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26152 insn = emit_jump_insn (par);
26153 JUMP_LABEL (insn) = ret_rtx;
26155 else
26156 insn = emit_insn (par);
26157   return insn;
26158 }
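/* Sketch of the PARALLEL built for, e.g., SAVRES_SAVE | SAVRES_GPR |
   SAVRES_LR with first_gp_reg_save == 30 on ELFv2:
       (parallel [(clobber (reg LR))
                  (use (symbol_ref "_savegpr0_30"))
                  (use (reg 1))
                  (set (mem ...) (reg 30))
                  (set (mem ...) (reg 31))
                  (set (mem ...) (reg 0))])   ; stored LR copy
   A restore with SAVRES_LR instead becomes a jump_insn whose first
   element is ret_rtx.  */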
26160 /* Emit code to store CR fields that need to be saved into REG. */
26162 static void
26163 rs6000_emit_move_from_cr (rtx reg)
26164 {
26165   /* Only the ELFv2 ABI allows storing only selected fields.  */
26166   if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26167     {
26168       int i, cr_reg[8], count = 0;
26170       /* Collect CR fields that must be saved.  */
26171       for (i = 0; i < 8; i++)
26172         if (save_reg_p (CR0_REGNO + i))
26173           cr_reg[count++] = i;
26175       /* If it's just a single one, use mfcrf.  */
26176       if (count == 1)
26177         {
26178           rtvec p = rtvec_alloc (1);
26179           rtvec r = rtvec_alloc (2);
26180           RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26181           RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26182           RTVEC_ELT (p, 0)
26183             = gen_rtx_SET (reg,
26184                            gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26186           emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26187           return;
26188         }
26190       /* ??? It might be better to handle count == 2 / 3 cases here
26191          as well, using logical operations to combine the values.  */
26192     }
26194   emit_insn (gen_movesi_from_cr (reg));
26195 }
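/* E.g. if CR2 is the only field needing a save, the UNSPEC above lets
   the insn pattern emit a single-field move (mfcr/mfocrf with field
   mask 1 << (7 - 2) == 0x20) rather than copying the whole CR and then
   falling back to a full mfcr as in the count > 1 case.  */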
26197 /* Return whether the split-stack arg pointer (r12) is used. */
26199 static bool
26200 split_stack_arg_pointer_used_p (void)
26201 {
26202   /* If the pseudo holding the arg pointer is no longer a pseudo,
26203      then the arg pointer is used.  */
26204   if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26205       && (!REG_P (cfun->machine->split_stack_arg_pointer)
26206           || (REGNO (cfun->machine->split_stack_arg_pointer)
26207               < FIRST_PSEUDO_REGISTER)))
26208     return true;
26210   /* Unfortunately we also need to do some code scanning, since
26211      r12 may have been substituted for the pseudo.  */
26212   rtx_insn *insn;
26213   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26214   FOR_BB_INSNS (bb, insn)
26215     if (NONDEBUG_INSN_P (insn))
26216       {
26217         /* A call destroys r12.  */
26218         if (CALL_P (insn))
26219           return false;
26221         df_ref use;
26222         FOR_EACH_INSN_USE (use, insn)
26223           {
26224             rtx x = DF_REF_REG (use);
26225             if (REG_P (x) && REGNO (x) == 12)
26226               return true;
26227           }
26228         df_ref def;
26229         FOR_EACH_INSN_DEF (def, insn)
26230           {
26231             rtx x = DF_REF_REG (def);
26232             if (REG_P (x) && REGNO (x) == 12)
26233               return false;
26234           }
26235       }
26236   return bitmap_bit_p (DF_LR_OUT (bb), 12);
26237 }
26239 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26241 static bool
26242 rs6000_global_entry_point_needed_p (void)
26243 {
26244 /* Only needed for the ELFv2 ABI. */
26245 if (DEFAULT_ABI != ABI_ELFv2)
26246 return false;
26248 /* With -msingle-pic-base, we assume the whole program shares the same
26249 TOC, so no global entry point prologues are needed anywhere. */
26250 if (TARGET_SINGLE_PIC_BASE)
26251 return false;
26253 /* Ensure we have a global entry point for thunks. ??? We could
26254 avoid that if the target routine doesn't need a global entry point,
26255 but we do not know whether this is the case at this point. */
26256 if (cfun->is_thunk)
26257 return true;
26259 /* For regular functions, rs6000_emit_prologue sets this flag if the
26260 routine ever uses the TOC pointer. */
26261   return cfun->machine->r2_setup_needed;
26262 }
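/* For reference, the global entry point emitted for such functions is
   the usual ELFv2 sequence (shown for illustration only):
       0:      addis 2,12,(.TOC.-0b)@ha
               addi 2,2,(.TOC.-0b)@l
   which recomputes the TOC pointer from the function's own address in
   r12; local callers skip it via the .localentry offset.  */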
26264 /* Emit function prologue as insns. */
26266 void
26267 rs6000_emit_prologue (void)
26268 {
26269 rs6000_stack_t *info = rs6000_stack_info ();
26270 machine_mode reg_mode = Pmode;
26271 int reg_size = TARGET_32BIT ? 4 : 8;
26272 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26273 rtx frame_reg_rtx = sp_reg_rtx;
26274 unsigned int cr_save_regno;
26275 rtx cr_save_rtx = NULL_RTX;
26276 rtx insn;
26277 int strategy;
26278 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26279 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26280 && call_used_regs[STATIC_CHAIN_REGNUM]);
26281 int using_split_stack = (flag_split_stack
26282 && (lookup_attribute ("no_split_stack",
26283 DECL_ATTRIBUTES (cfun->decl))
26284 == NULL));
26286 /* Offset to top of frame for frame_reg and sp respectively. */
26287 HOST_WIDE_INT frame_off = 0;
26288 HOST_WIDE_INT sp_off = 0;
26289 /* sp_adjust is the stack adjusting instruction, tracked so that the
26290 insn setting up the split-stack arg pointer can be emitted just
26291 prior to it, when r12 is not used here for other purposes. */
26292 rtx_insn *sp_adjust = 0;
26294 #if CHECKING_P
26295 /* Track and check usage of r0, r11, r12. */
26296 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26297 #define START_USE(R) do \
26298 { \
26299   gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26300   reg_inuse |= 1 << (R); \
26301 } while (0)
26302 #define END_USE(R) do \
26303 { \
26304   gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26305   reg_inuse &= ~(1 << (R)); \
26306 } while (0)
26307 #define NOT_INUSE(R) do \
26308 { \
26309   gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26310 } while (0)
26311 #else
26312 #define START_USE(R) do {} while (0)
26313 #define END_USE(R) do {} while (0)
26314 #define NOT_INUSE(R) do {} while (0)
26315 #endif
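/* Book-keeping only: START_USE (11) asserts r11 is free and marks it
   busy, END_USE (11) releases it, and NOT_INUSE (11) merely asserts it
   is free.  With CHECKING_P disabled, all three expand to nothing.  */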
26317 if (DEFAULT_ABI == ABI_ELFv2
26318 && !TARGET_SINGLE_PIC_BASE)
26320 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26322 /* With -mminimal-toc we may generate an extra use of r2 below. */
26323 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
26324 cfun->machine->r2_setup_needed = true;
26328 if (flag_stack_usage_info)
26329 current_function_static_stack_size = info->total_size;
26331 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26333 HOST_WIDE_INT size = info->total_size;
26335 if (crtl->is_leaf && !cfun->calls_alloca)
26337 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26338 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
26339 size - STACK_CHECK_PROTECT);
26341 else if (size > 0)
26342 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
26345 if (TARGET_FIX_AND_CONTINUE)
26347 /* gdb on darwin arranges to forward a function from the old
26348 address by modifying the first 5 instructions of the function
26349 to branch to the overriding function. This is necessary to
26350 permit function pointers that point to the old function to
26351 actually forward to the new function. */
26352 emit_insn (gen_nop ());
26353 emit_insn (gen_nop ());
26354 emit_insn (gen_nop ());
26355 emit_insn (gen_nop ());
26356 emit_insn (gen_nop ());
26359 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26361 reg_mode = V2SImode;
26362 reg_size = 8;
26365 /* Handle world saves specially here. */
26366 if (WORLD_SAVE_P (info))
26368 int i, j, sz;
26369 rtx treg;
26370 rtvec p;
26371 rtx reg0;
26373 /* save_world expects lr in r0. */
26374 reg0 = gen_rtx_REG (Pmode, 0);
26375 if (info->lr_save_p)
26377 insn = emit_move_insn (reg0,
26378 gen_rtx_REG (Pmode, LR_REGNO));
26379 RTX_FRAME_RELATED_P (insn) = 1;
26382 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26383 assumptions about the offsets of various bits of the stack
26384 frame. */
26385 gcc_assert (info->gp_save_offset == -220
26386 && info->fp_save_offset == -144
26387 && info->lr_save_offset == 8
26388 && info->cr_save_offset == 4
26389 && info->push_p
26390 && info->lr_save_p
26391 && (!crtl->calls_eh_return
26392 || info->ehrd_offset == -432)
26393 && info->vrsave_save_offset == -224
26394 && info->altivec_save_offset == -416);
26396 treg = gen_rtx_REG (SImode, 11);
26397 emit_move_insn (treg, GEN_INT (-info->total_size));
26399 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26400 in R11. It also clobbers R12, so beware! */
26402 /* Preserve CR2 for save_world prologues */
26403 sz = 5;
26404 sz += 32 - info->first_gp_reg_save;
26405 sz += 64 - info->first_fp_reg_save;
26406 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26407 p = rtvec_alloc (sz);
26408 j = 0;
26409 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26410 gen_rtx_REG (SImode,
26411 LR_REGNO));
26412 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26413 gen_rtx_SYMBOL_REF (Pmode,
26414 "*save_world"));
26415 /* We do floats first so that the instruction pattern matches
26416 properly. */
26417 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26418 RTVEC_ELT (p, j++)
26419 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26420 ? DFmode : SFmode,
26421 info->first_fp_reg_save + i),
26422 frame_reg_rtx,
26423 info->fp_save_offset + frame_off + 8 * i);
26424 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26425 RTVEC_ELT (p, j++)
26426 = gen_frame_store (gen_rtx_REG (V4SImode,
26427 info->first_altivec_reg_save + i),
26428 frame_reg_rtx,
26429 info->altivec_save_offset + frame_off + 16 * i);
26430 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26431 RTVEC_ELT (p, j++)
26432 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26433 frame_reg_rtx,
26434 info->gp_save_offset + frame_off + reg_size * i);
26436 /* CR register traditionally saved as CR2. */
26437 RTVEC_ELT (p, j++)
26438 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26439 frame_reg_rtx, info->cr_save_offset + frame_off);
26440 /* Explain the use of R0.  */
26441 if (info->lr_save_p)
26442 RTVEC_ELT (p, j++)
26443 = gen_frame_store (reg0,
26444 frame_reg_rtx, info->lr_save_offset + frame_off);
26445 /* Explain what happens to the stack pointer. */
26447 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26448 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26451 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26452 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26453 treg, GEN_INT (-info->total_size));
26454 sp_off = frame_off = info->total_size;
26457 strategy = info->savres_strategy;
26459 /* For V.4, update the stack before we do any saving and set the back pointer.  */
26460 if (! WORLD_SAVE_P (info)
26461 && info->push_p
26462 && (DEFAULT_ABI == ABI_V4
26463 || crtl->calls_eh_return))
26465 bool need_r11 = (TARGET_SPE
26466 ? (!(strategy & SAVE_INLINE_GPRS)
26467 && info->spe_64bit_regs_used == 0)
26468 : (!(strategy & SAVE_INLINE_FPRS)
26469 || !(strategy & SAVE_INLINE_GPRS)
26470 || !(strategy & SAVE_INLINE_VRS)));
26471 int ptr_regno = -1;
26472 rtx ptr_reg = NULL_RTX;
26473 int ptr_off = 0;
26475 if (info->total_size < 32767)
26476 frame_off = info->total_size;
26477 else if (need_r11)
26478 ptr_regno = 11;
26479 else if (info->cr_save_p
26480 || info->lr_save_p
26481 || info->first_fp_reg_save < 64
26482 || info->first_gp_reg_save < 32
26483 || info->altivec_size != 0
26484 || info->vrsave_size != 0
26485 || crtl->calls_eh_return)
26486 ptr_regno = 12;
26487 else
26489 /* The prologue won't be saving any regs so there is no need
26490 to set up a frame register to access any frame save area.
26491 We also won't be using frame_off anywhere below, but set
26492 the correct value anyway to protect against future
26493 changes to this function. */
26494 frame_off = info->total_size;
26496 if (ptr_regno != -1)
26498 /* Set up the frame offset to that needed by the first
26499 out-of-line save function. */
26500 START_USE (ptr_regno);
26501 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26502 frame_reg_rtx = ptr_reg;
26503 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26504 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26505 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26506 ptr_off = info->gp_save_offset + info->gp_size;
26507 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26508 ptr_off = info->altivec_save_offset + info->altivec_size;
26509 frame_off = -ptr_off;
26511 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26512 ptr_reg, ptr_off);
26513 if (REGNO (frame_reg_rtx) == 12)
26514 sp_adjust = 0;
26515 sp_off = info->total_size;
26516 if (frame_reg_rtx != sp_reg_rtx)
26517 rs6000_emit_stack_tie (frame_reg_rtx, false);
26520 /* If we use the link register, get it into r0. */
26521 if (!WORLD_SAVE_P (info) && info->lr_save_p)
26523 rtx addr, reg, mem;
26525 reg = gen_rtx_REG (Pmode, 0);
26526 START_USE (0);
26527 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26528 RTX_FRAME_RELATED_P (insn) = 1;
26530 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26531 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26533 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26534 GEN_INT (info->lr_save_offset + frame_off));
26535 mem = gen_rtx_MEM (Pmode, addr);
26536 /* This should not be of rs6000_sr_alias_set, because of
26537 __builtin_return_address. */
26539 insn = emit_move_insn (mem, reg);
26540 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26541 NULL_RTX, NULL_RTX);
26542 END_USE (0);
26546 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26547 r12 will be needed by out-of-line gpr save. */
26548 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26549 && !(strategy & (SAVE_INLINE_GPRS
26550 | SAVE_NOINLINE_GPRS_SAVES_LR))
26551 ? 11 : 12);
26552 if (!WORLD_SAVE_P (info)
26553 && info->cr_save_p
26554 && REGNO (frame_reg_rtx) != cr_save_regno
26555 && !(using_static_chain_p && cr_save_regno == 11)
26556 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26558 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26559 START_USE (cr_save_regno);
26560 rs6000_emit_move_from_cr (cr_save_rtx);
26563 /* Do any required saving of FPRs.  If only one or two to save, do
26564    it ourselves.  Otherwise, call an out-of-line save routine.  */
26565 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26567 int i;
26568 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26569 if (save_reg_p (info->first_fp_reg_save + i))
26570 emit_frame_save (frame_reg_rtx,
26571 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26572 ? DFmode : SFmode),
26573 info->first_fp_reg_save + i,
26574 info->fp_save_offset + frame_off + 8 * i,
26575 sp_off - frame_off);
26577 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26579 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26580 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26581 unsigned ptr_regno = ptr_regno_for_savres (sel);
26582 rtx ptr_reg = frame_reg_rtx;
26584 if (REGNO (frame_reg_rtx) == ptr_regno)
26585 gcc_checking_assert (frame_off == 0);
26586 else
26588 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26589 NOT_INUSE (ptr_regno);
26590 emit_insn (gen_add3_insn (ptr_reg,
26591 frame_reg_rtx, GEN_INT (frame_off)));
26593 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26594 info->fp_save_offset,
26595 info->lr_save_offset,
26596 DFmode, sel);
26597 rs6000_frame_related (insn, ptr_reg, sp_off,
26598 NULL_RTX, NULL_RTX);
26599 if (lr)
26600 END_USE (0);
26603 /* Save GPRs. This is done as a PARALLEL if we are using
26604 the store-multiple instructions. */
26605 if (!WORLD_SAVE_P (info)
26606 && TARGET_SPE_ABI
26607 && info->spe_64bit_regs_used != 0
26608 && info->first_gp_reg_save != 32)
26610 int i;
26611 rtx spe_save_area_ptr;
26612 HOST_WIDE_INT save_off;
26613 int ool_adjust = 0;
26615 /* Determine whether we can address all of the registers that need
26616 to be saved with an offset from frame_reg_rtx that fits in
26617 the small const field for SPE memory instructions. */
26618 int spe_regs_addressable
26619 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
26620 + reg_size * (32 - info->first_gp_reg_save - 1))
26621 && (strategy & SAVE_INLINE_GPRS));
26623 if (spe_regs_addressable)
26625 spe_save_area_ptr = frame_reg_rtx;
26626 save_off = frame_off;
26628 else
26630 /* Make r11 point to the start of the SPE save area. We need
26631 to be careful here if r11 is holding the static chain. If
26632 it is, then temporarily save it in r0. */
26633 HOST_WIDE_INT offset;
26635 if (!(strategy & SAVE_INLINE_GPRS))
26636 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
26637 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
26638 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
26639 save_off = frame_off - offset;
26641 if (using_static_chain_p)
26643 rtx r0 = gen_rtx_REG (Pmode, 0);
26645 START_USE (0);
26646 gcc_assert (info->first_gp_reg_save > 11);
26648 emit_move_insn (r0, spe_save_area_ptr);
26650 else if (REGNO (frame_reg_rtx) != 11)
26651 START_USE (11);
26653 emit_insn (gen_addsi3 (spe_save_area_ptr,
26654 frame_reg_rtx, GEN_INT (offset)));
26655 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
26656 frame_off = -info->spe_gp_save_offset + ool_adjust;
26659 if ((strategy & SAVE_INLINE_GPRS))
26661 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26662 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26663 emit_frame_save (spe_save_area_ptr, reg_mode,
26664 info->first_gp_reg_save + i,
26665 (info->spe_gp_save_offset + save_off
26666 + reg_size * i),
26667 sp_off - save_off);
26669 else
26671 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
26672 info->spe_gp_save_offset + save_off,
26673 0, reg_mode,
26674 SAVRES_SAVE | SAVRES_GPR);
26676 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
26677 NULL_RTX, NULL_RTX);
26680 /* Move the static chain pointer back. */
26681 if (!spe_regs_addressable)
26683 if (using_static_chain_p)
26685 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
26686 END_USE (0);
26688 else if (REGNO (frame_reg_rtx) != 11)
26689 END_USE (11);
26692 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
26694 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
26695 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
26696 unsigned ptr_regno = ptr_regno_for_savres (sel);
26697 rtx ptr_reg = frame_reg_rtx;
26698 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
26699 int end_save = info->gp_save_offset + info->gp_size;
26700 int ptr_off;
26702 if (ptr_regno == 12)
26703 sp_adjust = 0;
26704 if (!ptr_set_up)
26705 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26707 /* Need to adjust r11 (r12) if we saved any FPRs. */
26708 if (end_save + frame_off != 0)
26710 rtx offset = GEN_INT (end_save + frame_off);
26712 if (ptr_set_up)
26713 frame_off = -end_save;
26714 else
26715 NOT_INUSE (ptr_regno);
26716 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26718 else if (!ptr_set_up)
26720 NOT_INUSE (ptr_regno);
26721 emit_move_insn (ptr_reg, frame_reg_rtx);
26723 ptr_off = -end_save;
26724 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26725 info->gp_save_offset + ptr_off,
26726 info->lr_save_offset + ptr_off,
26727 reg_mode, sel);
26728 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
26729 NULL_RTX, NULL_RTX);
26730 if (lr)
26731 END_USE (0);
26733 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
26735 rtvec p;
26736 int i;
26737 p = rtvec_alloc (32 - info->first_gp_reg_save);
26738 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26739 RTVEC_ELT (p, i)
26740 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26741 frame_reg_rtx,
26742 info->gp_save_offset + frame_off + reg_size * i);
26743 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26744 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26745 NULL_RTX, NULL_RTX);
26747 else if (!WORLD_SAVE_P (info))
26749 int i;
26750 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26751 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26752 emit_frame_save (frame_reg_rtx, reg_mode,
26753 info->first_gp_reg_save + i,
26754 info->gp_save_offset + frame_off + reg_size * i,
26755 sp_off - frame_off);
26758 if (crtl->calls_eh_return)
26760 unsigned int i;
26761 rtvec p;
26763 for (i = 0; ; ++i)
26765 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26766 if (regno == INVALID_REGNUM)
26767 break;
26770 p = rtvec_alloc (i);
26772 for (i = 0; ; ++i)
26774 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26775 if (regno == INVALID_REGNUM)
26776 break;
26778 insn
26779 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
26780 sp_reg_rtx,
26781 info->ehrd_offset + sp_off + reg_size * (int) i);
26782 RTVEC_ELT (p, i) = insn;
26783 RTX_FRAME_RELATED_P (insn) = 1;
26786 insn = emit_insn (gen_blockage ());
26787 RTX_FRAME_RELATED_P (insn) = 1;
26788 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
26791 /* In AIX ABI we need to make sure r2 is really saved. */
26792 if (TARGET_AIX && crtl->calls_eh_return)
26794 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
26795 rtx save_insn, join_insn, note;
26796 long toc_restore_insn;
26798 tmp_reg = gen_rtx_REG (Pmode, 11);
26799 tmp_reg_si = gen_rtx_REG (SImode, 11);
26800 if (using_static_chain_p)
26802 START_USE (0);
26803 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
26805 else
26806 START_USE (11);
26807 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
26808 /* Peek at instruction to which this function returns. If it's
26809 restoring r2, then we know we've already saved r2. We can't
26810 unconditionally save r2 because the value we have will already
26811 be updated if we arrived at this function via a plt call or
26812 toc adjusting stub. */
26813 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
26814 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
26815 + RS6000_TOC_SAVE_SLOT);
26816 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
26817 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
26818 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
26819 validate_condition_mode (EQ, CCUNSmode);
26820 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
26821 emit_insn (gen_rtx_SET (compare_result,
26822 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
26823 toc_save_done = gen_label_rtx ();
26824 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26825 gen_rtx_EQ (VOIDmode, compare_result,
26826 const0_rtx),
26827 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
26828 pc_rtx);
26829 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26830 JUMP_LABEL (jump) = toc_save_done;
26831 LABEL_NUSES (toc_save_done) += 1;
26833 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
26834 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
26835 sp_off - frame_off);
26837 emit_label (toc_save_done);
26839 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
26840    have a CFG that has different saves along different paths.
26841    Move the note to a dummy blockage insn, which describes that
26842    R2 is unconditionally saved after the label.  */
26843 /* ??? An alternate representation might be a special insn pattern
26844    containing both the branch and the store.  That might give the
26845    code that minimizes the number of DW_CFA_advance opcodes more
26846    freedom in placing the annotations.  */
26847 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
26848 if (note)
26849 remove_note (save_insn, note);
26850 else
26851 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
26852 copy_rtx (PATTERN (save_insn)), NULL_RTX);
26853 RTX_FRAME_RELATED_P (save_insn) = 0;
26855 join_insn = emit_insn (gen_blockage ());
26856 REG_NOTES (join_insn) = note;
26857 RTX_FRAME_RELATED_P (join_insn) = 1;
26859 if (using_static_chain_p)
26861 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
26862 END_USE (0);
26864 else
26865 END_USE (11);
26868 /* Save CR if we use any that must be preserved. */
26869 if (!WORLD_SAVE_P (info) && info->cr_save_p)
26871 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26872 GEN_INT (info->cr_save_offset + frame_off));
26873 rtx mem = gen_frame_mem (SImode, addr);
26875 /* If we didn't copy CR before, do so now using r0.  */
26876 if (cr_save_rtx == NULL_RTX)
26878 START_USE (0);
26879 cr_save_rtx = gen_rtx_REG (SImode, 0);
26880 rs6000_emit_move_from_cr (cr_save_rtx);
26883 /* Saving CR requires a two-instruction sequence: one instruction
26884 to move the CR to a general-purpose register, and a second
26885 instruction that stores the GPR to memory.
26887 We do not emit any DWARF CFI records for the first of these,
26888 because we cannot properly represent the fact that CR is saved in
26889 a register. One reason is that we cannot express that multiple
26890 CR fields are saved; another reason is that on 64-bit, the size
26891 of the CR register in DWARF (4 bytes) differs from the size of
26892 a general-purpose register.
26894 This means if any intervening instruction were to clobber one of
26895 the call-saved CR fields, we'd have incorrect CFI. To prevent
26896 this from happening, we mark the store to memory as a use of
26897 those CR fields, which prevents any such instruction from being
26898 scheduled in between the two instructions. */
26899 rtx crsave_v[9];
26900 int n_crsave = 0;
26901 int i;
26903 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
26904 for (i = 0; i < 8; i++)
26905 if (save_reg_p (CR0_REGNO + i))
26906 crsave_v[n_crsave++]
26907 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26909 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26910 gen_rtvec_v (n_crsave, crsave_v)));
26911 END_USE (REGNO (cr_save_rtx));
26913 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26914 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26915 so we need to construct a frame expression manually. */
26916 RTX_FRAME_RELATED_P (insn) = 1;
26918 /* Update address to be stack-pointer relative, like
26919 rs6000_frame_related would do. */
26920 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26921 GEN_INT (info->cr_save_offset + sp_off));
26922 mem = gen_frame_mem (SImode, addr);
26924 if (DEFAULT_ABI == ABI_ELFv2)
26926 /* In the ELFv2 ABI we generate separate CFI records for each
26927 CR field that was actually saved. They all point to the
26928 same 32-bit stack slot. */
26929 rtx crframe[8];
26930 int n_crframe = 0;
26932 for (i = 0; i < 8; i++)
26933 if (save_reg_p (CR0_REGNO + i))
26935 crframe[n_crframe]
26936 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26938 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26939 n_crframe++;
26942 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26943 gen_rtx_PARALLEL (VOIDmode,
26944 gen_rtvec_v (n_crframe, crframe)));
26946 else
26948 /* In other ABIs, by convention, we use a single CR regnum to
26949 represent the fact that all call-saved CR fields are saved.
26950 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26951 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26952 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26956 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26957 *separate* slots if the routine calls __builtin_eh_return, so
26958 that they can be independently restored by the unwinder. */
26959 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26961 int i, cr_off = info->ehcr_offset;
26962 rtx crsave;
26964 /* ??? We might get better performance by using multiple mfocrf
26965 instructions. */
26966 crsave = gen_rtx_REG (SImode, 0);
26967 emit_insn (gen_movesi_from_cr (crsave));
26969 for (i = 0; i < 8; i++)
26970 if (!call_used_regs[CR0_REGNO + i])
26972 rtvec p = rtvec_alloc (2);
26973 RTVEC_ELT (p, 0)
26974 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26975 RTVEC_ELT (p, 1)
26976 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26978 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26980 RTX_FRAME_RELATED_P (insn) = 1;
26981 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26982 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26983 sp_reg_rtx, cr_off + sp_off));
26985 cr_off += reg_size;
26989 /* Update stack and set back pointer unless this is V.4,
26990 for which it was done previously. */
26991 if (!WORLD_SAVE_P (info) && info->push_p
26992 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26994 rtx ptr_reg = NULL;
26995 int ptr_off = 0;
26997 /* If saving altivec regs we need to be able to address all save
26998 locations using a 16-bit offset. */
26999 if ((strategy & SAVE_INLINE_VRS) == 0
27000 || (info->altivec_size != 0
27001 && (info->altivec_save_offset + info->altivec_size - 16
27002 + info->total_size - frame_off) > 32767)
27003 || (info->vrsave_size != 0
27004 && (info->vrsave_save_offset
27005 + info->total_size - frame_off) > 32767))
27007 int sel = SAVRES_SAVE | SAVRES_VR;
27008 unsigned ptr_regno = ptr_regno_for_savres (sel);
27010 if (using_static_chain_p
27011 && ptr_regno == STATIC_CHAIN_REGNUM)
27012 ptr_regno = 12;
27013 if (REGNO (frame_reg_rtx) != ptr_regno)
27014 START_USE (ptr_regno);
27015 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27016 frame_reg_rtx = ptr_reg;
27017 ptr_off = info->altivec_save_offset + info->altivec_size;
27018 frame_off = -ptr_off;
27020 else if (REGNO (frame_reg_rtx) == 1)
27021 frame_off = info->total_size;
27022 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27023 ptr_reg, ptr_off);
27024 if (REGNO (frame_reg_rtx) == 12)
27025 sp_adjust = 0;
27026 sp_off = info->total_size;
27027 if (frame_reg_rtx != sp_reg_rtx)
27028 rs6000_emit_stack_tie (frame_reg_rtx, false);
27031 /* Set frame pointer, if needed. */
27032 if (frame_pointer_needed)
27034 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27035 sp_reg_rtx);
27036 RTX_FRAME_RELATED_P (insn) = 1;
27039 /* Save AltiVec registers if needed. Save here because the red zone does
27040 not always include AltiVec registers. */
27041 if (!WORLD_SAVE_P (info)
27042 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27044 int end_save = info->altivec_save_offset + info->altivec_size;
27045 int ptr_off;
27046 /* Oddly, the vector save/restore functions point r0 at the end
27047 of the save area, then use r11 or r12 to load offsets for
27048 [reg+reg] addressing. */
27049 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27050 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27051 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27053 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27054 NOT_INUSE (0);
27055 if (scratch_regno == 12)
27056 sp_adjust = 0;
27057 if (end_save + frame_off != 0)
27059 rtx offset = GEN_INT (end_save + frame_off);
27061 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27063 else
27064 emit_move_insn (ptr_reg, frame_reg_rtx);
27066 ptr_off = -end_save;
27067 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27068 info->altivec_save_offset + ptr_off,
27069 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27070 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27071 NULL_RTX, NULL_RTX);
27072 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27074 /* The oddity mentioned above clobbered our frame reg. */
27075 emit_move_insn (frame_reg_rtx, ptr_reg);
27076 frame_off = ptr_off;
27079 else if (!WORLD_SAVE_P (info)
27080 && info->altivec_size != 0)
27082 int i;
27084 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27085 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27087 rtx areg, savereg, mem;
27088 HOST_WIDE_INT offset;
27090 offset = (info->altivec_save_offset + frame_off
27091 + 16 * (i - info->first_altivec_reg_save));
27093 savereg = gen_rtx_REG (V4SImode, i);
27095 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
27097 mem = gen_frame_mem (V4SImode,
27098 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27099 GEN_INT (offset)));
27100 insn = emit_insn (gen_rtx_SET (mem, savereg));
27101 areg = NULL_RTX;
27103 else
27105 NOT_INUSE (0);
27106 areg = gen_rtx_REG (Pmode, 0);
27107 emit_move_insn (areg, GEN_INT (offset));
27109 /* AltiVec addressing mode is [reg+reg]. */
27110 mem = gen_frame_mem (V4SImode,
27111 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27113 /* Rather than emitting a generic move, force use of the stvx
27114 instruction, which we always want on ISA 2.07 (power8) systems.
27115 In particular we don't want xxpermdi/stxvd2x for little
27116 endian. */
27117 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27120 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27121 areg, GEN_INT (offset));
27125 /* VRSAVE is a bit vector representing which AltiVec registers
27126 are used. The OS uses this to determine which vector
27127 registers to save on a context switch. We need to save
27128 VRSAVE on the stack frame, add whatever AltiVec registers we
27129 used in this function, and do the corresponding magic in the
27130 epilogue. */
27132 if (!WORLD_SAVE_P (info)
27133 && info->vrsave_size != 0)
27135 rtx reg, vrsave;
27136 int offset;
27137 int save_regno;
27139 /* Get VRSAVE into a GPR.  Note that ABI_V4 and ABI_DARWIN might
27140 be using r12 as frame_reg_rtx and r11 as the static chain
27141 pointer for nested functions. */
27142 save_regno = 12;
27143 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27144 && !using_static_chain_p)
27145 save_regno = 11;
27146 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27148 save_regno = 11;
27149 if (using_static_chain_p)
27150 save_regno = 0;
27153 NOT_INUSE (save_regno);
27154 reg = gen_rtx_REG (SImode, save_regno);
27155 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27156 if (TARGET_MACHO)
27157 emit_insn (gen_get_vrsave_internal (reg));
27158 else
27159 emit_insn (gen_rtx_SET (reg, vrsave));
27161 /* Save VRSAVE. */
27162 offset = info->vrsave_save_offset + frame_off;
27163 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
27165 /* Include the registers in the mask. */
27166 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
27168 insn = emit_insn (generate_set_vrsave (reg, info, 0));
27171 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27172 if (!TARGET_SINGLE_PIC_BASE
27173 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27174 || (DEFAULT_ABI == ABI_V4
27175 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27176 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27178 /* If emit_load_toc_table will use the link register, we need to save
27179 it. We use R12 for this purpose because emit_load_toc_table
27180 can use register 0. This allows us to use a plain 'blr' to return
27181 from the procedure more often. */
27182 int save_LR_around_toc_setup = (TARGET_ELF
27183 && DEFAULT_ABI == ABI_V4
27184 && flag_pic
27185 && ! info->lr_save_p
27186 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27187 if (save_LR_around_toc_setup)
27189 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27190 rtx tmp = gen_rtx_REG (Pmode, 12);
27192 sp_adjust = 0;
27193 insn = emit_move_insn (tmp, lr);
27194 RTX_FRAME_RELATED_P (insn) = 1;
27196 rs6000_emit_load_toc_table (TRUE);
27198 insn = emit_move_insn (lr, tmp);
27199 add_reg_note (insn, REG_CFA_RESTORE, lr);
27200 RTX_FRAME_RELATED_P (insn) = 1;
27202 else
27203 rs6000_emit_load_toc_table (TRUE);
27206 #if TARGET_MACHO
27207 if (!TARGET_SINGLE_PIC_BASE
27208 && DEFAULT_ABI == ABI_DARWIN
27209 && flag_pic && crtl->uses_pic_offset_table)
27211 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27212 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27214 /* Save and restore LR locally around this call (in R0). */
27215 if (!info->lr_save_p)
27216 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27218 emit_insn (gen_load_macho_picbase (src));
27220 emit_move_insn (gen_rtx_REG (Pmode,
27221 RS6000_PIC_OFFSET_TABLE_REGNUM),
27222 lr);
27224 if (!info->lr_save_p)
27225 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27227 #endif
27229 /* If we need to, save the TOC register after doing the stack setup.
27230 Do not emit eh frame info for this save. The unwinder wants info,
27231 conceptually attached to instructions in this function, about
27232 register values in the caller of this function. This R2 may have
27233 already been changed from the value in the caller.
27234 We don't attempt to write accurate DWARF EH frame info for R2
27235 because code emitted by gcc for a (non-pointer) function call
27236 doesn't save and restore R2. Instead, R2 is managed out-of-line
27237 by a linker generated plt call stub when the function resides in
27238 a shared library. This behavior is costly to describe in DWARF,
27239 both in terms of the size of DWARF info and the time taken in the
27240 unwinder to interpret it. R2 changes, apart from the
27241 calls_eh_return case earlier in this function, are handled by
27242 linux-unwind.h frob_update_context. */
27243 if (rs6000_save_toc_in_prologue_p ())
27245 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27246 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
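/* On ELFv2, where RS6000_TOC_SAVE_SLOT is 24, this comes out as
	std 2,24(1)
   matching the slot the linker's plt call stub uses and the
   `ld 2,24(1)' the linker substitutes for the nop after a
   cross-module bl.  (A sketch of the usual linker behavior noted
   above; ELFv1/AIX use offset 40 instead.)  */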
27249 if (using_split_stack && split_stack_arg_pointer_used_p ())
27251 /* Set up the arg pointer (r12) for -fsplit-stack code. If
27252 __morestack was called, it left the arg pointer to the old
27253 stack in r29. Otherwise, the arg pointer is the top of the
27254 current frame. */
27255 cfun->machine->split_stack_argp_used = true;
27256 if (sp_adjust)
27258 rtx r12 = gen_rtx_REG (Pmode, 12);
27259 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
27260 emit_insn_before (set_r12, sp_adjust);
27262 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
27264 rtx r12 = gen_rtx_REG (Pmode, 12);
27265 if (frame_off == 0)
27266 emit_move_insn (r12, frame_reg_rtx);
27267 else
27268 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
27270 if (info->push_p)
27272 rtx r12 = gen_rtx_REG (Pmode, 12);
27273 rtx r29 = gen_rtx_REG (Pmode, 29);
27274 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27275 rtx not_more = gen_label_rtx ();
27276 rtx jump;
27278 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27279 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
27280 gen_rtx_LABEL_REF (VOIDmode, not_more),
27281 pc_rtx);
27282 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27283 JUMP_LABEL (jump) = not_more;
27284 LABEL_NUSES (not_more) += 1;
27285 emit_move_insn (r12, r29);
27286 emit_label (not_more);
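/* In outline, the code just emitted for the info->push_p case is
   (illustrative; `0:' stands for the not_more label):
	bge 7,0f	# cr7 still holds the __morestack compare
	mr 12,29	# __morestack ran; old stack's args via r29
   0:
   so r12 only picks up r29 when a new stack segment was built.  */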
27291 /* Output .extern statements for the save/restore routines we use. */
27293 static void
27294 rs6000_output_savres_externs (FILE *file)
27296 rs6000_stack_t *info = rs6000_stack_info ();
27298 if (TARGET_DEBUG_STACK)
27299 debug_stack_info (info);
27301 /* Write .extern for any function we will call to save and restore
27302 fp values. */
27303 if (info->first_fp_reg_save < 64
27304 && !TARGET_MACHO
27305 && !TARGET_ELF)
27307 char *name;
27308 int regno = info->first_fp_reg_save - 32;
27310 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27312 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27313 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27314 name = rs6000_savres_routine_name (info, regno, sel);
27315 fprintf (file, "\t.extern %s\n", name);
27317 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27319 bool lr = (info->savres_strategy
27320 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27321 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27322 name = rs6000_savres_routine_name (info, regno, sel);
27323 fprintf (file, "\t.extern %s\n", name);
27328 /* Write function prologue. */
27330 static void
27331 rs6000_output_function_prologue (FILE *file,
27332 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27334 if (!cfun->is_thunk)
27335 rs6000_output_savres_externs (file);
27337 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27338 immediately after the global entry point label. */
27339 if (rs6000_global_entry_point_needed_p ())
27341 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27343 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27345 if (TARGET_CMODEL != CMODEL_LARGE)
27347 /* In the small and medium code models, we assume the TOC is less
27348 than 2 GB away from the text section, so it can be computed via the
27349 following two-instruction sequence. */
27350 char buf[256];
27352 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27353 fprintf (file, "0:\taddis 2,12,.TOC.-");
27354 assemble_name (file, buf);
27355 fprintf (file, "@ha\n");
27356 fprintf (file, "\taddi 2,2,.TOC.-");
27357 assemble_name (file, buf);
27358 fprintf (file, "@l\n");
27360 else
27362 /* In the large code model, we allow arbitrary offsets between the
27363 TOC and the text section, so we have to load the offset from
27364 memory. The data field is emitted directly before the global
27365 entry point in rs6000_elf_declare_function_name. */
27366 char buf[256];
27368 #ifdef HAVE_AS_ENTRY_MARKERS
27369 /* If supported by the linker, emit a marker relocation. If the
27370 total code size of the final executable or shared library
27371 happens to fit into 2 GB after all, the linker will replace
27372 this code sequence with the sequence for the small or medium
27373 code model. */
27374 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27375 #endif
27376 fprintf (file, "\tld 2,");
27377 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27378 assemble_name (file, buf);
27379 fprintf (file, "-");
27380 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27381 assemble_name (file, buf);
27382 fprintf (file, "(12)\n");
27383 fprintf (file, "\tadd 2,2,12\n");
27386 fputs ("\t.localentry\t", file);
27387 assemble_name (file, name);
27388 fputs (",.-", file);
27389 assemble_name (file, name);
27390 fputs ("\n", file);
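/* Pieced together, with rs6000_pic_labelno == 0 and the medium code
   model, the global entry point therefore reads (NAME standing for
   the function's assembler name):
   .LCF0:
   0:	addis 2,12,.TOC.-.LCF0@ha
	addi 2,2,.TOC.-.LCF0@l
	.localentry NAME,.-NAME
   i.e. r2 is derived from the entry address passed in r12.  */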
27393 /* Output -mprofile-kernel code. This needs to be done here instead of
27394 in output_function_profile since it must go after the ELFv2 ABI
27395 local entry point. */
27396 if (TARGET_PROFILE_KERNEL && crtl->profile)
27398 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27399 gcc_assert (!TARGET_32BIT);
27401 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27403 /* In the ELFv2 ABI we have no compiler stack word. It must be
27404 the responsibility of _mcount to preserve the static chain
27405 register if required. */
27406 if (DEFAULT_ABI != ABI_ELFv2
27407 && cfun->static_chain_decl != NULL)
27409 asm_fprintf (file, "\tstd %s,24(%s)\n",
27410 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27411 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27412 asm_fprintf (file, "\tld %s,24(%s)\n",
27413 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27415 else
27416 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27419 rs6000_pic_labelno++;
27422 /* -mprofile-kernel code calls mcount before the function prolog,
27423 so a profiled leaf function should stay a leaf function. */
27424 static bool
27425 rs6000_keep_leaf_when_profiled ()
27427 return TARGET_PROFILE_KERNEL;
27430 /* Non-zero if vmx regs are restored before the frame pop, zero if
27431 we restore after the pop when possible. */
27432 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27434 /* Restoring cr is a two step process: loading a reg from the frame
27435 save, then moving the reg to cr. For ABI_V4 we must let the
27436 unwinder know that the stack location is no longer valid at or
27437 before the stack deallocation, but we can't emit a cfa_restore for
27438 cr at the stack deallocation like we do for other registers.
27439 The trouble is that it is possible for the move to cr to be
27440 scheduled after the stack deallocation. So say exactly where cr
27441 is located on each of the two insns. */
27443 static rtx
27444 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27446 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27447 rtx reg = gen_rtx_REG (SImode, regno);
27448 rtx_insn *insn = emit_move_insn (reg, mem);
27450 if (!exit_func && DEFAULT_ABI == ABI_V4)
27452 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27453 rtx set = gen_rtx_SET (reg, cr);
27455 add_reg_note (insn, REG_CFA_REGISTER, set);
27456 RTX_FRAME_RELATED_P (insn) = 1;
27458 return reg;
27461 /* Reload CR from REG. */
27463 static void
27464 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27466 int count = 0;
27467 int i;
27469 if (using_mfcr_multiple)
27471 for (i = 0; i < 8; i++)
27472 if (save_reg_p (CR0_REGNO + i))
27473 count++;
27474 gcc_assert (count);
27477 if (using_mfcr_multiple && count > 1)
27479 rtx_insn *insn;
27480 rtvec p;
27481 int ndx;
27483 p = rtvec_alloc (count);
27485 ndx = 0;
27486 for (i = 0; i < 8; i++)
27487 if (save_reg_p (CR0_REGNO + i))
27489 rtvec r = rtvec_alloc (2);
27490 RTVEC_ELT (r, 0) = reg;
27491 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27492 RTVEC_ELT (p, ndx) =
27493 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27494 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27495 ndx++;
27497 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27498 gcc_assert (ndx == count);
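/* The PARALLEL built above typically assembles to a single mtcrf
   with a multi-field mask; e.g. restoring cr2-cr4 from r12 would be
	mtcrf 0x38,12
   where 0x38 selects CR fields 2-4 (field 0 is the most significant
   mask bit).  Illustrative only; the mask depends on which fields
   save_reg_p reports.  */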
27500 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27501 CR field separately. */
27502 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27504 for (i = 0; i < 8; i++)
27505 if (save_reg_p (CR0_REGNO + i))
27506 add_reg_note (insn, REG_CFA_RESTORE,
27507 gen_rtx_REG (SImode, CR0_REGNO + i));
27509 RTX_FRAME_RELATED_P (insn) = 1;
27512 else
27513 for (i = 0; i < 8; i++)
27514 if (save_reg_p (CR0_REGNO + i))
27516 rtx insn = emit_insn (gen_movsi_to_cr_one
27517 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27519 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27520 CR field separately, attached to the insn that in fact
27521 restores this particular CR field. */
27522 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27524 add_reg_note (insn, REG_CFA_RESTORE,
27525 gen_rtx_REG (SImode, CR0_REGNO + i));
27527 RTX_FRAME_RELATED_P (insn) = 1;
27531 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27532 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27533 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27535 rtx_insn *insn = get_last_insn ();
27536 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27538 add_reg_note (insn, REG_CFA_RESTORE, cr);
27539 RTX_FRAME_RELATED_P (insn) = 1;
27543 /* Like cr, the move to lr instruction can be scheduled after the
27544 stack deallocation, but unlike cr, its stack frame save is still
27545 valid. So we only need to emit the cfa_restore on the correct
27546 instruction. */
27548 static void
27549 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27551 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27552 rtx reg = gen_rtx_REG (Pmode, regno);
27554 emit_move_insn (reg, mem);
27557 static void
27558 restore_saved_lr (int regno, bool exit_func)
27560 rtx reg = gen_rtx_REG (Pmode, regno);
27561 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27562 rtx_insn *insn = emit_move_insn (lr, reg);
27564 if (!exit_func && flag_shrink_wrap)
27566 add_reg_note (insn, REG_CFA_RESTORE, lr);
27567 RTX_FRAME_RELATED_P (insn) = 1;
27571 static rtx
27572 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27574 if (DEFAULT_ABI == ABI_ELFv2)
27576 int i;
27577 for (i = 0; i < 8; i++)
27578 if (save_reg_p (CR0_REGNO + i))
27580 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27581 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27582 cfa_restores);
27585 else if (info->cr_save_p)
27586 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27587 gen_rtx_REG (SImode, CR2_REGNO),
27588 cfa_restores);
27590 if (info->lr_save_p)
27591 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27592 gen_rtx_REG (Pmode, LR_REGNO),
27593 cfa_restores);
27594 return cfa_restores;
27597 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27598 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
27599 bytes below the stack pointer that are not clobbered by signals. */
27601 static inline bool
27602 offset_below_red_zone_p (HOST_WIDE_INT offset)
27604 return offset < (DEFAULT_ABI == ABI_V4
27605 ? 0
27606 : TARGET_32BIT ? -220 : -288);
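/* For example, with the 64-bit AIX/ELF 288-byte red zone an offset
   of -256 is still protected while -320 is not; under V.4 any
   negative offset may be clobbered by a signal handler.  */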
27609 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27611 static void
27612 emit_cfa_restores (rtx cfa_restores)
27614 rtx_insn *insn = get_last_insn ();
27615 rtx *loc = &REG_NOTES (insn);
27617 while (*loc)
27618 loc = &XEXP (*loc, 1);
27619 *loc = cfa_restores;
27620 RTX_FRAME_RELATED_P (insn) = 1;
27623 /* Emit function epilogue as insns. */
27625 void
27626 rs6000_emit_epilogue (int sibcall)
27628 rs6000_stack_t *info;
27629 int restoring_GPRs_inline;
27630 int restoring_FPRs_inline;
27631 int using_load_multiple;
27632 int using_mtcr_multiple;
27633 int use_backchain_to_restore_sp;
27634 int restore_lr;
27635 int strategy;
27636 HOST_WIDE_INT frame_off = 0;
27637 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27638 rtx frame_reg_rtx = sp_reg_rtx;
27639 rtx cfa_restores = NULL_RTX;
27640 rtx insn;
27641 rtx cr_save_reg = NULL_RTX;
27642 machine_mode reg_mode = Pmode;
27643 int reg_size = TARGET_32BIT ? 4 : 8;
27644 int i;
27645 bool exit_func;
27646 unsigned ptr_regno;
27648 info = rs6000_stack_info ();
27650 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27652 reg_mode = V2SImode;
27653 reg_size = 8;
27656 strategy = info->savres_strategy;
27657 using_load_multiple = strategy & REST_MULTIPLE;
27658 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
27659 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
27660 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
27661 || rs6000_cpu == PROCESSOR_PPC603
27662 || rs6000_cpu == PROCESSOR_PPC750
27663 || optimize_size);
27664 /* Restore via the backchain when we have a large frame, since this
27665 is more efficient than an addis, addi pair. The second condition
27666 here will not trigger at the moment; we don't actually need a
27667 frame pointer for alloca, but the generic parts of the compiler
27668 give us one anyway. */
27669 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
27670 ? info->lr_save_offset
27671 : 0) > 32767
27672 || (cfun->calls_alloca
27673 && !frame_pointer_needed));
27674 restore_lr = (info->lr_save_p
27675 && (restoring_FPRs_inline
27676 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27677 && (restoring_GPRs_inline
27678 || info->first_fp_reg_save < 64));
27680 if (WORLD_SAVE_P (info))
27682 int i, j;
27683 char rname[30];
27684 const char *alloc_rname;
27685 rtvec p;
27687 /* eh_rest_world_r10 will return to the location saved in the LR
27688 stack slot (which is not likely to be our caller).
27689 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27690 rest_world is similar, except any R10 parameter is ignored.
27691 The exception-handling stuff that was here in 2.95 is no
27692 longer necessary. */
27694 p = rtvec_alloc (9
27696 + 32 - info->first_gp_reg_save
27697 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27698 + 63 + 1 - info->first_fp_reg_save);
27700 strcpy (rname, ((crtl->calls_eh_return) ?
27701 "*eh_rest_world_r10" : "*rest_world"));
27702 alloc_rname = ggc_strdup (rname);
27704 j = 0;
27705 RTVEC_ELT (p, j++) = ret_rtx;
27706 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27707 gen_rtx_REG (Pmode,
27708 LR_REGNO));
27709 RTVEC_ELT (p, j++)
27710 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
27711 /* The instruction pattern requires a clobber here;
27712 it is shared with the restVEC helper. */
27713 RTVEC_ELT (p, j++)
27714 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
27717 /* CR register traditionally saved as CR2. */
27718 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27719 RTVEC_ELT (p, j++)
27720 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27721 if (flag_shrink_wrap)
27723 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27724 gen_rtx_REG (Pmode, LR_REGNO),
27725 cfa_restores);
27726 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27730 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27732 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27733 RTVEC_ELT (p, j++)
27734 = gen_frame_load (reg,
27735 frame_reg_rtx, info->gp_save_offset + reg_size * i);
27736 if (flag_shrink_wrap)
27737 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27739 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27741 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
27742 RTVEC_ELT (p, j++)
27743 = gen_frame_load (reg,
27744 frame_reg_rtx, info->altivec_save_offset + 16 * i);
27745 if (flag_shrink_wrap)
27746 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27748 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
27750 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27751 ? DFmode : SFmode),
27752 info->first_fp_reg_save + i);
27753 RTVEC_ELT (p, j++)
27754 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
27755 if (flag_shrink_wrap)
27756 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27758 RTVEC_ELT (p, j++)
27759 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
27760 RTVEC_ELT (p, j++)
27761 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
27762 RTVEC_ELT (p, j++)
27763 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
27764 RTVEC_ELT (p, j++)
27765 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
27766 RTVEC_ELT (p, j++)
27767 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
27768 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27770 if (flag_shrink_wrap)
27772 REG_NOTES (insn) = cfa_restores;
27773 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27774 RTX_FRAME_RELATED_P (insn) = 1;
27776 return;
27779 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
27780 if (info->push_p)
27781 frame_off = info->total_size;
27783 /* Restore AltiVec registers if we must do so before adjusting the
27784 stack. */
27785 if (info->altivec_size != 0
27786 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27787 || (DEFAULT_ABI != ABI_V4
27788 && offset_below_red_zone_p (info->altivec_save_offset))))
27790 int i;
27791 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27793 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27794 if (use_backchain_to_restore_sp)
27796 int frame_regno = 11;
27798 if ((strategy & REST_INLINE_VRS) == 0)
27800 /* Of r11 and r12, select the one not clobbered by an
27801 out-of-line restore function for the frame register. */
27802 frame_regno = 11 + 12 - scratch_regno;
27804 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
27805 emit_move_insn (frame_reg_rtx,
27806 gen_rtx_MEM (Pmode, sp_reg_rtx));
27807 frame_off = 0;
27809 else if (frame_pointer_needed)
27810 frame_reg_rtx = hard_frame_pointer_rtx;
27812 if ((strategy & REST_INLINE_VRS) == 0)
27814 int end_save = info->altivec_save_offset + info->altivec_size;
27815 int ptr_off;
27816 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27817 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27819 if (end_save + frame_off != 0)
27821 rtx offset = GEN_INT (end_save + frame_off);
27823 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27825 else
27826 emit_move_insn (ptr_reg, frame_reg_rtx);
27828 ptr_off = -end_save;
27829 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27830 info->altivec_save_offset + ptr_off,
27831 0, V4SImode, SAVRES_VR);
27833 else
27835 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27836 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27838 rtx addr, areg, mem, insn;
27839 rtx reg = gen_rtx_REG (V4SImode, i);
27840 HOST_WIDE_INT offset
27841 = (info->altivec_save_offset + frame_off
27842 + 16 * (i - info->first_altivec_reg_save));
27844 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
27846 mem = gen_frame_mem (V4SImode,
27847 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27848 GEN_INT (offset)));
27849 insn = gen_rtx_SET (reg, mem);
27851 else
27853 areg = gen_rtx_REG (Pmode, 0);
27854 emit_move_insn (areg, GEN_INT (offset));
27856 /* AltiVec addressing mode is [reg+reg]. */
27857 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27858 mem = gen_frame_mem (V4SImode, addr);
27860 /* Rather than emitting a generic move, force use of the
27861 lvx instruction, which we always want. In particular we
27862 don't want lxvd2x/xxpermdi for little endian. */
27863 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27866 (void) emit_insn (insn);
27870 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27871 if (((strategy & REST_INLINE_VRS) == 0
27872 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27873 && (flag_shrink_wrap
27874 || (offset_below_red_zone_p
27875 (info->altivec_save_offset
27876 + 16 * (i - info->first_altivec_reg_save)))))
27878 rtx reg = gen_rtx_REG (V4SImode, i);
27879 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27883 /* Restore VRSAVE if we must do so before adjusting the stack. */
27884 if (info->vrsave_size != 0
27885 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27886 || (DEFAULT_ABI != ABI_V4
27887 && offset_below_red_zone_p (info->vrsave_save_offset))))
27889 rtx reg;
27891 if (frame_reg_rtx == sp_reg_rtx)
27893 if (use_backchain_to_restore_sp)
27895 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27896 emit_move_insn (frame_reg_rtx,
27897 gen_rtx_MEM (Pmode, sp_reg_rtx));
27898 frame_off = 0;
27900 else if (frame_pointer_needed)
27901 frame_reg_rtx = hard_frame_pointer_rtx;
27904 reg = gen_rtx_REG (SImode, 12);
27905 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27906 info->vrsave_save_offset + frame_off));
27908 emit_insn (generate_set_vrsave (reg, info, 1));
27911 insn = NULL_RTX;
27912 /* If we have a large stack frame, restore the old stack pointer
27913 using the backchain. */
27914 if (use_backchain_to_restore_sp)
27916 if (frame_reg_rtx == sp_reg_rtx)
27918 /* Under V.4, don't reset the stack pointer until after we're done
27919 loading the saved registers. */
27920 if (DEFAULT_ABI == ABI_V4)
27921 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27923 insn = emit_move_insn (frame_reg_rtx,
27924 gen_rtx_MEM (Pmode, sp_reg_rtx));
27925 frame_off = 0;
27927 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27928 && DEFAULT_ABI == ABI_V4)
27929 /* frame_reg_rtx has been set up by the altivec restore. */
27931 else
27933 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27934 frame_reg_rtx = sp_reg_rtx;
27937 /* If we have a frame pointer, we can restore the old stack pointer
27938 from it. */
27939 else if (frame_pointer_needed)
27941 frame_reg_rtx = sp_reg_rtx;
27942 if (DEFAULT_ABI == ABI_V4)
27943 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27944 /* Prevent reordering memory accesses against stack pointer restore. */
27945 else if (cfun->calls_alloca
27946 || offset_below_red_zone_p (-info->total_size))
27947 rs6000_emit_stack_tie (frame_reg_rtx, true);
27949 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27950 GEN_INT (info->total_size)));
27951 frame_off = 0;
27953 else if (info->push_p
27954 && DEFAULT_ABI != ABI_V4
27955 && !crtl->calls_eh_return)
27957 /* Prevent reordering memory accesses against stack pointer restore. */
27958 if (cfun->calls_alloca
27959 || offset_below_red_zone_p (-info->total_size))
27960 rs6000_emit_stack_tie (frame_reg_rtx, false);
27961 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27962 GEN_INT (info->total_size)));
27963 frame_off = 0;
27965 if (insn && frame_reg_rtx == sp_reg_rtx)
27967 if (cfa_restores)
27969 REG_NOTES (insn) = cfa_restores;
27970 cfa_restores = NULL_RTX;
27972 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27973 RTX_FRAME_RELATED_P (insn) = 1;
27976 /* Restore AltiVec registers if we have not done so already. */
27977 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27978 && info->altivec_size != 0
27979 && (DEFAULT_ABI == ABI_V4
27980 || !offset_below_red_zone_p (info->altivec_save_offset)))
27982 int i;
27984 if ((strategy & REST_INLINE_VRS) == 0)
27986 int end_save = info->altivec_save_offset + info->altivec_size;
27987 int ptr_off;
27988 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27989 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27990 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27992 if (end_save + frame_off != 0)
27994 rtx offset = GEN_INT (end_save + frame_off);
27996 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27998 else
27999 emit_move_insn (ptr_reg, frame_reg_rtx);
28001 ptr_off = -end_save;
28002 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28003 info->altivec_save_offset + ptr_off,
28004 0, V4SImode, SAVRES_VR);
28005 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28007 /* Frame reg was clobbered by out-of-line save. Restore it
28008 from ptr_reg, and if we are calling out-of-line gpr or
28009 fpr restore set up the correct pointer and offset. */
28010 unsigned newptr_regno = 1;
28011 if (!restoring_GPRs_inline)
28013 bool lr = info->gp_save_offset + info->gp_size == 0;
28014 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28015 newptr_regno = ptr_regno_for_savres (sel);
28016 end_save = info->gp_save_offset + info->gp_size;
28018 else if (!restoring_FPRs_inline)
28020 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28021 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28022 newptr_regno = ptr_regno_for_savres (sel);
28023 end_save = info->fp_save_offset + info->fp_size;
28026 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28027 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28029 if (end_save + ptr_off != 0)
28031 rtx offset = GEN_INT (end_save + ptr_off);
28033 frame_off = -end_save;
28034 if (TARGET_32BIT)
28035 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28036 ptr_reg, offset));
28037 else
28038 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28039 ptr_reg, offset));
28041 else
28043 frame_off = ptr_off;
28044 emit_move_insn (frame_reg_rtx, ptr_reg);
28048 else
28050 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28051 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28053 rtx addr, areg, mem, insn;
28054 rtx reg = gen_rtx_REG (V4SImode, i);
28055 HOST_WIDE_INT offset
28056 = (info->altivec_save_offset + frame_off
28057 + 16 * (i - info->first_altivec_reg_save));
28059 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28061 mem = gen_frame_mem (V4SImode,
28062 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28063 GEN_INT (offset)));
28064 insn = gen_rtx_SET (reg, mem);
28066 else
28068 areg = gen_rtx_REG (Pmode, 0);
28069 emit_move_insn (areg, GEN_INT (offset));
28071 /* AltiVec addressing mode is [reg+reg]. */
28072 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28073 mem = gen_frame_mem (V4SImode, addr);
28075 /* Rather than emitting a generic move, force use of the
28076 lvx instruction, which we always want. In particular we
28077 don't want lxvd2x/xxpermdi for little endian. */
28078 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28081 (void) emit_insn (insn);
28085 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28086 if (((strategy & REST_INLINE_VRS) == 0
28087 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28088 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28090 rtx reg = gen_rtx_REG (V4SImode, i);
28091 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28095 /* Restore VRSAVE if we have not done so already. */
28096 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28097 && info->vrsave_size != 0
28098 && (DEFAULT_ABI == ABI_V4
28099 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28101 rtx reg;
28103 reg = gen_rtx_REG (SImode, 12);
28104 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28105 info->vrsave_save_offset + frame_off));
28107 emit_insn (generate_set_vrsave (reg, info, 1));
28110 /* If we exit by an out-of-line restore function on ABI_V4 then that
28111 function will deallocate the stack, so we don't need to worry
28112 about the unwinder restoring cr from an invalid stack frame
28113 location. */
28114 exit_func = (!restoring_FPRs_inline
28115 || (!restoring_GPRs_inline
28116 && info->first_fp_reg_save == 64));
28118 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28119 *separate* slots if the routine calls __builtin_eh_return, so
28120 that they can be independently restored by the unwinder. */
28121 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28123 int i, cr_off = info->ehcr_offset;
28125 for (i = 0; i < 8; i++)
28126 if (!call_used_regs[CR0_REGNO + i])
28128 rtx reg = gen_rtx_REG (SImode, 0);
28129 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28130 cr_off + frame_off));
28132 insn = emit_insn (gen_movsi_to_cr_one
28133 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28135 if (!exit_func && flag_shrink_wrap)
28137 add_reg_note (insn, REG_CFA_RESTORE,
28138 gen_rtx_REG (SImode, CR0_REGNO + i));
28140 RTX_FRAME_RELATED_P (insn) = 1;
28143 cr_off += reg_size;
28147 /* Get the old lr if we saved it. If we are restoring registers
28148 out-of-line, then the out-of-line routines can do this for us. */
28149 if (restore_lr && restoring_GPRs_inline)
28150 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28152 /* Get the old cr if we saved it. */
28153 if (info->cr_save_p)
28155 unsigned cr_save_regno = 12;
28157 if (!restoring_GPRs_inline)
28159 /* Ensure we don't use the register used by the out-of-line
28160 gpr register restore below. */
28161 bool lr = info->gp_save_offset + info->gp_size == 0;
28162 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28163 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28165 if (gpr_ptr_regno == 12)
28166 cr_save_regno = 11;
28167 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28169 else if (REGNO (frame_reg_rtx) == 12)
28170 cr_save_regno = 11;
28172 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28173 info->cr_save_offset + frame_off,
28174 exit_func);
28177 /* Set LR here to try to overlap restores below. */
28178 if (restore_lr && restoring_GPRs_inline)
28179 restore_saved_lr (0, exit_func);
28181 /* Load exception handler data registers, if needed. */
28182 if (crtl->calls_eh_return)
28184 unsigned int i, regno;
28186 if (TARGET_AIX)
28188 rtx reg = gen_rtx_REG (reg_mode, 2);
28189 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28190 frame_off + RS6000_TOC_SAVE_SLOT));
28193 for (i = 0; ; ++i)
28195 rtx mem;
28197 regno = EH_RETURN_DATA_REGNO (i);
28198 if (regno == INVALID_REGNUM)
28199 break;
28201 /* Note: possible use of r0 here to address SPE regs. */
28202 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28203 info->ehrd_offset + frame_off
28204 + reg_size * (int) i);
28206 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28210 /* Restore GPRs. This is done as a PARALLEL if we are using
28211 the load-multiple instructions. */
28212 if (TARGET_SPE_ABI
28213 && info->spe_64bit_regs_used
28214 && info->first_gp_reg_save != 32)
28216 /* Determine whether we can address all of the registers that need
28217 to be saved with an offset from frame_reg_rtx that fits in
28218 the small const field for SPE memory instructions. */
28219 int spe_regs_addressable
28220 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28221 + reg_size * (32 - info->first_gp_reg_save - 1))
28222 && restoring_GPRs_inline);
28224 if (!spe_regs_addressable)
28226 int ool_adjust = 0;
28227 rtx old_frame_reg_rtx = frame_reg_rtx;
28228 /* Make r11 point to the start of the SPE save area. We worried about
28229 not clobbering it when we were saving registers in the prologue.
28230 There's no need to worry here because the static chain is passed
28231 anew to every function. */
28233 if (!restoring_GPRs_inline)
28234 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28235 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28236 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
28237 GEN_INT (info->spe_gp_save_offset
28238 + frame_off
28239 - ool_adjust)));
28240 /* Keep the invariant that frame_reg_rtx + frame_off points
28241 at the top of the stack frame. */
28242 frame_off = -info->spe_gp_save_offset + ool_adjust;
28245 if (restoring_GPRs_inline)
28247 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
28249 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28250 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28252 rtx offset, addr, mem, reg;
28254 /* We're doing all this to ensure that the immediate offset
28255 fits into the immediate field of 'evldd'. */
28256 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
28258 offset = GEN_INT (spe_offset + reg_size * i);
28259 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
28260 mem = gen_rtx_MEM (V2SImode, addr);
28261 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28263 emit_move_insn (reg, mem);
28266 else
28267 rs6000_emit_savres_rtx (info, frame_reg_rtx,
28268 info->spe_gp_save_offset + frame_off,
28269 info->lr_save_offset + frame_off,
28270 reg_mode,
28271 SAVRES_GPR | SAVRES_LR);
28273 else if (!restoring_GPRs_inline)
28275 /* We are jumping to an out-of-line function. */
28276 rtx ptr_reg;
28277 int end_save = info->gp_save_offset + info->gp_size;
28278 bool can_use_exit = end_save == 0;
28279 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28280 int ptr_off;
28282 /* Emit stack reset code if we need it. */
28283 ptr_regno = ptr_regno_for_savres (sel);
28284 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28285 if (can_use_exit)
28286 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
28287 else if (end_save + frame_off != 0)
28288 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28289 GEN_INT (end_save + frame_off)));
28290 else if (REGNO (frame_reg_rtx) != ptr_regno)
28291 emit_move_insn (ptr_reg, frame_reg_rtx);
28292 if (REGNO (frame_reg_rtx) == ptr_regno)
28293 frame_off = -end_save;
28295 if (can_use_exit && info->cr_save_p)
28296 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28298 ptr_off = -end_save;
28299 rs6000_emit_savres_rtx (info, ptr_reg,
28300 info->gp_save_offset + ptr_off,
28301 info->lr_save_offset + ptr_off,
28302 reg_mode, sel);
28304 else if (using_load_multiple)
28306 rtvec p;
28307 p = rtvec_alloc (32 - info->first_gp_reg_save);
28308 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28309 RTVEC_ELT (p, i)
28310 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28311 frame_reg_rtx,
28312 info->gp_save_offset + frame_off + reg_size * i);
28313 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28315 else
28317 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28318 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28319 emit_insn (gen_frame_load
28320 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28321 frame_reg_rtx,
28322 info->gp_save_offset + frame_off + reg_size * i));
28325 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28327 /* If the frame pointer was used then we can't delay emitting
28328 a REG_CFA_DEF_CFA note. This must happen on the insn that
28329 restores the frame pointer, r31. We may have already emitted
28330 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28331 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28332 be harmless if emitted. */
28333 if (frame_pointer_needed)
28335 insn = get_last_insn ();
28336 add_reg_note (insn, REG_CFA_DEF_CFA,
28337 plus_constant (Pmode, frame_reg_rtx, frame_off));
28338 RTX_FRAME_RELATED_P (insn) = 1;
28341 /* Set up cfa_restores. We always need these when
28342 shrink-wrapping. If not shrink-wrapping then we only need
28343 the cfa_restore when the stack location is no longer valid.
28344 The cfa_restores must be emitted on or before the insn that
28345 invalidates the stack, and of course must not be emitted
28346 before the insn that actually does the restore. The latter
28347 is why it is a bad idea to emit the cfa_restores as a group
28348 on the last instruction here that actually does a restore:
28349 That insn may be reordered with respect to others doing
28350 restores. */
28351 if (flag_shrink_wrap
28352 && !restoring_GPRs_inline
28353 && info->first_fp_reg_save == 64)
28354 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28356 for (i = info->first_gp_reg_save; i < 32; i++)
28357 if (!restoring_GPRs_inline
28358 || using_load_multiple
28359 || rs6000_reg_live_or_pic_offset_p (i))
28361 rtx reg = gen_rtx_REG (reg_mode, i);
28363 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28367 if (!restoring_GPRs_inline
28368 && info->first_fp_reg_save == 64)
28370 /* We are jumping to an out-of-line function. */
28371 if (cfa_restores)
28372 emit_cfa_restores (cfa_restores);
28373 return;
28376 if (restore_lr && !restoring_GPRs_inline)
28378 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28379 restore_saved_lr (0, exit_func);
28382 /* Restore fpr's if we need to do it without calling a function. */
28383 if (restoring_FPRs_inline)
28384 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28385 if (save_reg_p (info->first_fp_reg_save + i))
28387 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28388 ? DFmode : SFmode),
28389 info->first_fp_reg_save + i);
28390 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28391 info->fp_save_offset + frame_off + 8 * i));
28392 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28393 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28396 /* If we saved cr, restore it here. Just those that were used. */
28397 if (info->cr_save_p)
28398 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28400 /* If this is V.4, unwind the stack pointer after all of the loads
28401 have been done, or set up r11 if we are restoring fp out of line. */
28402 ptr_regno = 1;
28403 if (!restoring_FPRs_inline)
28405 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28406 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28407 ptr_regno = ptr_regno_for_savres (sel);
28410 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
28411 if (REGNO (frame_reg_rtx) == ptr_regno)
28412 frame_off = 0;
28414 if (insn && restoring_FPRs_inline)
28416 if (cfa_restores)
28418 REG_NOTES (insn) = cfa_restores;
28419 cfa_restores = NULL_RTX;
28421 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28422 RTX_FRAME_RELATED_P (insn) = 1;
28425 if (crtl->calls_eh_return)
28427 rtx sa = EH_RETURN_STACKADJ_RTX;
28428 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28431 if (!sibcall)
28433 rtvec p;
28434 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28435 if (! restoring_FPRs_inline)
28437 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
28438 RTVEC_ELT (p, 0) = ret_rtx;
28440 else
28442 if (cfa_restores)
28444 /* We can't hang the cfa_restores off a simple return,
28445 since the shrink-wrap code sometimes uses an existing
28446 return. This means there might be a path from
28447 pre-prologue code to this return, and dwarf2cfi code
28448 wants the eh_frame unwinder state to be the same on
28449 all paths to any point. So we need to emit the
28450 cfa_restores before the return. For -m64 we really
28451 don't need epilogue cfa_restores at all, except for
28452 this irritating dwarf2cfi with shrink-wrap
28453 requirement; the stack red-zone means eh_frame info
28454 from the prologue telling the unwinder to restore
28455 from the stack is perfectly good right to the end of
28456 the function. */
28457 emit_insn (gen_blockage ());
28458 emit_cfa_restores (cfa_restores);
28459 cfa_restores = NULL_RTX;
28461 p = rtvec_alloc (2);
28462 RTVEC_ELT (p, 0) = simple_return_rtx;
28465 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
28466 ? gen_rtx_USE (VOIDmode,
28467 gen_rtx_REG (Pmode, LR_REGNO))
28468 : gen_rtx_CLOBBER (VOIDmode,
28469 gen_rtx_REG (Pmode, LR_REGNO)));
28471 /* If we have to restore more than two FP registers, branch to the
28472 restore function. It will return to our caller. */
28473 if (! restoring_FPRs_inline)
28475 int i;
28476 int reg;
28477 rtx sym;
28479 if (flag_shrink_wrap)
28480 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28482 sym = rs6000_savres_routine_sym (info,
28483 SAVRES_FPR | (lr ? SAVRES_LR : 0));
28484 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
28485 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28486 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28488 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28490 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28492 RTVEC_ELT (p, i + 4)
28493 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28494 if (flag_shrink_wrap)
28495 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28496 cfa_restores);
28500 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28503 if (cfa_restores)
28505 if (sibcall)
28506 /* Ensure the cfa_restores are hung off an insn that won't
28507 be reordered above other restores. */
28508 emit_insn (gen_blockage ());
28510 emit_cfa_restores (cfa_restores);
28514 /* Write function epilogue. */
28516 static void
28517 rs6000_output_function_epilogue (FILE *file,
28518 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28520 #if TARGET_MACHO
28521 macho_branch_islands ();
28522 /* Mach-O doesn't support labels at the end of objects, so if
28523 it looks like we might want one, insert a NOP. */
28525 rtx_insn *insn = get_last_insn ();
28526 rtx_insn *deleted_debug_label = NULL;
28527 while (insn
28528 && NOTE_P (insn)
28529 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28531 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28532 notes; just set their CODE_LABEL_NUMBER to -1 instead;
28533 otherwise there would be code generation differences
28534 between -g and -g0. */
28535 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28536 deleted_debug_label = insn;
28537 insn = PREV_INSN (insn);
28539 if (insn
28540 && (LABEL_P (insn)
28541 || (NOTE_P (insn)
28542 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
28543 fputs ("\tnop\n", file);
28544 else if (deleted_debug_label)
28545 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28546 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28547 CODE_LABEL_NUMBER (insn) = -1;
28549 #endif
28551 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28552 on its format.
28554 We don't output a traceback table if -finhibit-size-directive was
28555 used. The documentation for -finhibit-size-directive reads
28556 ``don't output a @code{.size} assembler directive, or anything
28557 else that would cause trouble if the function is split in the
28558 middle, and the two halves are placed at locations far apart in
28559 memory.'' The traceback table has this property, since it
28560 includes the offset from the start of the function to the
28561 traceback table itself.
28563 System V.4 PowerPC (and the embedded ABI derived from it) uses a
28564 different traceback table. */
28565 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28566 && ! flag_inhibit_size_directive
28567 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28569 const char *fname = NULL;
28570 const char *language_string = lang_hooks.name;
28571 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28572 int i;
28573 int optional_tbtab;
28574 rs6000_stack_t *info = rs6000_stack_info ();
28576 if (rs6000_traceback == traceback_full)
28577 optional_tbtab = 1;
28578 else if (rs6000_traceback == traceback_part)
28579 optional_tbtab = 0;
28580 else
28581 optional_tbtab = !optimize_size && !TARGET_ELF;
28583 if (optional_tbtab)
28585 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28586 while (*fname == '.') /* V.4 encodes . in the name */
28587 fname++;
28589 /* Need label immediately before tbtab, so we can compute
28590 its offset from the function start. */
28591 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28592 ASM_OUTPUT_LABEL (file, fname);
28595 /* The .tbtab pseudo-op can only be used for the first eight
28596 expressions, since it can't handle the possibly variable
28597 length fields that follow. However, if you omit the optional
28598 fields, the assembler outputs zeros for all optional fields
28599 anyway, giving each variable length field its minimum length
28600 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28601 pseudo-op at all. */
28603 /* An all-zero word flags the start of the tbtab, for debuggers
28604 that have to find it by searching forward from the entry
28605 point or from the current pc. */
28606 fputs ("\t.long 0\n", file);
28608 /* Tbtab format type. Use format type 0. */
28609 fputs ("\t.byte 0,", file);
28611 /* Language type. Unfortunately, there does not seem to be any
28612 official way to discover the language being compiled, so we
28613 use language_string.
28614 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28615 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28616 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28617 either, so for now use 0. */
28618 if (lang_GNU_C ()
28619 || ! strcmp (language_string, "GNU GIMPLE")
28620 || ! strcmp (language_string, "GNU Go")
28621 || ! strcmp (language_string, "libgccjit"))
28622 i = 0;
28623 else if (! strcmp (language_string, "GNU F77")
28624 || lang_GNU_Fortran ())
28625 i = 1;
28626 else if (! strcmp (language_string, "GNU Pascal"))
28627 i = 2;
28628 else if (! strcmp (language_string, "GNU Ada"))
28629 i = 3;
28630 else if (lang_GNU_CXX ()
28631 || ! strcmp (language_string, "GNU Objective-C++"))
28632 i = 9;
28633 else if (! strcmp (language_string, "GNU Java"))
28634 i = 13;
28635 else if (! strcmp (language_string, "GNU Objective-C"))
28636 i = 14;
28637 else
28638 gcc_unreachable ();
28639 fprintf (file, "%d,", i);
28641 /* 8 single bit fields: global linkage (not set for C extern linkage,
28642 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28643 from start of procedure stored in tbtab, internal function, function
28644 has controlled storage, function has no toc, function uses fp,
28645 function logs/aborts fp operations. */
28646 /* Assume that fp operations are used if any fp reg must be saved. */
28647 fprintf (file, "%d,",
28648 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
28650 /* 6 bitfields: function is interrupt handler, name present in
28651 proc table, function calls alloca, on condition directives
28652 (controls stack walks, 3 bits), saves condition reg, saves
28653 link reg. */
28654 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28655 set up as a frame pointer, even when there is no alloca call. */
28656 fprintf (file, "%d,",
28657 ((optional_tbtab << 6)
28658 | ((optional_tbtab & frame_pointer_needed) << 5)
28659 | (info->cr_save_p << 1)
28660 | (info->lr_save_p)));
28662 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28663 (6 bits). */
28664 fprintf (file, "%d,",
28665 (info->push_p << 7) | (64 - info->first_fp_reg_save));
28667 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28668 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28670 if (optional_tbtab)
28672 /* Compute the parameter info from the function decl argument
28673 list. */
28674 tree decl;
28675 int next_parm_info_bit = 31;
28677 for (decl = DECL_ARGUMENTS (current_function_decl);
28678 decl; decl = DECL_CHAIN (decl))
28680 rtx parameter = DECL_INCOMING_RTL (decl);
28681 machine_mode mode = GET_MODE (parameter);
28683 if (GET_CODE (parameter) == REG)
28685 if (SCALAR_FLOAT_MODE_P (mode))
28687 int bits;
28689 float_parms++;
28691 switch (mode)
28693 case SFmode:
28694 case SDmode:
28695 bits = 0x2;
28696 break;
28698 case DFmode:
28699 case DDmode:
28700 case TFmode:
28701 case TDmode:
28702 case IFmode:
28703 case KFmode:
28704 bits = 0x3;
28705 break;
28707 default:
28708 gcc_unreachable ();
28711 /* If only one bit will fit, don't or in this entry. */
28712 if (next_parm_info_bit > 0)
28713 parm_info |= (bits << (next_parm_info_bit - 1));
28714 next_parm_info_bit -= 2;
28716 else
28718 fixed_parms += ((GET_MODE_SIZE (mode)
28719 + (UNITS_PER_WORD - 1))
28720 / UNITS_PER_WORD);
28721 next_parm_info_bit -= 1;
28727 /* Number of fixed point parameters. */
28728 /* This is actually the number of words of fixed point parameters; thus
28729 an 8 byte struct counts as 2; and thus the maximum value is 8. */
28730 fprintf (file, "%d,", fixed_parms);
28732 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28733 all on stack. */
28734 /* This is actually the number of fp registers that hold parameters;
28735 and thus the maximum value is 13. */
28736 /* Set parameters on stack bit if parameters are not in their original
28737 registers, regardless of whether they are on the stack? Xlc
28738 seems to set the bit when not optimizing. */
28739 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28741 if (! optional_tbtab)
28742 return;
28744 /* Optional fields follow. Some are variable length. */
28746 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
28747 11 double float. */
28748 /* There is an entry for each parameter in a register, in the order that
28749 they occur in the parameter list. Any intervening arguments on the
28750 stack are ignored. If the list overflows a long (max possible length
28751 34 bits) then completely leave off all elements that don't fit. */
28752 /* Only emit this long if there was at least one parameter. */
28753 if (fixed_parms || float_parms)
28754 fprintf (file, "\t.long %d\n", parm_info);
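/* Worked example (illustrative): for f (int i, double d, float s)
   with all three arriving in registers, fixed_parms == 1 and
   float_parms == 2, and parm_info packs 0 (fixed), 11 (double) and
   10 (single) from bit 31 downwards, giving 0x70000000.  */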
28756 /* Offset from start of code to tb table. */
28757 fputs ("\t.long ", file);
28758 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28759 RS6000_OUTPUT_BASENAME (file, fname);
28760 putc ('-', file);
28761 rs6000_output_function_entry (file, fname);
28762 putc ('\n', file);
28764 /* Interrupt handler mask. */
28765 /* Omit this long, since we never set the interrupt handler bit
28766 above. */
28768 /* Number of CTL (controlled storage) anchors. */
28769 /* Omit this long, since the has_ctl bit is never set above. */
28771 /* Displacement into stack of each CTL anchor. */
28772 /* Omit this list of longs, because there are no CTL anchors. */
28774 /* Length of function name. */
28775 if (*fname == '*')
28776 ++fname;
28777 fprintf (file, "\t.short %d\n", (int) strlen (fname));
28779 /* Function name. */
28780 assemble_string (fname, strlen (fname));
28782 /* Register for alloca automatic storage; this is always reg 31.
28783 Only emit this if the alloca bit was set above. */
28784 if (frame_pointer_needed)
28785 fputs ("\t.byte 31\n", file);
28787 fputs ("\t.align 2\n", file);
28790 /* Arrange to define .LCTOC1 label, if not already done. */
28791 if (need_toc_init)
28793 need_toc_init = 0;
28794 if (!toc_initialized)
28796 switch_to_section (toc_section);
28797 switch_to_section (current_function_section ());
28802 /* -fsplit-stack support. */
28804 /* A SYMBOL_REF for __morestack. */
28805 static GTY(()) rtx morestack_ref;
28807 static rtx
28808 gen_add3_const (rtx rt, rtx ra, long c)
28810 if (TARGET_64BIT)
28811 return gen_adddi3 (rt, ra, GEN_INT (c));
28812 else
28813 return gen_addsi3 (rt, ra, GEN_INT (c));
28816 /* Emit -fsplit-stack prologue, which goes before the regular function
28817 prologue (at local entry point in the case of ELFv2). */
28819 void
28820 rs6000_expand_split_stack_prologue (void)
28822 rs6000_stack_t *info = rs6000_stack_info ();
28823 unsigned HOST_WIDE_INT allocate;
28824 long alloc_hi, alloc_lo;
28825 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
28826 rtx_insn *insn;
28828 gcc_assert (flag_split_stack && reload_completed);
28830 if (!info->push_p)
28831 return;
28833 if (global_regs[29])
28835 error ("-fsplit-stack uses register r29");
28836 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
28837 "conflicts with %qD", global_regs_decl[29]);
28840 allocate = info->total_size;
28841 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
28843 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
28844 return;
28846 if (morestack_ref == NULL_RTX)
28848 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
28849 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
28850 | SYMBOL_FLAG_FUNCTION);
28853 r0 = gen_rtx_REG (Pmode, 0);
28854 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28855 r12 = gen_rtx_REG (Pmode, 12);
28856 emit_insn (gen_load_split_stack_limit (r0));
28857 /* Always emit two insns here to calculate the requested stack,
28858 so that the linker can edit them when adjusting size for calling
28859 non-split-stack code. */
28860 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
28861 alloc_lo = -allocate - alloc_hi;
28862 if (alloc_hi != 0)
28864 emit_insn (gen_add3_const (r12, r1, alloc_hi));
28865 if (alloc_lo != 0)
28866 emit_insn (gen_add3_const (r12, r12, alloc_lo));
28867 else
28868 emit_insn (gen_nop ());
28870 else
28872 emit_insn (gen_add3_const (r12, r1, alloc_lo));
28873 emit_insn (gen_nop ());
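/* Worked example (illustrative): for allocate == 0x12340 we get
   alloc_hi == -0x10000 and alloc_lo == -0x2340, hence
	addis 12,1,-1
	addi 12,12,-9024
   leaving r12 == r1 - 0x12340 as required.  */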
28876 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28877 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
28878 ok_label = gen_label_rtx ();
28879 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28880 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
28881 gen_rtx_LABEL_REF (VOIDmode, ok_label),
28882 pc_rtx);
28883 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28884 JUMP_LABEL (jump) = ok_label;
28885 /* Mark the jump as very likely to be taken. */
28886 add_int_reg_note (jump, REG_BR_PROB,
28887 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
28889 lr = gen_rtx_REG (Pmode, LR_REGNO);
28890 insn = emit_move_insn (r0, lr);
28891 RTX_FRAME_RELATED_P (insn) = 1;
28892 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
28893 RTX_FRAME_RELATED_P (insn) = 1;
28895 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
28896 const0_rtx, const0_rtx));
28897 call_fusage = NULL_RTX;
28898 use_reg (&call_fusage, r12);
28899 /* Say the call uses r0, even though it doesn't, to stop regrename
28900 from twiddling with the insns saving lr, trashing args for cfun.
28901 The insns restoring lr are similarly protected by making
28902 split_stack_return use r0. */
28903 use_reg (&call_fusage, r0);
28904 add_function_usage_to (insn, call_fusage);
28905 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
28906 insn = emit_move_insn (lr, r0);
28907 add_reg_note (insn, REG_CFA_RESTORE, lr);
28908 RTX_FRAME_RELATED_P (insn) = 1;
28909 emit_insn (gen_split_stack_return ());
28911 emit_label (ok_label);
28912 LABEL_NUSES (ok_label) = 1;
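/* In outline, the check emitted above is:
	<load the stack limit from the TCB (via r13) into r0>
	addis 12,1,HI ; addi 12,12,LO	# r12 = sp - frame size
	cmpld 7,12,0
	bge 7,.Lok
	<save lr, bl __morestack, reload lr, return>
   .Lok:
   so the fast path costs a load, two adds, a compare and one
   almost-always-taken branch.  (Illustrative; the placeholders stand
   for insns emitted above, and cmpld becomes cmplw for -m32.)  */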
28915 /* Return the internal arg pointer used for function incoming
28916 arguments. When -fsplit-stack, the arg pointer is r12 so we need
28917 to copy it to a pseudo in order for it to be preserved over calls
28918 and suchlike. We'd really like to use a pseudo here for the
28919 internal arg pointer but data-flow analysis is not prepared to
28920 accept pseudos as live at the beginning of a function. */
28922 static rtx
28923 rs6000_internal_arg_pointer (void)
28925 if (flag_split_stack
28926 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
28927 == NULL))
28930 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
28932 rtx pat;
28934 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
28935 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
28937 /* Put the pseudo initialization right after the note at the
28938 beginning of the function. */
28939 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
28940 gen_rtx_REG (Pmode, 12));
28941 push_topmost_sequence ();
28942 emit_insn_after (pat, get_insns ());
28943 pop_topmost_sequence ();
28945 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
28946 FIRST_PARM_OFFSET (current_function_decl));
28948 return virtual_incoming_args_rtx;
28951 /* We may have to tell the dataflow pass that the split stack prologue
28952 is initializing a register. */
28954 static void
28955 rs6000_live_on_entry (bitmap regs)
28957 if (flag_split_stack)
28958 bitmap_set_bit (regs, 12);
28961 /* Emit -fsplit-stack dynamic stack allocation space check. */
28963 void
28964 rs6000_split_stack_space_check (rtx size, rtx label)
28966 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28967 rtx limit = gen_reg_rtx (Pmode);
28968 rtx requested = gen_reg_rtx (Pmode);
28969 rtx cmp = gen_reg_rtx (CCUNSmode);
28970 rtx jump;
28972 emit_insn (gen_load_split_stack_limit (limit));
28973 if (CONST_INT_P (size))
28974 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28975 else
28977 size = force_reg (Pmode, size);
28978 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
28980 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28981 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28982 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28983 gen_rtx_LABEL_REF (VOIDmode, label),
28984 pc_rtx);
28985 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28986 JUMP_LABEL (jump) = label;
28989 /* A C compound statement that outputs the assembler code for a thunk
28990 function, used to implement C++ virtual function calls with
28991 multiple inheritance. The thunk acts as a wrapper around a virtual
28992 function, adjusting the implicit object parameter before handing
28993 control off to the real function.
28995 First, emit code to add the integer DELTA to the location that
28996 contains the incoming first argument. Assume that this argument
28997 contains a pointer, and is the one used to pass the `this' pointer
28998 in C++. This is the incoming argument *before* the function
28999 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29000 values of all other incoming arguments.
29002 After the addition, emit code to jump to FUNCTION, which is a
29003 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29004 not touch the return address. Hence returning from FUNCTION will
29005 return to whoever called the current `thunk'.
29007 The effect must be as if FUNCTION had been called directly with the
29008 adjusted first argument. This macro is responsible for emitting
29009 all of the code for a thunk function; output_function_prologue()
29010 and output_function_epilogue() are not invoked.
29012 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29013 been extracted from it.) It might possibly be useful on some
29014 targets, but probably not.
29016 If you do not define this macro, the target-independent code in the
29017 C++ frontend will generate a less efficient heavyweight thunk that
29018 calls FUNCTION instead of jumping to it. The generic approach does
29019 not support varargs. */
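/* As an illustration (not code from this file), for a C++ hierarchy such as

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   calling g() through a B* that actually points into a C object requires
   `this' to be adjusted by the offset of B within C before C::g runs; the
   thunk emitted below performs exactly that constant (and, when needed,
   vtable-relative) adjustment and then tail-jumps to the real function.  */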
29021 static void
29022 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29023 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29024 tree function)
29026 rtx this_rtx, funexp;
29027 rtx_insn *insn;
29029 reload_completed = 1;
29030 epilogue_completed = 1;
29032 /* Mark the end of the (empty) prologue. */
29033 emit_note (NOTE_INSN_PROLOGUE_END);
29035 /* Find the "this" pointer. If the function returns a structure,
29036 the structure return pointer is in r3. */
29037 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29038 this_rtx = gen_rtx_REG (Pmode, 4);
29039 else
29040 this_rtx = gen_rtx_REG (Pmode, 3);
29042 /* Apply the constant offset, if required. */
29043 if (delta)
29044 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29046 /* Apply the offset from the vtable, if required. */
29047 if (vcall_offset)
29049 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29050 rtx tmp = gen_rtx_REG (Pmode, 12);
29052 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
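/* The unsigned test below is a standard range-check idiom: for a signed X,
   (unsigned HOST_WIDE_INT) X + 0x8000 >= 0x10000 exactly when X lies
   outside [-0x8000, 0x7fff], i.e. when X does not fit a 16-bit signed
   displacement.  For example, X == 0x9000 gives 0x11000 (out of range),
   while X == -0x8000 wraps around to 0 (in range).  */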
29053 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29055 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29056 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29058 else
29060 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29062 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29064 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29067 /* Generate a tail call to the target function. */
29068 if (!TREE_USED (function))
29070 assemble_external (function);
29071 TREE_USED (function) = 1;
29073 funexp = XEXP (DECL_RTL (function), 0);
29074 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29076 #if TARGET_MACHO
29077 if (MACHOPIC_INDIRECT)
29078 funexp = machopic_indirect_call_target (funexp);
29079 #endif
29081 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29082 generate sibcall RTL explicitly. */
29083 insn = emit_call_insn (
29084 gen_rtx_PARALLEL (VOIDmode,
29085 gen_rtvec (4,
29086 gen_rtx_CALL (VOIDmode,
29087 funexp, const0_rtx),
29088 gen_rtx_USE (VOIDmode, const0_rtx),
29089 gen_rtx_USE (VOIDmode,
29090 gen_rtx_REG (SImode,
29091 LR_REGNO)),
29092 simple_return_rtx)));
29093 SIBLING_CALL_P (insn) = 1;
29094 emit_barrier ();
29096 /* Run just enough of rest_of_compilation to get the insns emitted.
29097 There's not really enough bulk here to make other passes such as
29098 instruction scheduling worthwhile. Note that use_thunk calls
29099 assemble_start_function and assemble_end_function. */
29100 insn = get_insns ();
29101 shorten_branches (insn);
29102 final_start_function (insn, file, 1);
29103 final (insn, file, 1);
29104 final_end_function ();
29106 reload_completed = 0;
29107 epilogue_completed = 0;
29110 /* A quick summary of the various types of 'constant-pool tables'
29111 under PowerPC:
29113 Target       Flags            Name              One table per
29114 AIX          (none)           AIX TOC           object file
29115 AIX          -mfull-toc       AIX TOC           object file
29116 AIX          -mminimal-toc    AIX minimal TOC   translation unit
29117 SVR4/EABI    (none)           SVR4 SDATA        object file
29118 SVR4/EABI    -fpic            SVR4 pic          object file
29119 SVR4/EABI    -fPIC            SVR4 PIC          translation unit
29120 SVR4/EABI    -mrelocatable    EABI TOC          function
29121 SVR4/EABI    -maix            AIX TOC           object file
29122 SVR4/EABI    -maix -mminimal-toc
29123                               AIX minimal TOC   translation unit
29125 Name              Reg.  Set by   entries   contains:
29126                                  made by   addrs?   fp?      sum?
29128 AIX TOC           2     crt0     as        Y        option   option
29129 AIX minimal TOC   30    prolog   gcc       Y        Y        option
29130 SVR4 SDATA        13    crt0     gcc       N        Y        N
29131 SVR4 pic          30    prolog   ld        Y        not yet  N
29132 SVR4 PIC          30    prolog   gcc       Y        option   option
29133 EABI TOC          30    prolog   gcc       Y        option   option
29137 /* Hash functions for the hash table. */
29139 static unsigned
29140 rs6000_hash_constant (rtx k)
29142 enum rtx_code code = GET_CODE (k);
29143 machine_mode mode = GET_MODE (k);
29144 unsigned result = (code << 3) ^ mode;
29145 const char *format;
29146 int flen, fidx;
29148 format = GET_RTX_FORMAT (code);
29149 flen = strlen (format);
29150 fidx = 0;
29152 switch (code)
29154 case LABEL_REF:
29155 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29157 case CONST_WIDE_INT:
29159 int i;
29160 flen = CONST_WIDE_INT_NUNITS (k);
29161 for (i = 0; i < flen; i++)
29162 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29163 return result;
29166 case CONST_DOUBLE:
29167 if (mode != VOIDmode)
29168 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29169 flen = 2;
29170 break;
29172 case CODE_LABEL:
29173 fidx = 3;
29174 break;
29176 default:
29177 break;
29180 for (; fidx < flen; fidx++)
29181 switch (format[fidx])
29183 case 's':
29185 unsigned i, len;
29186 const char *str = XSTR (k, fidx);
29187 len = strlen (str);
29188 result = result * 613 + len;
29189 for (i = 0; i < len; i++)
29190 result = result * 613 + (unsigned) str[i];
29191 break;
29193 case 'u':
29194 case 'e':
29195 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29196 break;
29197 case 'i':
29198 case 'n':
29199 result = result * 613 + (unsigned) XINT (k, fidx);
29200 break;
29201 case 'w':
29202 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29203 result = result * 613 + (unsigned) XWINT (k, fidx);
29204 else
29206 size_t i;
29207 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29208 result = result * 613 + (unsigned) (XWINT (k, fidx)
29209 >> CHAR_BIT * i);
29211 break;
29212 case '0':
29213 break;
29214 default:
29215 gcc_unreachable ();
29218 return result;
29221 hashval_t
29222 toc_hasher::hash (toc_hash_struct *thc)
29224 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29227 /* Compare H1 and H2 for equivalence. */
29229 bool
29230 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29232 rtx r1 = h1->key;
29233 rtx r2 = h2->key;
29235 if (h1->key_mode != h2->key_mode)
29236 return 0;
29238 return rtx_equal_p (r1, r2);
29241 /* These are the names given by the C++ front-end to vtables, and
29242 vtable-like objects. Ideally, this logic should not be here;
29243 instead, there should be some programmatic way of inquiring as
29244 to whether or not an object is a vtable. */
29246 #define VTABLE_NAME_P(NAME) \
29247 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29248 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29249 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29250 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29251 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
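/* For reference: "_vt." is the old g++ v2 vtable prefix, while "_ZTV",
   "_ZTT", "_ZTI" and "_ZTC" are the Itanium C++ ABI manglings for
   vtables, VTTs, typeinfo objects and construction vtables respectively;
   e.g. the vtable for class Foo mangles to "_ZTV3Foo".  */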
29253 #ifdef NO_DOLLAR_IN_LABEL
29254 /* Return a GGC-allocated character string translating dollar signs in
29255 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29257 const char *
29258 rs6000_xcoff_strip_dollar (const char *name)
29260 char *strip, *p;
29261 const char *q;
29262 size_t len;
29264 q = (const char *) strchr (name, '$');
29266 if (q == 0 || q == name)
29267 return name;
29269 len = strlen (name);
29270 strip = XALLOCAVEC (char, len + 1);
29271 strcpy (strip, name);
29272 p = strip + (q - name);
29273 while (p)
29275 *p = '_';
29276 p = strchr (p + 1, '$');
29279 return ggc_alloc_string (strip, len);
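/* For example, a hypothetical XCOFF label "_foo$bar$baz" comes back as
   the GGC-allocated string "_foo_bar_baz"; a name with no '$', or one
   that begins with '$', is returned unchanged.  */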
29281 #endif
29283 void
29284 rs6000_output_symbol_ref (FILE *file, rtx x)
29286 /* Currently C++ toc references to vtables can be emitted before it
29287 is decided whether the vtable is public or private. If this is
29288 the case, then the linker will eventually complain that there is
29289 a reference to an unknown section. Thus, for vtables only,
29290 we emit the TOC reference to reference the symbol and not the
29291 section. */
29292 const char *name = XSTR (x, 0);
29294 tree decl = SYMBOL_REF_DECL (x);
29295 if (decl /* sync condition with assemble_external () */
29296 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
29297 && (TREE_CODE (decl) == VAR_DECL
29298 || TREE_CODE (decl) == FUNCTION_DECL)
29299 && name[strlen (name) - 1] != ']')
29301 name = concat (name,
29302 (TREE_CODE (decl) == FUNCTION_DECL
29303 ? "[DS]" : "[UA]"),
29304 NULL);
29305 XSTR (x, 0) = name;
29308 if (VTABLE_NAME_P (name))
29310 RS6000_OUTPUT_BASENAME (file, name);
29312 else
29313 assemble_name (file, name);
29316 /* Output a TOC entry. We derive the entry name from what is being
29317 written. */
29319 void
29320 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29322 char buf[256];
29323 const char *name = buf;
29324 rtx base = x;
29325 HOST_WIDE_INT offset = 0;
29327 gcc_assert (!TARGET_NO_TOC);
29329 /* When the linker won't eliminate them, don't output duplicate
29330 TOC entries (this happens on AIX if there is any kind of TOC,
29331 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29332 CODE_LABELs. */
29333 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29335 struct toc_hash_struct *h;
29337 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29338 time because GGC is not initialized at that point. */
29339 if (toc_hash_table == NULL)
29340 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29342 h = ggc_alloc<toc_hash_struct> ();
29343 h->key = x;
29344 h->key_mode = mode;
29345 h->labelno = labelno;
29347 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29348 if (*found == NULL)
29349 *found = h;
29350 else /* This is indeed a duplicate.
29351 Set this label equal to that label. */
29353 fputs ("\t.set ", file);
29354 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29355 fprintf (file, "%d,", labelno);
29356 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29357 fprintf (file, "%d\n", ((*found)->labelno));
29359 #ifdef HAVE_AS_TLS
29360 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29361 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29362 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29364 fputs ("\t.set ", file);
29365 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29366 fprintf (file, "%d,", labelno);
29367 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29368 fprintf (file, "%d\n", ((*found)->labelno));
29370 #endif
29371 return;
29375 /* If we're going to put a double constant in the TOC, make sure it's
29376 aligned properly when strict alignment is on. */
29377 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29378 && STRICT_ALIGNMENT
29379 && GET_MODE_BITSIZE (mode) >= 64
29380 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29381 ASM_OUTPUT_ALIGN (file, 3);
29384 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29386 /* Handle FP constants specially. Note that if we have a minimal
29387 TOC, things we put here aren't actually in the TOC, so we can allow
29388 FP constants. */
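/* A worked example for the DFmode case below (assuming IEEE double and
   WORDS_BIG_ENDIAN): the constant 1.0 splits into k[0] == 0x3ff00000 and
   k[1] == 0x00000000, so a 64-bit ELF target emits DOUBLE_INT_ASM_OP
   (typically ".quad") followed by 0x3ff0000000000000, while a 32-bit
   target emits ".long 0x3ff00000,0x0".  */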
29389 if (GET_CODE (x) == CONST_DOUBLE &&
29390 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29391 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29393 long k[4];
29395 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29396 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29397 else
29398 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29400 if (TARGET_64BIT)
29402 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29403 fputs (DOUBLE_INT_ASM_OP, file);
29404 else
29405 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29406 k[0] & 0xffffffff, k[1] & 0xffffffff,
29407 k[2] & 0xffffffff, k[3] & 0xffffffff);
29408 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29409 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29410 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29411 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29412 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29413 return;
29415 else
29417 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29418 fputs ("\t.long ", file);
29419 else
29420 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29421 k[0] & 0xffffffff, k[1] & 0xffffffff,
29422 k[2] & 0xffffffff, k[3] & 0xffffffff);
29423 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29424 k[0] & 0xffffffff, k[1] & 0xffffffff,
29425 k[2] & 0xffffffff, k[3] & 0xffffffff);
29426 return;
29429 else if (GET_CODE (x) == CONST_DOUBLE &&
29430 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29432 long k[2];
29434 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29435 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29436 else
29437 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29439 if (TARGET_64BIT)
29441 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29442 fputs (DOUBLE_INT_ASM_OP, file);
29443 else
29444 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29445 k[0] & 0xffffffff, k[1] & 0xffffffff);
29446 fprintf (file, "0x%lx%08lx\n",
29447 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29448 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29449 return;
29451 else
29453 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29454 fputs ("\t.long ", file);
29455 else
29456 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29457 k[0] & 0xffffffff, k[1] & 0xffffffff);
29458 fprintf (file, "0x%lx,0x%lx\n",
29459 k[0] & 0xffffffff, k[1] & 0xffffffff);
29460 return;
29463 else if (GET_CODE (x) == CONST_DOUBLE &&
29464 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29466 long l;
29468 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29469 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29470 else
29471 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29473 if (TARGET_64BIT)
29475 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29476 fputs (DOUBLE_INT_ASM_OP, file);
29477 else
29478 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29479 if (WORDS_BIG_ENDIAN)
29480 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29481 else
29482 fprintf (file, "0x%lx\n", l & 0xffffffff);
29483 return;
29485 else
29487 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29488 fputs ("\t.long ", file);
29489 else
29490 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29491 fprintf (file, "0x%lx\n", l & 0xffffffff);
29492 return;
29495 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29497 unsigned HOST_WIDE_INT low;
29498 HOST_WIDE_INT high;
29500 low = INTVAL (x) & 0xffffffff;
29501 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29503 /* TOC entries are always Pmode-sized, so when big-endian
29504 smaller integer constants in the TOC need to be padded.
29505 (This is still a win over putting the constants in
29506 a separate constant pool, because then we'd have
29507 to have both a TOC entry _and_ the actual constant.)
29509 For a 32-bit target, CONST_INT values are loaded and shifted
29510 entirely within `low' and can be stored in one TOC entry. */
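/* Worked example: on a 64-bit big-endian target, an SImode CONST_INT of
   0x1234 starts with low == 0x1234, high == 0; the shift below turns that
   into high == 0x1234, low == 0, so the emitted 64-bit TOC word is
   0x0000123400000000 -- the value left-justified within a Pmode word.  */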
29512 /* It would be easy to make this work, but it doesn't now. */
29513 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29515 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29517 low |= high << 32;
29518 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29519 high = (HOST_WIDE_INT) low >> 32;
29520 low &= 0xffffffff;
29523 if (TARGET_64BIT)
29525 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29526 fputs (DOUBLE_INT_ASM_OP, file);
29527 else
29528 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29529 (long) high & 0xffffffff, (long) low & 0xffffffff);
29530 fprintf (file, "0x%lx%08lx\n",
29531 (long) high & 0xffffffff, (long) low & 0xffffffff);
29532 return;
29534 else
29536 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29538 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29539 fputs ("\t.long ", file);
29540 else
29541 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29542 (long) high & 0xffffffff, (long) low & 0xffffffff);
29543 fprintf (file, "0x%lx,0x%lx\n",
29544 (long) high & 0xffffffff, (long) low & 0xffffffff);
29546 else
29548 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29549 fputs ("\t.long ", file);
29550 else
29551 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29552 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29554 return;
29558 if (GET_CODE (x) == CONST)
29560 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29561 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29563 base = XEXP (XEXP (x, 0), 0);
29564 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29567 switch (GET_CODE (base))
29569 case SYMBOL_REF:
29570 name = XSTR (base, 0);
29571 break;
29573 case LABEL_REF:
29574 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29575 CODE_LABEL_NUMBER (XEXP (base, 0)));
29576 break;
29578 case CODE_LABEL:
29579 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29580 break;
29582 default:
29583 gcc_unreachable ();
29586 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29587 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29588 else
29590 fputs ("\t.tc ", file);
29591 RS6000_OUTPUT_BASENAME (file, name);
29593 if (offset < 0)
29594 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29595 else if (offset)
29596 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29598 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29599 after other TOC symbols, reducing the chance that small-model
29600 TOC accesses to [TC] symbols overflow. */
29601 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29602 ? "[TE]," : "[TC],", file);
29605 /* Currently C++ toc references to vtables can be emitted before it
29606 is decided whether the vtable is public or private. If this is
29607 the case, then the linker will eventually complain that there is
29608 a TOC reference to an unknown section. Thus, for vtables only,
29609 we emit the TOC reference to reference the symbol and not the
29610 section. */
29611 if (VTABLE_NAME_P (name))
29613 RS6000_OUTPUT_BASENAME (file, name);
29614 if (offset < 0)
29615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29616 else if (offset > 0)
29617 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29619 else
29620 output_addr_const (file, x);
29622 #if HAVE_AS_TLS
29623 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29625 switch (SYMBOL_REF_TLS_MODEL (base))
29627 case 0:
29628 break;
29629 case TLS_MODEL_LOCAL_EXEC:
29630 fputs ("@le", file);
29631 break;
29632 case TLS_MODEL_INITIAL_EXEC:
29633 fputs ("@ie", file);
29634 break;
29635 /* Use global-dynamic for local-dynamic. */
29636 case TLS_MODEL_GLOBAL_DYNAMIC:
29637 case TLS_MODEL_LOCAL_DYNAMIC:
29638 putc ('\n', file);
29639 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29640 fputs ("\t.tc .", file);
29641 RS6000_OUTPUT_BASENAME (file, name);
29642 fputs ("[TC],", file);
29643 output_addr_const (file, x);
29644 fputs ("@m", file);
29645 break;
29646 default:
29647 gcc_unreachable ();
29650 #endif
29652 putc ('\n', file);
29655 /* Output an assembler pseudo-op to write an ASCII string of N characters
29656 starting at P to FILE.
29658 On the RS/6000, we have to do this using the .byte operation and
29659 write out special characters outside the quoted string.
29660 Also, the assembler is broken; very long strings are truncated,
29661 so we must artificially break them up early. */
29663 void
29664 output_ascii (FILE *file, const char *p, int n)
29666 char c;
29667 int i, count_string;
29668 const char *for_string = "\t.byte \"";
29669 const char *for_decimal = "\t.byte ";
29670 const char *to_close = NULL;
29672 count_string = 0;
29673 for (i = 0; i < n; i++)
29675 c = *p++;
29676 if (c >= ' ' && c < 0177)
29678 if (for_string)
29679 fputs (for_string, file);
29680 putc (c, file);
29682 /* Write two quotes to get one. */
29683 if (c == '"')
29685 putc (c, file);
29686 ++count_string;
29689 for_string = NULL;
29690 for_decimal = "\"\n\t.byte ";
29691 to_close = "\"\n";
29692 ++count_string;
29694 if (count_string >= 512)
29696 fputs (to_close, file);
29698 for_string = "\t.byte \"";
29699 for_decimal = "\t.byte ";
29700 to_close = NULL;
29701 count_string = 0;
29704 else
29706 if (for_decimal)
29707 fputs (for_decimal, file);
29708 fprintf (file, "%d", c);
29710 for_string = "\n\t.byte \"";
29711 for_decimal = ", ";
29712 to_close = "\n";
29713 count_string = 0;
29717 /* Now close the string if we have written one. Then end the line. */
29718 if (to_close)
29719 fputs (to_close, file);
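/* For instance, output_ascii (file, "ab\"c\n", 5) emits

     .byte "ab""c"
     .byte 10

   -- printable characters are collected inside one quoted .byte directive
   (with embedded quotes doubled), and anything else is emitted as a
   decimal byte value.  */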
29722 /* Generate a unique section name for FILENAME for a section type
29723 represented by SECTION_DESC. Output goes into BUF.
29725 SECTION_DESC can be any string, as long as it is different for each
29726 possible section type.
29728 We name the section in the same manner as xlc. The name begins with an
29729 underscore followed by the filename (after stripping any leading directory
29730 names) with the last period replaced by the string SECTION_DESC. If
29731 FILENAME does not contain a period, SECTION_DESC is appended to the end of
29732 the name. */
29734 void
29735 rs6000_gen_section_name (char **buf, const char *filename,
29736 const char *section_desc)
29738 const char *q, *after_last_slash, *last_period = 0;
29739 char *p;
29740 int len;
29742 after_last_slash = filename;
29743 for (q = filename; *q; q++)
29745 if (*q == '/')
29746 after_last_slash = q + 1;
29747 else if (*q == '.')
29748 last_period = q;
29751 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29752 *buf = (char *) xmalloc (len);
29754 p = *buf;
29755 *p++ = '_';
29757 for (q = after_last_slash; *q; q++)
29759 if (q == last_period)
29761 strcpy (p, section_desc);
29762 p += strlen (section_desc);
29763 break;
29766 else if (ISALNUM (*q))
29767 *p++ = *q;
29770 if (last_period == 0)
29771 strcpy (p, section_desc);
29772 else
29773 *p = '\0';
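/* For example, rs6000_gen_section_name (&buf, "subdir/foo.c", "ro_")
   yields "_fooro_": the directory part is stripped, a leading '_' is
   added, and the final ".c" is replaced by SECTION_DESC (non-alphanumeric
   characters before the last period are dropped as well).  */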
29776 /* Emit profile function. */
29778 void
29779 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
29781 /* Non-standard profiling for kernels, which just saves LR then calls
29782 _mcount without worrying about arg saves. The idea is to change
29783 the function prologue as little as possible as it isn't easy to
29784 account for arg save/restore code added just for _mcount. */
29785 if (TARGET_PROFILE_KERNEL)
29786 return;
29788 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29790 #ifndef NO_PROFILE_COUNTERS
29791 # define NO_PROFILE_COUNTERS 0
29792 #endif
29793 if (NO_PROFILE_COUNTERS)
29794 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29795 LCT_NORMAL, VOIDmode, 0);
29796 else
29798 char buf[30];
29799 const char *label_name;
29800 rtx fun;
29802 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29803 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
29804 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
29806 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29807 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
29810 else if (DEFAULT_ABI == ABI_DARWIN)
29812 const char *mcount_name = RS6000_MCOUNT;
29813 int caller_addr_regno = LR_REGNO;
29815 /* Be conservative and always set this, at least for now. */
29816 crtl->uses_pic_offset_table = 1;
29818 #if TARGET_MACHO
29819 /* For PIC code, set up a stub and collect the caller's address
29820 from r0, which is where the prologue puts it. */
29821 if (MACHOPIC_INDIRECT
29822 && crtl->uses_pic_offset_table)
29823 caller_addr_regno = 0;
29824 #endif
29825 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
29826 LCT_NORMAL, VOIDmode, 1,
29827 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
29831 /* Write function profiler code. */
29833 void
29834 output_function_profiler (FILE *file, int labelno)
29836 char buf[100];
29838 switch (DEFAULT_ABI)
29840 default:
29841 gcc_unreachable ();
29843 case ABI_V4:
29844 if (!TARGET_32BIT)
29846 warning (0, "no profiling of 64-bit code for this ABI");
29847 return;
29849 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29850 fprintf (file, "\tmflr %s\n", reg_names[0]);
29851 if (NO_PROFILE_COUNTERS)
29853 asm_fprintf (file, "\tstw %s,4(%s)\n",
29854 reg_names[0], reg_names[1]);
29856 else if (TARGET_SECURE_PLT && flag_pic)
29858 if (TARGET_LINK_STACK)
29860 char name[32];
29861 get_ppc476_thunk_name (name);
29862 asm_fprintf (file, "\tbl %s\n", name);
29864 else
29865 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
29866 asm_fprintf (file, "\tstw %s,4(%s)\n",
29867 reg_names[0], reg_names[1]);
29868 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29869 asm_fprintf (file, "\taddis %s,%s,",
29870 reg_names[12], reg_names[12]);
29871 assemble_name (file, buf);
29872 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
29873 assemble_name (file, buf);
29874 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
29876 else if (flag_pic == 1)
29878 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
29879 asm_fprintf (file, "\tstw %s,4(%s)\n",
29880 reg_names[0], reg_names[1]);
29881 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29882 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
29883 assemble_name (file, buf);
29884 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
29886 else if (flag_pic > 1)
29888 asm_fprintf (file, "\tstw %s,4(%s)\n",
29889 reg_names[0], reg_names[1]);
29890 /* Now, we need to get the address of the label. */
29891 if (TARGET_LINK_STACK)
29893 char name[32];
29894 get_ppc476_thunk_name (name);
29895 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
29896 assemble_name (file, buf);
29897 fputs ("-.\n1:", file);
29898 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29899 asm_fprintf (file, "\taddi %s,%s,4\n",
29900 reg_names[11], reg_names[11]);
29902 else
29904 fputs ("\tbcl 20,31,1f\n\t.long ", file);
29905 assemble_name (file, buf);
29906 fputs ("-.\n1:", file);
29907 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29909 asm_fprintf (file, "\tlwz %s,0(%s)\n",
29910 reg_names[0], reg_names[11]);
29911 asm_fprintf (file, "\tadd %s,%s,%s\n",
29912 reg_names[0], reg_names[0], reg_names[11]);
29914 else
29916 asm_fprintf (file, "\tlis %s,", reg_names[12]);
29917 assemble_name (file, buf);
29918 fputs ("@ha\n", file);
29919 asm_fprintf (file, "\tstw %s,4(%s)\n",
29920 reg_names[0], reg_names[1]);
29921 asm_fprintf (file, "\tla %s,", reg_names[0]);
29922 assemble_name (file, buf);
29923 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
29926 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
29927 fprintf (file, "\tbl %s%s\n",
29928 RS6000_MCOUNT, flag_pic ? "@plt" : "");
29929 break;
29931 case ABI_AIX:
29932 case ABI_ELFv2:
29933 case ABI_DARWIN:
29934 /* Don't do anything, done in output_profile_hook (). */
29935 break;
29941 /* The following variable value is the last issued insn. */
29943 static rtx_insn *last_scheduled_insn;
29945 /* The following variable helps to balance issuing of load and
29946 store instructions. */
29948 static int load_store_pendulum;
29950 /* The following variable helps pair divide insns during scheduling. */
29951 static int divide_cnt;
29952 /* The following variable helps pair and alternate vector and vector load
29953 insns during scheduling. */
29954 static int vec_load_pendulum;
29957 /* Power4 load update and store update instructions are cracked into a
29958 load or store and an integer insn which are executed in the same cycle.
29959 Branches have their own dispatch slot which does not count against the
29960 GCC issue rate, but it changes the program flow so there are no other
29961 instructions to issue in this cycle. */
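/* For example (assuming the usual Power4 behaviour), a load with update
   such as "lwzu r3,4(r9)" cracks into the equivalent of "lwz r3,4(r9)"
   plus "addi r9,r9,4", occupying two issue slots in the same group.  */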
29963 static int
29964 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29966 last_scheduled_insn = insn;
29967 if (GET_CODE (PATTERN (insn)) == USE
29968 || GET_CODE (PATTERN (insn)) == CLOBBER)
29970 cached_can_issue_more = more;
29971 return cached_can_issue_more;
29974 if (insn_terminates_group_p (insn, current_group))
29976 cached_can_issue_more = 0;
29977 return cached_can_issue_more;
29980 /* If the insn has no reservation but we still get here, leave the issue count unchanged. */
29981 if (recog_memoized (insn) < 0)
29982 return more;
29984 if (rs6000_sched_groups)
29986 if (is_microcoded_insn (insn))
29987 cached_can_issue_more = 0;
29988 else if (is_cracked_insn (insn))
29989 cached_can_issue_more = more > 2 ? more - 2 : 0;
29990 else
29991 cached_can_issue_more = more - 1;
29993 return cached_can_issue_more;
29996 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
29997 return 0;
29999 cached_can_issue_more = more - 1;
30000 return cached_can_issue_more;
30003 static int
30004 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30006 int r = rs6000_variable_issue_1 (insn, more);
30007 if (verbose)
30008 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30009 return r;
30012 /* Adjust the cost of a scheduling dependency. Return the new cost of
30013 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30015 static int
30016 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
30018 enum attr_type attr_type;
30020 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30021 return cost;
30023 switch (REG_NOTE_KIND (link))
30025 case REG_DEP_TRUE:
30027 /* Data dependency; DEP_INSN writes a register that INSN reads
30028 some cycles later. */
30030 /* Separate a load from a narrower, dependent store. */
30031 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
30032 && GET_CODE (PATTERN (insn)) == SET
30033 && GET_CODE (PATTERN (dep_insn)) == SET
30034 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30035 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30036 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30037 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30038 return cost + 14;
30040 attr_type = get_attr_type (insn);
30042 switch (attr_type)
30044 case TYPE_JMPREG:
30045 /* Tell the first scheduling pass about the latency between
30046 a mtctr and bctr (and mtlr and br/blr). The first
30047 scheduling pass will not know about this latency since
30048 the mtctr instruction, which has the latency associated
30049 to it, will be generated by reload. */
30050 return 4;
30051 case TYPE_BRANCH:
30052 /* Leave some extra cycles between a compare and its
30053 dependent branch, to inhibit expensive mispredicts. */
30054 if ((rs6000_cpu_attr == CPU_PPC603
30055 || rs6000_cpu_attr == CPU_PPC604
30056 || rs6000_cpu_attr == CPU_PPC604E
30057 || rs6000_cpu_attr == CPU_PPC620
30058 || rs6000_cpu_attr == CPU_PPC630
30059 || rs6000_cpu_attr == CPU_PPC750
30060 || rs6000_cpu_attr == CPU_PPC7400
30061 || rs6000_cpu_attr == CPU_PPC7450
30062 || rs6000_cpu_attr == CPU_PPCE5500
30063 || rs6000_cpu_attr == CPU_PPCE6500
30064 || rs6000_cpu_attr == CPU_POWER4
30065 || rs6000_cpu_attr == CPU_POWER5
30066 || rs6000_cpu_attr == CPU_POWER7
30067 || rs6000_cpu_attr == CPU_POWER8
30068 || rs6000_cpu_attr == CPU_POWER9
30069 || rs6000_cpu_attr == CPU_CELL)
30070 && recog_memoized (dep_insn)
30071 && (INSN_CODE (dep_insn) >= 0))
30073 switch (get_attr_type (dep_insn))
30075 case TYPE_CMP:
30076 case TYPE_FPCOMPARE:
30077 case TYPE_CR_LOGICAL:
30078 case TYPE_DELAYED_CR:
30079 return cost + 2;
30080 case TYPE_EXTS:
30081 case TYPE_MUL:
30082 if (get_attr_dot (dep_insn) == DOT_YES)
30083 return cost + 2;
30084 else
30085 break;
30086 case TYPE_SHIFT:
30087 if (get_attr_dot (dep_insn) == DOT_YES
30088 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30089 return cost + 2;
30090 else
30091 break;
30092 default:
30093 break;
30095 break;
30097 case TYPE_STORE:
30098 case TYPE_FPSTORE:
30099 if ((rs6000_cpu == PROCESSOR_POWER6)
30100 && recog_memoized (dep_insn)
30101 && (INSN_CODE (dep_insn) >= 0))
30104 if (GET_CODE (PATTERN (insn)) != SET)
30105 /* If this happens, we have to extend this to schedule
30106 optimally. Return default for now. */
30107 return cost;
30109 /* Adjust the cost for the case where the value written
30110 by a fixed point operation is used as the address
30111 gen value on a store. */
30112 switch (get_attr_type (dep_insn))
30114 case TYPE_LOAD:
30115 case TYPE_CNTLZ:
30117 if (! store_data_bypass_p (dep_insn, insn))
30118 return get_attr_sign_extend (dep_insn)
30119 == SIGN_EXTEND_YES ? 6 : 4;
30120 break;
30122 case TYPE_SHIFT:
30124 if (! store_data_bypass_p (dep_insn, insn))
30125 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30126 6 : 3;
30127 break;
30129 case TYPE_INTEGER:
30130 case TYPE_ADD:
30131 case TYPE_LOGICAL:
30132 case TYPE_EXTS:
30133 case TYPE_INSERT:
30135 if (! store_data_bypass_p (dep_insn, insn))
30136 return 3;
30137 break;
30139 case TYPE_STORE:
30140 case TYPE_FPLOAD:
30141 case TYPE_FPSTORE:
30143 if (get_attr_update (dep_insn) == UPDATE_YES
30144 && ! store_data_bypass_p (dep_insn, insn))
30145 return 3;
30146 break;
30148 case TYPE_MUL:
30150 if (! store_data_bypass_p (dep_insn, insn))
30151 return 17;
30152 break;
30154 case TYPE_DIV:
30156 if (! store_data_bypass_p (dep_insn, insn))
30157 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30158 break;
30160 default:
30161 break;
30164 break;
30166 case TYPE_LOAD:
30167 if ((rs6000_cpu == PROCESSOR_POWER6)
30168 && recog_memoized (dep_insn)
30169 && (INSN_CODE (dep_insn) >= 0))
30172 /* Adjust the cost for the case where the value written
30173 by a fixed point instruction is used within the address
30174 gen portion of a subsequent load(u)(x). */
30175 switch (get_attr_type (dep_insn))
30177 case TYPE_LOAD:
30178 case TYPE_CNTLZ:
30180 if (set_to_load_agen (dep_insn, insn))
30181 return get_attr_sign_extend (dep_insn)
30182 == SIGN_EXTEND_YES ? 6 : 4;
30183 break;
30185 case TYPE_SHIFT:
30187 if (set_to_load_agen (dep_insn, insn))
30188 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30189 6 : 3;
30190 break;
30192 case TYPE_INTEGER:
30193 case TYPE_ADD:
30194 case TYPE_LOGICAL:
30195 case TYPE_EXTS:
30196 case TYPE_INSERT:
30198 if (set_to_load_agen (dep_insn, insn))
30199 return 3;
30200 break;
30202 case TYPE_STORE:
30203 case TYPE_FPLOAD:
30204 case TYPE_FPSTORE:
30206 if (get_attr_update (dep_insn) == UPDATE_YES
30207 && set_to_load_agen (dep_insn, insn))
30208 return 3;
30209 break;
30211 case TYPE_MUL:
30213 if (set_to_load_agen (dep_insn, insn))
30214 return 17;
30215 break;
30217 case TYPE_DIV:
30219 if (set_to_load_agen (dep_insn, insn))
30220 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30221 break;
30223 default:
30224 break;
30227 break;
30229 case TYPE_FPLOAD:
30230 if ((rs6000_cpu == PROCESSOR_POWER6)
30231 && get_attr_update (insn) == UPDATE_NO
30232 && recog_memoized (dep_insn)
30233 && (INSN_CODE (dep_insn) >= 0)
30234 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30235 return 2;
30237 default:
30238 break;
30241 /* Fall out to return default cost. */
30243 break;
30245 case REG_DEP_OUTPUT:
30246 /* Output dependency; DEP_INSN writes a register that INSN writes some
30247 cycles later. */
30248 if ((rs6000_cpu == PROCESSOR_POWER6)
30249 && recog_memoized (dep_insn)
30250 && (INSN_CODE (dep_insn) >= 0))
30252 attr_type = get_attr_type (insn);
30254 switch (attr_type)
30256 case TYPE_FP:
30257 case TYPE_FPSIMPLE:
30258 if (get_attr_type (dep_insn) == TYPE_FP
30259 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30260 return 1;
30261 break;
30262 case TYPE_FPLOAD:
30263 if (get_attr_update (insn) == UPDATE_NO
30264 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30265 return 2;
30266 break;
30267 default:
30268 break;
30271 /* Fall through, no cost for output dependency. */
30273 case REG_DEP_ANTI:
30274 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30275 cycles later. */
30276 return 0;
30278 default:
30279 gcc_unreachable ();
30282 return cost;
30285 /* Debug version of rs6000_adjust_cost. */
30287 static int
30288 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
30289 int cost)
30291 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
30293 if (ret != cost)
30295 const char *dep;
30297 switch (REG_NOTE_KIND (link))
30299 default: dep = "unknown dependency"; break;
30300 case REG_DEP_TRUE: dep = "data dependency"; break;
30301 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30302 case REG_DEP_ANTI: dep = "anti dependency"; break;
30305 fprintf (stderr,
30306 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30307 "%s, insn:\n", ret, cost, dep);
30309 debug_rtx (insn);
30312 return ret;
30315 /* Return true if INSN is microcoded, false otherwise. */
30318 static bool
30319 is_microcoded_insn (rtx_insn *insn)
30321 if (!insn || !NONDEBUG_INSN_P (insn)
30322 || GET_CODE (PATTERN (insn)) == USE
30323 || GET_CODE (PATTERN (insn)) == CLOBBER)
30324 return false;
30326 if (rs6000_cpu_attr == CPU_CELL)
30327 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30329 if (rs6000_sched_groups
30330 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30332 enum attr_type type = get_attr_type (insn);
30333 if ((type == TYPE_LOAD
30334 && get_attr_update (insn) == UPDATE_YES
30335 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30336 || ((type == TYPE_LOAD || type == TYPE_STORE)
30337 && get_attr_update (insn) == UPDATE_YES
30338 && get_attr_indexed (insn) == INDEXED_YES)
30339 || type == TYPE_MFCR)
30340 return true;
30343 return false;
30346 /* The function returns true if INSN is cracked into 2 instructions
30347 by the processor (and therefore occupies 2 issue slots). */
30349 static bool
30350 is_cracked_insn (rtx_insn *insn)
30352 if (!insn || !NONDEBUG_INSN_P (insn)
30353 || GET_CODE (PATTERN (insn)) == USE
30354 || GET_CODE (PATTERN (insn)) == CLOBBER)
30355 return false;
30357 if (rs6000_sched_groups
30358 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30360 enum attr_type type = get_attr_type (insn);
30361 if ((type == TYPE_LOAD
30362 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30363 && get_attr_update (insn) == UPDATE_NO)
30364 || (type == TYPE_LOAD
30365 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30366 && get_attr_update (insn) == UPDATE_YES
30367 && get_attr_indexed (insn) == INDEXED_NO)
30368 || (type == TYPE_STORE
30369 && get_attr_update (insn) == UPDATE_YES
30370 && get_attr_indexed (insn) == INDEXED_NO)
30371 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30372 && get_attr_update (insn) == UPDATE_YES)
30373 || type == TYPE_DELAYED_CR
30374 || (type == TYPE_EXTS
30375 && get_attr_dot (insn) == DOT_YES)
30376 || (type == TYPE_SHIFT
30377 && get_attr_dot (insn) == DOT_YES
30378 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30379 || (type == TYPE_MUL
30380 && get_attr_dot (insn) == DOT_YES)
30381 || type == TYPE_DIV
30382 || (type == TYPE_INSERT
30383 && get_attr_size (insn) == SIZE_32))
30384 return true;
30387 return false;
30390 /* The function returns true if INSN can be issued only from
30391 the branch slot. */
30393 static bool
30394 is_branch_slot_insn (rtx_insn *insn)
30396 if (!insn || !NONDEBUG_INSN_P (insn)
30397 || GET_CODE (PATTERN (insn)) == USE
30398 || GET_CODE (PATTERN (insn)) == CLOBBER)
30399 return false;
30401 if (rs6000_sched_groups)
30403 enum attr_type type = get_attr_type (insn);
30404 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30405 return true;
30406 return false;
30409 return false;
30412 /* Return true if OUT_INSN sets a value that is used in the
30413 address generation computation of IN_INSN. */
30414 static bool
30415 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30417 rtx out_set, in_set;
30419 /* For performance reasons, only handle the simple case where
30420 both insns are a single_set. */
30421 out_set = single_set (out_insn);
30422 if (out_set)
30424 in_set = single_set (in_insn);
30425 if (in_set)
30426 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30429 return false;
30432 /* Try to determine base/offset/size parts of the given MEM.
30433 Return true if successful, false if all the values couldn't
30434 be determined.
30436 This function only looks for REG or REG+CONST address forms.
30437 REG+REG address form will return false. */
30439 static bool
30440 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30441 HOST_WIDE_INT *size)
30443 rtx addr_rtx;
30444 if (MEM_SIZE_KNOWN_P (mem))
30445 *size = MEM_SIZE (mem);
30446 else
30447 return false;
30449 addr_rtx = (XEXP (mem, 0));
30450 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30451 addr_rtx = XEXP (addr_rtx, 1);
30453 *offset = 0;
30454 while (GET_CODE (addr_rtx) == PLUS
30455 && CONST_INT_P (XEXP (addr_rtx, 1)))
30457 *offset += INTVAL (XEXP (addr_rtx, 1));
30458 addr_rtx = XEXP (addr_rtx, 0);
30460 if (!REG_P (addr_rtx))
30461 return false;
30463 *base = addr_rtx;
30464 return true;
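/* For instance, given (mem:SI (plus:DI (reg:DI 9) (const_int 16))) whose
   size is known, this returns base == (reg:DI 9), offset == 16 and
   size == 4; a REG+REG form such as (mem:SI (plus (reg 9) (reg 10)))
   fails the final REG_P test and returns false.  */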
30467 /* Return true if the target storage location of MEM1 is adjacent
30468 to the target storage location of MEM2. */
30471 static bool
30472 adjacent_mem_locations (rtx mem1, rtx mem2)
30474 rtx reg1, reg2;
30475 HOST_WIDE_INT off1, size1, off2, size2;
30477 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30478 && get_memref_parts (mem2, &reg2, &off2, &size2))
30479 return ((REGNO (reg1) == REGNO (reg2))
30480 && ((off1 + size1 == off2)
30481 || (off2 + size2 == off1)));
30483 return false;
30486 /* This function returns true if it can be determined that the two MEM
30487 locations overlap by at least 1 byte based on base reg/offset/size. */
30489 static bool
30490 mem_locations_overlap (rtx mem1, rtx mem2)
30492 rtx reg1, reg2;
30493 HOST_WIDE_INT off1, size1, off2, size2;
30495 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30496 && get_memref_parts (mem2, &reg2, &off2, &size2))
30497 return ((REGNO (reg1) == REGNO (reg2))
30498 && (((off1 <= off2) && (off1 + size1 > off2))
30499 || ((off2 <= off1) && (off2 + size2 > off1))));
30501 return false;
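/* Hypothetical illustration: two stack slots based on r9, one at offset 8
   with size 8 and one at offset 12 with size 4, overlap
   (8 <= 12 && 8 + 8 > 12); slots at offset 8/size 4 and offset 12/size 8
   would instead be adjacent (8 + 4 == 12) but not overlapping.  */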
30504 /* A C statement (sans semicolon) to update the integer scheduling
30505 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30506 INSN earlier, reduce the priority to execute INSN later. Do not
30507 define this macro if you do not need to adjust the scheduling
30508 priorities of insns. */
30510 static int
30511 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30513 rtx load_mem, str_mem;
30514 /* On machines (like the 750) which have asymmetric integer units,
30515 where one integer unit can do multiply and divides and the other
30516 can't, reduce the priority of multiply/divide so it is scheduled
30517 before other integer operations. */
30519 #if 0
30520 if (! INSN_P (insn))
30521 return priority;
30523 if (GET_CODE (PATTERN (insn)) == USE)
30524 return priority;
30526 switch (rs6000_cpu_attr) {
30527 case CPU_PPC750:
30528 switch (get_attr_type (insn))
30530 default:
30531 break;
30533 case TYPE_MUL:
30534 case TYPE_DIV:
30535 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30536 priority, priority);
30537 if (priority >= 0 && priority < 0x01000000)
30538 priority >>= 3;
30539 break;
30542 #endif
30544 if (insn_must_be_first_in_group (insn)
30545 && reload_completed
30546 && current_sched_info->sched_max_insns_priority
30547 && rs6000_sched_restricted_insns_priority)
30550 /* Prioritize insns that can be dispatched only in the first
30551 dispatch slot. */
30552 if (rs6000_sched_restricted_insns_priority == 1)
30553 /* Attach highest priority to insn. This means that in
30554 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30555 precede 'priority' (critical path) considerations. */
30556 return current_sched_info->sched_max_insns_priority;
30557 else if (rs6000_sched_restricted_insns_priority == 2)
30558 /* Increase priority of insn by a minimal amount. This means that in
30559 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30560 considerations precede dispatch-slot restriction considerations. */
30561 return (priority + 1);
30564 if (rs6000_cpu == PROCESSOR_POWER6
30565 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30566 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30567 /* Attach highest priority to insn if the scheduler has just issued two
30568 stores and this instruction is a load, or two loads and this instruction
30569 is a store. Power6 wants loads and stores scheduled alternately
30570 when possible. */
30571 return current_sched_info->sched_max_insns_priority;
30573 return priority;
30576 /* Return true if the instruction is nonpipelined on the Cell. */
30577 static bool
30578 is_nonpipeline_insn (rtx_insn *insn)
30580 enum attr_type type;
30581 if (!insn || !NONDEBUG_INSN_P (insn)
30582 || GET_CODE (PATTERN (insn)) == USE
30583 || GET_CODE (PATTERN (insn)) == CLOBBER)
30584 return false;
30586 type = get_attr_type (insn);
30587 if (type == TYPE_MUL
30588 || type == TYPE_DIV
30589 || type == TYPE_SDIV
30590 || type == TYPE_DDIV
30591 || type == TYPE_SSQRT
30592 || type == TYPE_DSQRT
30593 || type == TYPE_MFCR
30594 || type == TYPE_MFCRF
30595 || type == TYPE_MFJMPR)
30597 return true;
30599 return false;
30603 /* Return how many instructions the machine can issue per cycle. */
30605 static int
30606 rs6000_issue_rate (void)
30608 /* Unless scheduling for register pressure, use issue rate of 1 for
30609 first scheduling pass to decrease degradation. */
30610 if (!reload_completed && !flag_sched_pressure)
30611 return 1;
30613 switch (rs6000_cpu_attr) {
30614 case CPU_RS64A:
30615 case CPU_PPC601: /* ? */
30616 case CPU_PPC7450:
30617 return 3;
30618 case CPU_PPC440:
30619 case CPU_PPC603:
30620 case CPU_PPC750:
30621 case CPU_PPC7400:
30622 case CPU_PPC8540:
30623 case CPU_PPC8548:
30624 case CPU_CELL:
30625 case CPU_PPCE300C2:
30626 case CPU_PPCE300C3:
30627 case CPU_PPCE500MC:
30628 case CPU_PPCE500MC64:
30629 case CPU_PPCE5500:
30630 case CPU_PPCE6500:
30631 case CPU_TITAN:
30632 return 2;
30633 case CPU_PPC476:
30634 case CPU_PPC604:
30635 case CPU_PPC604E:
30636 case CPU_PPC620:
30637 case CPU_PPC630:
30638 return 4;
30639 case CPU_POWER4:
30640 case CPU_POWER5:
30641 case CPU_POWER6:
30642 case CPU_POWER7:
30643 return 5;
30644 case CPU_POWER8:
30645 return 7;
30646 case CPU_POWER9:
30647 return 6;
30648 default:
30649 return 1;
30653 /* Return how many instructions to look ahead for better insn
30654 scheduling. */
30656 static int
30657 rs6000_use_sched_lookahead (void)
30659 switch (rs6000_cpu_attr)
30661 case CPU_PPC8540:
30662 case CPU_PPC8548:
30663 return 4;
30665 case CPU_CELL:
30666 return (reload_completed ? 8 : 0);
30668 default:
30669 return 0;
30673 /* We are choosing insn from the ready queue. Return zero if INSN can be
30674 chosen. */
30675 static int
30676 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30678 if (ready_index == 0)
30679 return 0;
30681 if (rs6000_cpu_attr != CPU_CELL)
30682 return 0;
30684 gcc_assert (insn != NULL_RTX && INSN_P (insn));
30686 if (!reload_completed
30687 || is_nonpipeline_insn (insn)
30688 || is_microcoded_insn (insn))
30689 return 1;
30691 return 0;
30694 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30695 and return true. */
30697 static bool
30698 find_mem_ref (rtx pat, rtx *mem_ref)
30700 const char * fmt;
30701 int i, j;
30703 /* stack_tie does not produce any real memory traffic. */
30704 if (tie_operand (pat, VOIDmode))
30705 return false;
30707 if (GET_CODE (pat) == MEM)
30709 *mem_ref = pat;
30710 return true;
30713 /* Recursively process the pattern. */
30714 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30716 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
30718 if (fmt[i] == 'e')
30720 if (find_mem_ref (XEXP (pat, i), mem_ref))
30721 return true;
30723 else if (fmt[i] == 'E')
30724 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30726 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30727 return true;
30731 return false;
30734 /* Determine if PAT is a PATTERN of a load insn. */
30736 static bool
30737 is_load_insn1 (rtx pat, rtx *load_mem)
30739 if (!pat)
30740 return false;
30742 if (GET_CODE (pat) == SET)
30743 return find_mem_ref (SET_SRC (pat), load_mem);
30745 if (GET_CODE (pat) == PARALLEL)
30747 int i;
30749 for (i = 0; i < XVECLEN (pat, 0); i++)
30750 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30751 return true;
30754 return false;
30757 /* Determine if INSN loads from memory. */
30759 static bool
30760 is_load_insn (rtx insn, rtx *load_mem)
30762 if (!insn || !INSN_P (insn))
30763 return false;
30765 if (CALL_P (insn))
30766 return false;
30768 return is_load_insn1 (PATTERN (insn), load_mem);
30771 /* Determine if PAT is a PATTERN of a store insn. */
30773 static bool
30774 is_store_insn1 (rtx pat, rtx *str_mem)
30776 if (!pat)
30777 return false;
30779 if (GET_CODE (pat) == SET)
30780 return find_mem_ref (SET_DEST (pat), str_mem);
30782 if (GET_CODE (pat) == PARALLEL)
30784 int i;
30786 for (i = 0; i < XVECLEN (pat, 0); i++)
30787 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
30788 return true;
30791 return false;
30794 /* Determine if INSN stores to memory. */
30796 static bool
30797 is_store_insn (rtx insn, rtx *str_mem)
30799 if (!insn || !INSN_P (insn))
30800 return false;
30802 return is_store_insn1 (PATTERN (insn), str_mem);
30805 /* Return whether TYPE is a Power9 pairable vector instruction type. */
30807 static bool
30808 is_power9_pairable_vec_type (enum attr_type type)
30810 switch (type)
30812 case TYPE_VECSIMPLE:
30813 case TYPE_VECCOMPLEX:
30814 case TYPE_VECDIV:
30815 case TYPE_VECCMP:
30816 case TYPE_VECPERM:
30817 case TYPE_VECFLOAT:
30818 case TYPE_VECFDIV:
30819 case TYPE_VECDOUBLE:
30820 return true;
30821 default:
30822 break;
30824 return false;
30827 /* Returns whether the dependence between INSN and NEXT is considered
30828 costly by the given target. */
30830 static bool
30831 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
30833 rtx insn;
30834 rtx next;
30835 rtx load_mem, str_mem;
30837 /* If the flag is not enabled - no dependence is considered costly;
30838 allow all dependent insns in the same group.
30839 This is the most aggressive option. */
30840 if (rs6000_sched_costly_dep == no_dep_costly)
30841 return false;
30843 /* If the flag is set to 1 - a dependence is always considered costly;
30844 do not allow dependent instructions in the same group.
30845 This is the most conservative option. */
30846 if (rs6000_sched_costly_dep == all_deps_costly)
30847 return true;
30849 insn = DEP_PRO (dep);
30850 next = DEP_CON (dep);
30852 if (rs6000_sched_costly_dep == store_to_load_dep_costly
30853 && is_load_insn (next, &load_mem)
30854 && is_store_insn (insn, &str_mem))
30855 /* Prevent load after store in the same group. */
30856 return true;
30858 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
30859 && is_load_insn (next, &load_mem)
30860 && is_store_insn (insn, &str_mem)
30861 && DEP_TYPE (dep) == REG_DEP_TRUE
30862 && mem_locations_overlap(str_mem, load_mem))
30863 /* Prevent load after store in the same group if it is a true
30864 dependence. */
30865 return true;
30867 /* The flag is set to X; dependences with latency >= X are considered costly,
30868 and will not be scheduled in the same group. */
30869 if (rs6000_sched_costly_dep <= max_dep_latency
30870 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
30871 return true;
30873 return false;
30876 /* Return the next insn after INSN that is found before TAIL is reached,
30877 skipping any "non-active" insns - insns that will not actually occupy
30878 an issue slot. Return NULL_RTX if such an insn is not found. */
30880 static rtx_insn *
30881 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
30883 if (insn == NULL_RTX || insn == tail)
30884 return NULL;
30886 while (1)
30888 insn = NEXT_INSN (insn);
30889 if (insn == NULL_RTX || insn == tail)
30890 return NULL;
30892 if (CALL_P (insn)
30893 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
30894 || (NONJUMP_INSN_P (insn)
30895 && GET_CODE (PATTERN (insn)) != USE
30896 && GET_CODE (PATTERN (insn)) != CLOBBER
30897 && INSN_CODE (insn) != CODE_FOR_stack_tie))
30898 break;
30900 return insn;
30903 /* Do Power9 specific sched_reorder2 reordering of ready list. */
30905 static int
30906 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
30908 int pos;
30909 int i;
30910 rtx_insn *tmp;
30911 enum attr_type type;
30913 type = get_attr_type (last_scheduled_insn);
30915 /* Try to issue fixed point divides back-to-back in pairs so they will be
30916 routed to separate execution units and execute in parallel. */
30917 if (type == TYPE_DIV && divide_cnt == 0)
30919 /* First divide has been scheduled. */
30920 divide_cnt = 1;
30922 /* Scan the ready list looking for another divide, if found move it
30923 to the end of the list so it is chosen next. */
30924 pos = lastpos;
30925 while (pos >= 0)
30927 if (recog_memoized (ready[pos]) >= 0
30928 && get_attr_type (ready[pos]) == TYPE_DIV)
30930 tmp = ready[pos];
30931 for (i = pos; i < lastpos; i++)
30932 ready[i] = ready[i + 1];
30933 ready[lastpos] = tmp;
30934 break;
30936 pos--;
30939 else
30941 /* Last insn was the 2nd divide or not a divide, reset the counter. */
30942 divide_cnt = 0;
30944 /* Power9 can execute 2 vector operations and 2 vector loads in a single
30945 cycle. So try to pair up and alternate groups of vector and vector
30946 load instructions.
30948 To aid this formation, a counter is maintained to keep track of
30949 vec/vecload insns issued. The value of vec_load_pendulum maintains
30950 the current state with the following values:
30952 0 : Initial state, no vec/vecload group has been started.
30954 -1 : 1 vector load has been issued and another has been found on
30955 the ready list and moved to the end.
30957 -2 : 2 vector loads have been issued and a vector operation has
30958 been found and moved to the end of the ready list.
30960 -3 : 2 vector loads and a vector insn have been issued and a
30961 vector operation has been found and moved to the end of the
30962 ready list.
30964 1 : 1 vector insn has been issued and another has been found and
30965 moved to the end of the ready list.
30967 2 : 2 vector insns have been issued and a vector load has been
30968 found and moved to the end of the ready list.
30970 3 : 2 vector insns and a vector load have been issued and another
30971 vector load has been found and moved to the end of the ready
30972 list. */
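/* So a best-case issue sequence walks the pendulum through, e.g.,
   0 -> -1 (vecload, vecload found) -> -2 (vec op found) -> -3 (second
   vec op found) and then starts over, yielding a load/load/op/op
   pattern that keeps both vector pipes and both load ports busy.  */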
30973 if (type == TYPE_VECLOAD)
30975 /* Issued a vecload. */
30976 if (vec_load_pendulum == 0)
30978 /* We issued a single vecload, look for another and move it to
30979 the end of the ready list so it will be scheduled next.
30980 Set pendulum if found. */
30981 pos = lastpos;
30982 while (pos >= 0)
30984 if (recog_memoized (ready[pos]) >= 0
30985 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
30987 tmp = ready[pos];
30988 for (i = pos; i < lastpos; i++)
30989 ready[i] = ready[i + 1];
30990 ready[lastpos] = tmp;
30991 vec_load_pendulum = -1;
30992 return cached_can_issue_more;
30994 pos--;
30997 else if (vec_load_pendulum == -1)
30999 /* This is the second vecload we've issued, search the ready
31000 list for a vector operation so we can try to schedule a
31001 pair of those next. If found move to the end of the ready
31002 list so it is scheduled next and set the pendulum. */
31003 pos = lastpos;
31004 while (pos >= 0)
31006 if (recog_memoized (ready[pos]) >= 0
31007 && is_power9_pairable_vec_type (get_attr_type (ready[pos])))
31010 tmp = ready[pos];
31011 for (i = pos; i < lastpos; i++)
31012 ready[i] = ready[i + 1];
31013 ready[lastpos] = tmp;
31014 vec_load_pendulum = -2;
31015 return cached_can_issue_more;
31017 pos--;
31020 else if (vec_load_pendulum == 2)
31022 /* Two vector ops have been issued and we've just issued a
31023 vecload, look for another vecload and move to end of ready
31024 list if found. */
31025 pos = lastpos;
31026 while (pos >= 0)
31028 if (recog_memoized (ready[pos]) >= 0
31029 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31031 tmp = ready[pos];
31032 for (i = pos; i < lastpos; i++)
31033 ready[i] = ready[i + 1];
31034 ready[lastpos] = tmp;
31035 /* Set pendulum so that next vecload will be seen as
31036 finishing a group, not start of one. */
31037 vec_load_pendulum = 3;
31038 return cached_can_issue_more;
31040 pos--;
31044 else if (is_power9_pairable_vec_type (type))
31046 /* Issued a vector operation. */
31047 if (vec_load_pendulum == 0)
31048 /* We issued a single vec op, look for another and move it
31049 to the end of the ready list so it will be scheduled next.
31050 Set pendulum if found. */
31052 pos = lastpos;
31053 while (pos >= 0)
31055 if (recog_memoized (ready[pos]) >= 0
31056 && is_power9_pairable_vec_type (get_attr_type (ready[pos])))
31059 tmp = ready[pos];
31060 for (i = pos; i < lastpos; i++)
31061 ready[i] = ready[i + 1];
31062 ready[lastpos] = tmp;
31063 vec_load_pendulum = 1;
31064 return cached_can_issue_more;
31066 pos--;
31069 else if (vec_load_pendulum == 1)
31071 /* This is the second vec op we've issued, search the ready
31072 list for a vecload operation so we can try to schedule a
31073 pair of those next. If found move to the end of the ready
31074 list so it is scheduled next and set the pendulum. */
31075 pos = lastpos;
31076 while (pos >= 0)
31078 if (recog_memoized (ready[pos]) >= 0
31079 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31081 tmp = ready[pos];
31082 for (i = pos; i < lastpos; i++)
31083 ready[i] = ready[i + 1];
31084 ready[lastpos] = tmp;
31085 vec_load_pendulum = 2;
31086 return cached_can_issue_more;
31088 pos--;
31091 else if (vec_load_pendulum == -2)
31093 /* Two vecload ops have been issued and we've just issued a
31094 vec op, look for another vec op and move to end of ready
31095 list if found. */
31096 pos = lastpos;
31097 while (pos >= 0)
31099 if (recog_memoized (ready[pos]) >= 0
31100 && is_power9_pairable_vec_type (get_attr_type (ready[pos])))
31103 tmp = ready[pos];
31104 for (i = pos; i < lastpos; i++)
31105 ready[i] = ready[i + 1];
31106 ready[lastpos] = tmp;
31107 /* Set pendulum so that next vec op will be seen as
31108 finishing a group, not start of one. */
31109 vec_load_pendulum = -3;
31110 return cached_can_issue_more;
31112 pos--;
31117 /* We've either finished a vec/vecload group, couldn't find an insn to
31118 continue the current group, or the last insn had nothing to do with
31119 a group. In any case, reset the pendulum. */
31120 vec_load_pendulum = 0;
31123 return cached_can_issue_more;
31126 /* We are about to begin issuing insns for this clock cycle. */
31128 static int
31129 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31130 rtx_insn **ready ATTRIBUTE_UNUSED,
31131 int *pn_ready ATTRIBUTE_UNUSED,
31132 int clock_var ATTRIBUTE_UNUSED)
31134 int n_ready = *pn_ready;
31136 if (sched_verbose)
31137 fprintf (dump, "// rs6000_sched_reorder :\n");
31139 /* Reorder the ready list, if the second to last ready insn
31140 is a nonpipeline insn. */
31141 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
31143 if (is_nonpipeline_insn (ready[n_ready - 1])
31144 && (recog_memoized (ready[n_ready - 2]) > 0))
31145 /* Simply swap first two insns. */
31146 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31149 if (rs6000_cpu == PROCESSOR_POWER6)
31150 load_store_pendulum = 0;
31152 return rs6000_issue_rate ();
31155 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31157 static int
31158 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31159 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31161 if (sched_verbose)
31162 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31164 /* For Power6, we need to handle some special cases to try and keep the
31165 store queue from overflowing and triggering expensive flushes.
31167 This code monitors how load and store instructions are being issued
31168 and skews the ready list one way or the other to increase the likelihood
31169 that a desired instruction is issued at the proper time.
31171 A couple of things are done. First, we maintain a "load_store_pendulum"
31172 to track the current state of load/store issue.
31174 - If the pendulum is at zero, then no loads or stores have been
31175 issued in the current cycle so we do nothing.
31177 - If the pendulum is 1, then a single load has been issued in this
31178 cycle and we attempt to locate another load in the ready list to
31179 issue with it.
31181 - If the pendulum is -2, then two stores have already been
31182 issued in this cycle, so we increase the priority of the first load
31183 in the ready list to increase its likelihood of being chosen first
31184 in the next cycle.
31186 - If the pendulum is -1, then a single store has been issued in this
31187 cycle and we attempt to locate another store in the ready list to
31188 issue with it, preferring a store to an adjacent memory location to
31189 facilitate store pairing in the store queue.
31191 - If the pendulum is 2, then two loads have already been
31192 issued in this cycle, so we increase the priority of the first store
31193 in the ready list to increase its likelihood of being chosen first
31194 in the next cycle.
31196 - If the pendulum < -2 or > 2, then do nothing.
31198 Note: This code covers the most common scenarios. There exist
31199 non-load/store instructions which make use of the LSU and which
31200 would need to be accounted for to strictly model the behavior
31201 of the machine. Those instructions are currently unaccounted
31202 for to help minimize the compile-time overhead of this code. */
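/* (Illustrative model, not the implementation itself.)  Assuming every
   insn is exactly a load, a store, or neither, the update below is:

     if (is_store (insn))      load_store_pendulum--;  (swing left)
     else if (is_load (insn))  load_store_pendulum++;  (swing right)

   E.g. two stores issued in one cycle drive the pendulum to -2, at
   which point the first load on the ready list gets a priority boost.  */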
31204 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
31206 int pos;
31207 int i;
31208 rtx_insn *tmp;
31209 rtx load_mem, str_mem;
31211 if (is_store_insn (last_scheduled_insn, &str_mem))
31212 /* Issuing a store, swing the load_store_pendulum to the left */
31213 load_store_pendulum--;
31214 else if (is_load_insn (last_scheduled_insn, &load_mem))
31215 /* Issuing a load, swing the load_store_pendulum to the right */
31216 load_store_pendulum++;
31217 else
31218 return cached_can_issue_more;
31220 /* If the pendulum is balanced, or there is only one instruction on
31221 the ready list, then all is well, so return. */
31222 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31223 return cached_can_issue_more;
31225 if (load_store_pendulum == 1)
31227 /* A load has been issued in this cycle. Scan the ready list
31228 for another load to issue with it */
31229 pos = *pn_ready - 1;
31231 while (pos >= 0)
31233 if (is_load_insn (ready[pos], &load_mem))
31235 /* Found a load. Move it to the head of the ready list,
31236 and adjust its priority so that it is more likely to
31237 stay there */
31238 tmp = ready[pos];
31239 for (i = pos; i < *pn_ready - 1; i++)
31240 ready[i] = ready[i + 1];
31241 ready[*pn_ready - 1] = tmp;
31243 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31244 INSN_PRIORITY (tmp)++;
31245 break;
31247 pos--;
31250 else if (load_store_pendulum == -2)
31252 /* Two stores have been issued in this cycle. Increase the
31253 priority of the first load in the ready list to favor it for
31254 issuing in the next cycle. */
31255 pos = *pn_ready - 1;
31257 while (pos >= 0)
31259 if (is_load_insn (ready[pos], &load_mem)
31260 && !sel_sched_p ()
31261 && INSN_PRIORITY_KNOWN (ready[pos]))
31263 INSN_PRIORITY (ready[pos])++;
31265 /* Adjust the pendulum to account for the fact that a load
31266 was found and increased in priority. This is to prevent
31267 increasing the priority of multiple loads */
31268 load_store_pendulum--;
31270 break;
31272 pos--;
31275 else if (load_store_pendulum == -1)
31277 /* A store has been issued in this cycle. Scan the ready list for
31278 another store to issue with it, preferring a store to an adjacent
31279 memory location */
31280 int first_store_pos = -1;
31282 pos = *pn_ready - 1;
31284 while (pos >= 0)
31286 if (is_store_insn (ready[pos], &str_mem))
31288 rtx str_mem2;
31289 /* Maintain the index of the first store found on the
31290 list */
31291 if (first_store_pos == -1)
31292 first_store_pos = pos;
31294 if (is_store_insn (last_scheduled_insn, &str_mem2)
31295 && adjacent_mem_locations (str_mem, str_mem2))
31297 /* Found an adjacent store. Move it to the head of the
31298 ready list, and adjust its priority so that it is
31299 more likely to stay there */
31300 tmp = ready[pos];
31301 for (i = pos; i < *pn_ready - 1; i++)
31302 ready[i] = ready[i + 1];
31303 ready[*pn_ready - 1] = tmp;
31305 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31306 INSN_PRIORITY (tmp)++;
31308 first_store_pos = -1;
31310 break;
31313 pos--;
31316 if (first_store_pos >= 0)
31318 /* An adjacent store wasn't found, but a non-adjacent store was,
31319 so move the non-adjacent store to the front of the ready
31320 list, and adjust its priority so that it is more likely to
31321 stay there. */
31322 tmp = ready[first_store_pos];
31323 for (i = first_store_pos; i < *pn_ready - 1; i++)
31324 ready[i] = ready[i + 1];
31325 ready[*pn_ready - 1] = tmp;
31326 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31327 INSN_PRIORITY (tmp)++;
31330 else if (load_store_pendulum == 2)
31332 /* Two loads have been issued in this cycle. Increase the priority
31333 of the first store in the ready list to favor it for issuing in
31334 the next cycle. */
31335 pos = *pn_ready - 1;
31337 while (pos >= 0)
31339 if (is_store_insn (ready[pos], &str_mem)
31340 && !sel_sched_p ()
31341 && INSN_PRIORITY_KNOWN (ready[pos]))
31343 INSN_PRIORITY (ready[pos])++;
31345 /* Adjust the pendulum to account for the fact that a store
31346 was found and increased in priority. This is to prevent
31347 increasing the priority of multiple stores */
31348 load_store_pendulum++;
31350 break;
31352 pos--;
31357 /* Do Power9 dependent reordering if necessary. */
31358 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
31359 && recog_memoized (last_scheduled_insn) >= 0)
31360 return power9_sched_reorder2 (ready, *pn_ready - 1);
31362 return cached_can_issue_more;
31365 /* Return whether the presence of INSN causes a dispatch group termination
31366 of group WHICH_GROUP.
31368 If WHICH_GROUP == current_group, this function will return true if INSN
31369 causes the termination of the current group (i.e., the dispatch group to
31370 which INSN belongs). This means that INSN will be the last insn in the
31371 group it belongs to.
31373 If WHICH_GROUP == previous_group, this function will return true if INSN
31374 causes the termination of the previous group (i.e., the dispatch group that
31375 precedes the group to which INSN belongs). This means that INSN will be
31376 the first insn in the group it belongs to. */
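/* In tabular form (a restatement of the logic below):

     must_be_first  must_be_last | current_group  previous_group
           1             1       |     true           true
           0             1       |     true           false
           1             0       |     false          true
           0             0       |     false          false  */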
31378 static bool
31379 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31381 bool first, last;
31383 if (! insn)
31384 return false;
31386 first = insn_must_be_first_in_group (insn);
31387 last = insn_must_be_last_in_group (insn);
31389 if (first && last)
31390 return true;
31392 if (which_group == current_group)
31393 return last;
31394 else if (which_group == previous_group)
31395 return first;
31397 return false;
31401 static bool
31402 insn_must_be_first_in_group (rtx_insn *insn)
31404 enum attr_type type;
31406 if (!insn
31407 || NOTE_P (insn)
31408 || DEBUG_INSN_P (insn)
31409 || GET_CODE (PATTERN (insn)) == USE
31410 || GET_CODE (PATTERN (insn)) == CLOBBER)
31411 return false;
31413 switch (rs6000_cpu)
31415 case PROCESSOR_POWER5:
31416 if (is_cracked_insn (insn))
31417 return true;
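/* Falls through: a POWER5 insn is also subject to the POWER4
   microcode check below.  */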
31418 case PROCESSOR_POWER4:
31419 if (is_microcoded_insn (insn))
31420 return true;
31422 if (!rs6000_sched_groups)
31423 return false;
31425 type = get_attr_type (insn);
31427 switch (type)
31429 case TYPE_MFCR:
31430 case TYPE_MFCRF:
31431 case TYPE_MTCR:
31432 case TYPE_DELAYED_CR:
31433 case TYPE_CR_LOGICAL:
31434 case TYPE_MTJMPR:
31435 case TYPE_MFJMPR:
31436 case TYPE_DIV:
31437 case TYPE_LOAD_L:
31438 case TYPE_STORE_C:
31439 case TYPE_ISYNC:
31440 case TYPE_SYNC:
31441 return true;
31442 default:
31443 break;
31445 break;
31446 case PROCESSOR_POWER6:
31447 type = get_attr_type (insn);
31449 switch (type)
31451 case TYPE_EXTS:
31452 case TYPE_CNTLZ:
31453 case TYPE_TRAP:
31454 case TYPE_MUL:
31455 case TYPE_INSERT:
31456 case TYPE_FPCOMPARE:
31457 case TYPE_MFCR:
31458 case TYPE_MTCR:
31459 case TYPE_MFJMPR:
31460 case TYPE_MTJMPR:
31461 case TYPE_ISYNC:
31462 case TYPE_SYNC:
31463 case TYPE_LOAD_L:
31464 case TYPE_STORE_C:
31465 return true;
31466 case TYPE_SHIFT:
31467 if (get_attr_dot (insn) == DOT_NO
31468 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31469 return true;
31470 else
31471 break;
31472 case TYPE_DIV:
31473 if (get_attr_size (insn) == SIZE_32)
31474 return true;
31475 else
31476 break;
31477 case TYPE_LOAD:
31478 case TYPE_STORE:
31479 case TYPE_FPLOAD:
31480 case TYPE_FPSTORE:
31481 if (get_attr_update (insn) == UPDATE_YES)
31482 return true;
31483 else
31484 break;
31485 default:
31486 break;
31488 break;
31489 case PROCESSOR_POWER7:
31490 type = get_attr_type (insn);
31492 switch (type)
31494 case TYPE_CR_LOGICAL:
31495 case TYPE_MFCR:
31496 case TYPE_MFCRF:
31497 case TYPE_MTCR:
31498 case TYPE_DIV:
31499 case TYPE_ISYNC:
31500 case TYPE_LOAD_L:
31501 case TYPE_STORE_C:
31502 case TYPE_MFJMPR:
31503 case TYPE_MTJMPR:
31504 return true;
31505 case TYPE_MUL:
31506 case TYPE_SHIFT:
31507 case TYPE_EXTS:
31508 if (get_attr_dot (insn) == DOT_YES)
31509 return true;
31510 else
31511 break;
31512 case TYPE_LOAD:
31513 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31514 || get_attr_update (insn) == UPDATE_YES)
31515 return true;
31516 else
31517 break;
31518 case TYPE_STORE:
31519 case TYPE_FPLOAD:
31520 case TYPE_FPSTORE:
31521 if (get_attr_update (insn) == UPDATE_YES)
31522 return true;
31523 else
31524 break;
31525 default:
31526 break;
31528 break;
31529 case PROCESSOR_POWER8:
31530 type = get_attr_type (insn);
31532 switch (type)
31534 case TYPE_CR_LOGICAL:
31535 case TYPE_DELAYED_CR:
31536 case TYPE_MFCR:
31537 case TYPE_MFCRF:
31538 case TYPE_MTCR:
31539 case TYPE_SYNC:
31540 case TYPE_ISYNC:
31541 case TYPE_LOAD_L:
31542 case TYPE_STORE_C:
31543 case TYPE_VECSTORE:
31544 case TYPE_MFJMPR:
31545 case TYPE_MTJMPR:
31546 return true;
31547 case TYPE_SHIFT:
31548 case TYPE_EXTS:
31549 case TYPE_MUL:
31550 if (get_attr_dot (insn) == DOT_YES)
31551 return true;
31552 else
31553 break;
31554 case TYPE_LOAD:
31555 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31556 || get_attr_update (insn) == UPDATE_YES)
31557 return true;
31558 else
31559 break;
31560 case TYPE_STORE:
31561 if (get_attr_update (insn) == UPDATE_YES
31562 && get_attr_indexed (insn) == INDEXED_YES)
31563 return true;
31564 else
31565 break;
31566 default:
31567 break;
31569 break;
31570 default:
31571 break;
31574 return false;
31577 static bool
31578 insn_must_be_last_in_group (rtx_insn *insn)
31580 enum attr_type type;
31582 if (!insn
31583 || NOTE_P (insn)
31584 || DEBUG_INSN_P (insn)
31585 || GET_CODE (PATTERN (insn)) == USE
31586 || GET_CODE (PATTERN (insn)) == CLOBBER)
31587 return false;
31589 switch (rs6000_cpu) {
31590 case PROCESSOR_POWER4:
31591 case PROCESSOR_POWER5:
31592 if (is_microcoded_insn (insn))
31593 return true;
31595 if (is_branch_slot_insn (insn))
31596 return true;
31598 break;
31599 case PROCESSOR_POWER6:
31600 type = get_attr_type (insn);
31602 switch (type)
31604 case TYPE_EXTS:
31605 case TYPE_CNTLZ:
31606 case TYPE_TRAP:
31607 case TYPE_MUL:
31608 case TYPE_FPCOMPARE:
31609 case TYPE_MFCR:
31610 case TYPE_MTCR:
31611 case TYPE_MFJMPR:
31612 case TYPE_MTJMPR:
31613 case TYPE_ISYNC:
31614 case TYPE_SYNC:
31615 case TYPE_LOAD_L:
31616 case TYPE_STORE_C:
31617 return true;
31618 case TYPE_SHIFT:
31619 if (get_attr_dot (insn) == DOT_NO
31620 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31621 return true;
31622 else
31623 break;
31624 case TYPE_DIV:
31625 if (get_attr_size (insn) == SIZE_32)
31626 return true;
31627 else
31628 break;
31629 default:
31630 break;
31632 break;
31633 case PROCESSOR_POWER7:
31634 type = get_attr_type (insn);
31636 switch (type)
31638 case TYPE_ISYNC:
31639 case TYPE_SYNC:
31640 case TYPE_LOAD_L:
31641 case TYPE_STORE_C:
31642 return true;
31643 case TYPE_LOAD:
31644 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31645 && get_attr_update (insn) == UPDATE_YES)
31646 return true;
31647 else
31648 break;
31649 case TYPE_STORE:
31650 if (get_attr_update (insn) == UPDATE_YES
31651 && get_attr_indexed (insn) == INDEXED_YES)
31652 return true;
31653 else
31654 break;
31655 default:
31656 break;
31658 break;
31659 case PROCESSOR_POWER8:
31660 type = get_attr_type (insn);
31662 switch (type)
31664 case TYPE_MFCR:
31665 case TYPE_MTCR:
31666 case TYPE_ISYNC:
31667 case TYPE_SYNC:
31668 case TYPE_LOAD_L:
31669 case TYPE_STORE_C:
31670 return true;
31671 case TYPE_LOAD:
31672 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31673 && get_attr_update (insn) == UPDATE_YES)
31674 return true;
31675 else
31676 break;
31677 case TYPE_STORE:
31678 if (get_attr_update (insn) == UPDATE_YES
31679 && get_attr_indexed (insn) == INDEXED_YES)
31680 return true;
31681 else
31682 break;
31683 default:
31684 break;
31686 break;
31687 default:
31688 break;
31691 return false;
31694 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31695 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31697 static bool
31698 is_costly_group (rtx *group_insns, rtx next_insn)
31700 int i;
31701 int issue_rate = rs6000_issue_rate ();
31703 for (i = 0; i < issue_rate; i++)
31705 sd_iterator_def sd_it;
31706 dep_t dep;
31707 rtx insn = group_insns[i];
31709 if (!insn)
31710 continue;
31712 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31714 rtx next = DEP_CON (dep);
31716 if (next == next_insn
31717 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31718 return true;
31722 return false;
31725 /* Helper for the function redefine_groups.
31726 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31727 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31728 to keep it "far" (in a separate group) from GROUP_INSNS, following
31729 one of the following schemes, depending on the value of the flag
31730 -minsert-sched-nops = X:
31731 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31732 in order to force NEXT_INSN into a separate group.
31733 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31734 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31735 insertion (has a group just ended, how many vacant issue slots remain in the
31736 last group, and how many dispatch groups were encountered so far). */
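/* Example (illustrative): with -minsert-sched-nops=2 scheme (2) applies
   and exactly two nops are emitted before NEXT_INSN, while with
   -minsert-sched-nops=regroup_exact scheme (1) applies and just enough
   nops are emitted to force NEXT_INSN into a new dispatch group.  */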
31738 static int
31739 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31740 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31741 int *group_count)
31743 rtx nop;
31744 bool force;
31745 int issue_rate = rs6000_issue_rate ();
31746 bool end = *group_end;
31747 int i;
31749 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31750 return can_issue_more;
31752 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31753 return can_issue_more;
31755 force = is_costly_group (group_insns, next_insn);
31756 if (!force)
31757 return can_issue_more;
31759 if (sched_verbose > 6)
31760 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
31761 *group_count, can_issue_more);
31763 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31765 if (*group_end)
31766 can_issue_more = 0;
31768 /* Since only a branch can be issued in the last issue_slot, it is
31769 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31770 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31771 in this case the last nop will start a new group and the branch
31772 will be forced to the new group. */
31773 if (can_issue_more && !is_branch_slot_insn (next_insn))
31774 can_issue_more--;
31776 /* Do we have a special group ending nop? */
31777 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
31778 || rs6000_cpu_attr == CPU_POWER8)
31780 nop = gen_group_ending_nop ();
31781 emit_insn_before (nop, next_insn);
31782 can_issue_more = 0;
31784 else
31785 while (can_issue_more > 0)
31787 nop = gen_nop ();
31788 emit_insn_before (nop, next_insn);
31789 can_issue_more--;
31792 *group_end = true;
31793 return 0;
31796 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31798 int n_nops = rs6000_sched_insert_nops;
31800 /* Nops can't be issued from the branch slot, so the effective
31801 issue_rate for nops is 'issue_rate - 1'. */
31802 if (can_issue_more == 0)
31803 can_issue_more = issue_rate;
31804 can_issue_more--;
31805 if (can_issue_more == 0)
31807 can_issue_more = issue_rate - 1;
31808 (*group_count)++;
31809 end = true;
31810 for (i = 0; i < issue_rate; i++)
31812 group_insns[i] = 0;
31816 while (n_nops > 0)
31818 nop = gen_nop ();
31819 emit_insn_before (nop, next_insn);
31820 if (can_issue_more == issue_rate - 1) /* new group begins */
31821 end = false;
31822 can_issue_more--;
31823 if (can_issue_more == 0)
31825 can_issue_more = issue_rate - 1;
31826 (*group_count)++;
31827 end = true;
31828 for (i = 0; i < issue_rate; i++)
31830 group_insns[i] = 0;
31833 n_nops--;
31836 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31837 can_issue_more++;
31839 /* Is next_insn going to start a new group? */
31840 *group_end
31841 = (end
31842 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31843 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31844 || (can_issue_more < issue_rate
31845 && insn_terminates_group_p (next_insn, previous_group)));
31846 if (*group_end && end)
31847 (*group_count)--;
31849 if (sched_verbose > 6)
31850 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
31851 *group_count, can_issue_more);
31852 return can_issue_more;
31855 return can_issue_more;
31858 /* This function tries to synch the dispatch groups that the compiler "sees"
31859 with the dispatch groups that the processor dispatcher is expected to
31860 form in practice. It tries to achieve this synchronization by forcing the
31861 estimated processor grouping on the compiler (as opposed to the function
31862 'pad_groups', which tries to force the scheduler's grouping on the processor).
31864 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
31865 examines the (estimated) dispatch groups that will be formed by the processor
31866 dispatcher. It marks these group boundaries to reflect the estimated
31867 processor grouping, overriding the grouping that the scheduler had marked.
31868 Depending on the value of the flag '-minsert-sched-nops' this function can
31869 force certain insns into separate groups or force a certain distance between
31870 them by inserting nops, for example, if there exists a "costly dependence"
31871 between the insns.
31873 The function estimates the group boundaries that the processor will form as
31874 follows: It keeps track of how many vacant issue slots are available after
31875 each insn. A subsequent insn will start a new group if one of the following
31876 4 cases applies:
31877 - no more vacant issue slots remain in the current dispatch group.
31878 - only the last issue slot, which is the branch slot, is vacant, but the next
31879 insn is not a branch.
31880 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
31881 which means that a cracked insn (which occupies two issue slots) can't be
31882 issued in this group.
31883 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
31884 start a new group. */
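/* Condensed (a restatement of the group_end computation in the loop
   below, writing CAN for the vacant slots remaining):

     group_end = CAN == 0
                 || (CAN == 1 && !is_branch_slot_insn (next_insn))
                 || (CAN <= 2 && is_cracked_insn (next_insn))
                 || (CAN < issue_rate
                     && insn_terminates_group_p (next_insn, previous_group));  */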
31886 static int
31887 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31888 rtx_insn *tail)
31890 rtx_insn *insn, *next_insn;
31891 int issue_rate;
31892 int can_issue_more;
31893 int slot, i;
31894 bool group_end;
31895 int group_count = 0;
31896 rtx *group_insns;
31898 /* Initialize. */
31899 issue_rate = rs6000_issue_rate ();
31900 group_insns = XALLOCAVEC (rtx, issue_rate);
31901 for (i = 0; i < issue_rate; i++)
31903 group_insns[i] = 0;
31905 can_issue_more = issue_rate;
31906 slot = 0;
31907 insn = get_next_active_insn (prev_head_insn, tail);
31908 group_end = false;
31910 while (insn != NULL_RTX)
31912 slot = (issue_rate - can_issue_more);
31913 group_insns[slot] = insn;
31914 can_issue_more =
31915 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31916 if (insn_terminates_group_p (insn, current_group))
31917 can_issue_more = 0;
31919 next_insn = get_next_active_insn (insn, tail);
31920 if (next_insn == NULL_RTX)
31921 return group_count + 1;
31923 /* Is next_insn going to start a new group? */
31924 group_end
31925 = (can_issue_more == 0
31926 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31927 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31928 || (can_issue_more < issue_rate
31929 && insn_terminates_group_p (next_insn, previous_group)));
31931 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
31932 next_insn, &group_end, can_issue_more,
31933 &group_count);
31935 if (group_end)
31937 group_count++;
31938 can_issue_more = 0;
31939 for (i = 0; i < issue_rate; i++)
31941 group_insns[i] = 0;
31945 if (GET_MODE (next_insn) == TImode && can_issue_more)
31946 PUT_MODE (next_insn, VOIDmode);
31947 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
31948 PUT_MODE (next_insn, TImode);
31950 insn = next_insn;
31951 if (can_issue_more == 0)
31952 can_issue_more = issue_rate;
31953 } /* while */
31955 return group_count;
31958 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
31959 dispatch group boundaries that the scheduler had marked. Pad with nops
31960 any dispatch groups which have vacant issue slots, in order to force the
31961 scheduler's grouping on the processor dispatcher. The function
31962 returns the number of dispatch groups found. */
31964 static int
31965 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31966 rtx_insn *tail)
31968 rtx_insn *insn, *next_insn;
31969 rtx nop;
31970 int issue_rate;
31971 int can_issue_more;
31972 int group_end;
31973 int group_count = 0;
31975 /* Initialize issue_rate. */
31976 issue_rate = rs6000_issue_rate ();
31977 can_issue_more = issue_rate;
31979 insn = get_next_active_insn (prev_head_insn, tail);
31980 next_insn = get_next_active_insn (insn, tail);
31982 while (insn != NULL_RTX)
31984 can_issue_more =
31985 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31987 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
31989 if (next_insn == NULL_RTX)
31990 break;
31992 if (group_end)
31994 /* If the scheduler had marked group termination at this location
31995 (between insn and next_insn), and neither insn nor next_insn will
31996 force group termination, pad the group with nops to force group
31997 termination. */
31998 if (can_issue_more
31999 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32000 && !insn_terminates_group_p (insn, current_group)
32001 && !insn_terminates_group_p (next_insn, previous_group))
32003 if (!is_branch_slot_insn (next_insn))
32004 can_issue_more--;
32006 while (can_issue_more)
32008 nop = gen_nop ();
32009 emit_insn_before (nop, next_insn);
32010 can_issue_more--;
32014 can_issue_more = issue_rate;
32015 group_count++;
32018 insn = next_insn;
32019 next_insn = get_next_active_insn (insn, tail);
32022 return group_count;
32025 /* We're beginning a new block. Initialize data structures as necessary. */
32027 static void
32028 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32029 int sched_verbose ATTRIBUTE_UNUSED,
32030 int max_ready ATTRIBUTE_UNUSED)
32032 last_scheduled_insn = NULL;
32033 load_store_pendulum = 0;
32034 divide_cnt = 0;
32035 vec_load_pendulum = 0;
32038 /* The following function is called at the end of scheduling BB.
32039 After reload, it inserts nops to enforce insn group bundling. */
32041 static void
32042 rs6000_sched_finish (FILE *dump, int sched_verbose)
32044 int n_groups;
32046 if (sched_verbose)
32047 fprintf (dump, "=== Finishing schedule.\n");
32049 if (reload_completed && rs6000_sched_groups)
32051 /* Do not run sched_finish hook when selective scheduling enabled. */
32052 if (sel_sched_p ())
32053 return;
32055 if (rs6000_sched_insert_nops == sched_finish_none)
32056 return;
32058 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32059 n_groups = pad_groups (dump, sched_verbose,
32060 current_sched_info->prev_head,
32061 current_sched_info->next_tail);
32062 else
32063 n_groups = redefine_groups (dump, sched_verbose,
32064 current_sched_info->prev_head,
32065 current_sched_info->next_tail);
32067 if (sched_verbose >= 6)
32069 fprintf (dump, "ngroups = %d\n", n_groups);
32070 print_rtl (dump, current_sched_info->prev_head);
32071 fprintf (dump, "Done finish_sched\n");
32076 struct rs6000_sched_context
32078 short cached_can_issue_more;
32079 rtx_insn *last_scheduled_insn;
32080 int load_store_pendulum;
32081 int divide_cnt;
32082 int vec_load_pendulum;
32085 typedef struct rs6000_sched_context rs6000_sched_context_def;
32086 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32088 /* Allocate store for new scheduling context. */
32089 static void *
32090 rs6000_alloc_sched_context (void)
32092 return xmalloc (sizeof (rs6000_sched_context_def));
32095 /* If CLEAN_P is true, initialize _SC with clean data;
32096 otherwise, initialize it from the global context. */
32097 static void
32098 rs6000_init_sched_context (void *_sc, bool clean_p)
32100 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32102 if (clean_p)
32104 sc->cached_can_issue_more = 0;
32105 sc->last_scheduled_insn = NULL;
32106 sc->load_store_pendulum = 0;
32107 sc->divide_cnt = 0;
32108 sc->vec_load_pendulum = 0;
32110 else
32112 sc->cached_can_issue_more = cached_can_issue_more;
32113 sc->last_scheduled_insn = last_scheduled_insn;
32114 sc->load_store_pendulum = load_store_pendulum;
32115 sc->divide_cnt = divide_cnt;
32116 sc->vec_load_pendulum = vec_load_pendulum;
32120 /* Sets the global scheduling context to the one pointed to by _SC. */
32121 static void
32122 rs6000_set_sched_context (void *_sc)
32124 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32126 gcc_assert (sc != NULL);
32128 cached_can_issue_more = sc->cached_can_issue_more;
32129 last_scheduled_insn = sc->last_scheduled_insn;
32130 load_store_pendulum = sc->load_store_pendulum;
32131 divide_cnt = sc->divide_cnt;
32132 vec_load_pendulum = sc->vec_load_pendulum;
32135 /* Free _SC. */
32136 static void
32137 rs6000_free_sched_context (void *_sc)
32139 gcc_assert (_sc != NULL);
32141 free (_sc);
32145 /* Length in units (bytes) of the trampoline for entering a nested function. */
32148 rs6000_trampoline_size (void)
32150 int ret = 0;
32152 switch (DEFAULT_ABI)
32154 default:
32155 gcc_unreachable ();
32157 case ABI_AIX:
32158 ret = (TARGET_32BIT) ? 12 : 24;
32159 break;
32161 case ABI_ELFv2:
32162 gcc_assert (!TARGET_32BIT);
32163 ret = 32;
32164 break;
32166 case ABI_DARWIN:
32167 case ABI_V4:
32168 ret = (TARGET_32BIT) ? 40 : 48;
32169 break;
32172 return ret;
32175 /* Emit RTL insns to initialize the variable parts of a trampoline.
32176 FNADDR is an RTX for the address of the function's pure code.
32177 CXT is an RTX for the static chain value for the function. */
32179 static void
32180 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32182 int regsize = (TARGET_32BIT) ? 4 : 8;
32183 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32184 rtx ctx_reg = force_reg (Pmode, cxt);
32185 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32187 switch (DEFAULT_ABI)
32189 default:
32190 gcc_unreachable ();
32192 /* Under AIX, just build the 3-word function descriptor. */
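/* The descriptor layout written below, with regsize = 4 (32-bit)
   or 8 (64-bit) bytes:
     offset 0:          function (code) address
     offset regsize:    TOC pointer
     offset 2*regsize:  static chain (CXT)  */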
32193 case ABI_AIX:
32195 rtx fnmem, fn_reg, toc_reg;
32197 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32198 error ("You cannot take the address of a nested function if you use "
32199 "the -mno-pointers-to-nested-functions option.");
32201 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32202 fn_reg = gen_reg_rtx (Pmode);
32203 toc_reg = gen_reg_rtx (Pmode);
32205 /* Macro to shorten the code expansions below. */
32206 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32208 m_tramp = replace_equiv_address (m_tramp, addr);
32210 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32211 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32212 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32213 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32214 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32216 # undef MEM_PLUS
32218 break;
32220 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32221 case ABI_ELFv2:
32222 case ABI_DARWIN:
32223 case ABI_V4:
32224 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32225 LCT_NORMAL, VOIDmode, 4,
32226 addr, Pmode,
32227 GEN_INT (rs6000_trampoline_size ()), SImode,
32228 fnaddr, Pmode,
32229 ctx_reg, Pmode);
32230 break;
32235 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32236 identifier as an argument, so the front end shouldn't look it up. */
32238 static bool
32239 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32241 return is_attribute_p ("altivec", attr_id);
32244 /* Handle the "altivec" attribute. The attribute may have
32245 arguments as follows:
32247 __attribute__((altivec(vector__)))
32248 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32249 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32251 and may appear more than once (e.g., 'vector bool char') in a
32252 given declaration. */
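/* For instance (illustrative): the 'vector' keyword is expanded to this
   attribute form, so

     __attribute__ ((altivec (vector__))) unsigned int v4;

   declares the same V4SI unsigned vector type as 'vector unsigned int'.  */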
32254 static tree
32255 rs6000_handle_altivec_attribute (tree *node,
32256 tree name ATTRIBUTE_UNUSED,
32257 tree args,
32258 int flags ATTRIBUTE_UNUSED,
32259 bool *no_add_attrs)
32261 tree type = *node, result = NULL_TREE;
32262 machine_mode mode;
32263 int unsigned_p;
32264 char altivec_type
32265 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32266 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32267 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32268 : '?');
32270 while (POINTER_TYPE_P (type)
32271 || TREE_CODE (type) == FUNCTION_TYPE
32272 || TREE_CODE (type) == METHOD_TYPE
32273 || TREE_CODE (type) == ARRAY_TYPE)
32274 type = TREE_TYPE (type);
32276 mode = TYPE_MODE (type);
32278 /* Check for invalid AltiVec type qualifiers. */
32279 if (type == long_double_type_node)
32280 error ("use of %<long double%> in AltiVec types is invalid");
32281 else if (type == boolean_type_node)
32282 error ("use of boolean types in AltiVec types is invalid");
32283 else if (TREE_CODE (type) == COMPLEX_TYPE)
32284 error ("use of %<complex%> in AltiVec types is invalid");
32285 else if (DECIMAL_FLOAT_MODE_P (mode))
32286 error ("use of decimal floating point types in AltiVec types is invalid");
32287 else if (!TARGET_VSX)
32289 if (type == long_unsigned_type_node || type == long_integer_type_node)
32291 if (TARGET_64BIT)
32292 error ("use of %<long%> in AltiVec types is invalid for "
32293 "64-bit code without -mvsx");
32294 else if (rs6000_warn_altivec_long)
32295 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32296 "use %<int%>");
32298 else if (type == long_long_unsigned_type_node
32299 || type == long_long_integer_type_node)
32300 error ("use of %<long long%> in AltiVec types is invalid without "
32301 "-mvsx");
32302 else if (type == double_type_node)
32303 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
32306 switch (altivec_type)
32308 case 'v':
32309 unsigned_p = TYPE_UNSIGNED (type);
32310 switch (mode)
32312 case TImode:
32313 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32314 break;
32315 case DImode:
32316 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32317 break;
32318 case SImode:
32319 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32320 break;
32321 case HImode:
32322 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32323 break;
32324 case QImode:
32325 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32326 break;
32327 case SFmode: result = V4SF_type_node; break;
32328 case DFmode: result = V2DF_type_node; break;
32329 /* If the user says 'vector int bool', we may be handed the 'bool'
32330 attribute _before_ the 'vector' attribute, and so select the
32331 proper type in the 'b' case below. */
32332 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
32333 case V2DImode: case V2DFmode:
32334 result = type;
32335 default: break;
32337 break;
32338 case 'b':
32339 switch (mode)
32341 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
32342 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
32343 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
32344 case QImode: case V16QImode: result = bool_V16QI_type_node;
32345 default: break;
32347 break;
32348 case 'p':
32349 switch (mode)
32351 case V8HImode: result = pixel_V8HI_type_node;
32352 default: break;
32354 default: break;
32357 /* Propagate qualifiers attached to the element type
32358 onto the vector type. */
32359 if (result && result != type && TYPE_QUALS (type))
32360 result = build_qualified_type (result, TYPE_QUALS (type));
32362 *no_add_attrs = true; /* No need to hang on to the attribute. */
32364 if (result)
32365 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32367 return NULL_TREE;
32370 /* AltiVec defines four built-in scalar types that serve as vector
32371 elements; we must teach the compiler how to mangle them. */
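/* E.g. (illustrative, using the Itanium C++ ABI vendor-extended type
   encoding "u <length> <name>"): a function 'void f (__pixel)' mangles
   as _Z1fu7__pixel.  */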
32373 static const char *
32374 rs6000_mangle_type (const_tree type)
32376 type = TYPE_MAIN_VARIANT (type);
32378 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32379 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32380 return NULL;
32382 if (type == bool_char_type_node) return "U6__boolc";
32383 if (type == bool_short_type_node) return "U6__bools";
32384 if (type == pixel_type_node) return "u7__pixel";
32385 if (type == bool_int_type_node) return "U6__booli";
32386 if (type == bool_long_type_node) return "U6__booll";
32388 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32389 "g" for IBM extended double, no matter whether it is long double (using
32390 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32391 if (TARGET_FLOAT128)
32393 if (type == ieee128_float_type_node)
32394 return "U10__float128";
32396 if (type == ibm128_float_type_node)
32397 return "g";
32399 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
32400 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32403 /* Mangle IBM extended float long double as `g' (__float128) on
32404 powerpc*-linux where long-double-64 previously was the default. */
32405 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32406 && TARGET_ELF
32407 && TARGET_LONG_DOUBLE_128
32408 && !TARGET_IEEEQUAD)
32409 return "g";
32411 /* For all other types, use normal C++ mangling. */
32412 return NULL;
32415 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32416 struct attribute_spec.handler. */
32418 static tree
32419 rs6000_handle_longcall_attribute (tree *node, tree name,
32420 tree args ATTRIBUTE_UNUSED,
32421 int flags ATTRIBUTE_UNUSED,
32422 bool *no_add_attrs)
32424 if (TREE_CODE (*node) != FUNCTION_TYPE
32425 && TREE_CODE (*node) != FIELD_DECL
32426 && TREE_CODE (*node) != TYPE_DECL)
32428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32429 name);
32430 *no_add_attrs = true;
32433 return NULL_TREE;
32436 /* Set longcall attributes on all functions declared when
32437 rs6000_default_long_calls is true. */
32438 static void
32439 rs6000_set_default_type_attributes (tree type)
32441 if (rs6000_default_long_calls
32442 && (TREE_CODE (type) == FUNCTION_TYPE
32443 || TREE_CODE (type) == METHOD_TYPE))
32444 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32445 NULL_TREE,
32446 TYPE_ATTRIBUTES (type));
32448 #if TARGET_MACHO
32449 darwin_set_default_type_attributes (type);
32450 #endif
32453 /* Return a reference suitable for calling a function with the
32454 longcall attribute. */
32457 rs6000_longcall_ref (rtx call_ref)
32459 const char *call_name;
32460 tree node;
32462 if (GET_CODE (call_ref) != SYMBOL_REF)
32463 return call_ref;
32465 /* System V adds '.' to the internal name, so skip them. */
32466 call_name = XSTR (call_ref, 0);
32467 if (*call_name == '.')
32469 while (*call_name == '.')
32470 call_name++;
32472 node = get_identifier (call_name);
32473 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32476 return force_reg (Pmode, call_ref);
32479 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32480 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32481 #endif
32483 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32484 struct attribute_spec.handler. */
32485 static tree
32486 rs6000_handle_struct_attribute (tree *node, tree name,
32487 tree args ATTRIBUTE_UNUSED,
32488 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32490 tree *type = NULL;
32491 if (DECL_P (*node))
32493 if (TREE_CODE (*node) == TYPE_DECL)
32494 type = &TREE_TYPE (*node);
32496 else
32497 type = node;
32499 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32500 || TREE_CODE (*type) == UNION_TYPE)))
32502 warning (OPT_Wattributes, "%qE attribute ignored", name);
32503 *no_add_attrs = true;
32506 else if ((is_attribute_p ("ms_struct", name)
32507 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32508 || ((is_attribute_p ("gcc_struct", name)
32509 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32511 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32512 name);
32513 *no_add_attrs = true;
32516 return NULL_TREE;
32519 static bool
32520 rs6000_ms_bitfield_layout_p (const_tree record_type)
32522 return ((TARGET_USE_MS_BITFIELD_LAYOUT
32523 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32524 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32527 #ifdef USING_ELFOS_H
32529 /* A get_unnamed_section callback, used for switching to toc_section. */
32531 static void
32532 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32534 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32535 && TARGET_MINIMAL_TOC)
32537 if (!toc_initialized)
32539 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32540 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32541 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32542 fprintf (asm_out_file, "\t.tc ");
32543 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32544 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32545 fprintf (asm_out_file, "\n");
32547 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32548 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32549 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32550 fprintf (asm_out_file, " = .+32768\n");
32551 toc_initialized = 1;
32553 else
32554 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32556 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32558 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32559 if (!toc_initialized)
32561 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32562 toc_initialized = 1;
32565 else
32567 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32568 if (!toc_initialized)
32570 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32572 fprintf (asm_out_file, " = .+32768\n");
32573 toc_initialized = 1;
32578 /* Implement TARGET_ASM_INIT_SECTIONS. */
32580 static void
32581 rs6000_elf_asm_init_sections (void)
32583 toc_section
32584 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32586 sdata2_section
32587 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32588 SDATA2_SECTION_ASM_OP);
32591 /* Implement TARGET_SELECT_RTX_SECTION. */
32593 static section *
32594 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32595 unsigned HOST_WIDE_INT align)
32597 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32598 return toc_section;
32599 else
32600 return default_elf_select_rtx_section (mode, x, align);
32603 /* For a SYMBOL_REF, set generic flags and then perform some
32604 target-specific processing.
32606 When the AIX ABI is requested on a non-AIX system, replace the
32607 function name with the real name (with a leading .) rather than the
32608 function descriptor name. This saves a lot of overriding code to
32609 read the prefixes. */
32611 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32612 static void
32613 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32615 default_encode_section_info (decl, rtl, first);
32617 if (first
32618 && TREE_CODE (decl) == FUNCTION_DECL
32619 && !TARGET_AIX
32620 && DEFAULT_ABI == ABI_AIX)
32622 rtx sym_ref = XEXP (rtl, 0);
32623 size_t len = strlen (XSTR (sym_ref, 0));
32624 char *str = XALLOCAVEC (char, len + 2);
32625 str[0] = '.';
32626 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32627 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32631 static inline bool
32632 compare_section_name (const char *section, const char *templ)
32634 int len;
32636 len = strlen (templ);
32637 return (strncmp (section, templ, len) == 0
32638 && (section[len] == 0 || section[len] == '.'));
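/* E.g. compare_section_name (".sdata.foo", ".sdata") is true, while
   compare_section_name (".sdata2", ".sdata") is false, since neither
   NUL nor '.' follows the matched prefix.  */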
32641 bool
32642 rs6000_elf_in_small_data_p (const_tree decl)
32644 if (rs6000_sdata == SDATA_NONE)
32645 return false;
32647 /* We want to merge strings, so we never consider them small data. */
32648 if (TREE_CODE (decl) == STRING_CST)
32649 return false;
32651 /* Functions are never in the small data area. */
32652 if (TREE_CODE (decl) == FUNCTION_DECL)
32653 return false;
32655 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32657 const char *section = DECL_SECTION_NAME (decl);
32658 if (compare_section_name (section, ".sdata")
32659 || compare_section_name (section, ".sdata2")
32660 || compare_section_name (section, ".gnu.linkonce.s")
32661 || compare_section_name (section, ".sbss")
32662 || compare_section_name (section, ".sbss2")
32663 || compare_section_name (section, ".gnu.linkonce.sb")
32664 || strcmp (section, ".PPC.EMB.sdata0") == 0
32665 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32666 return true;
32668 else
32670 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32672 if (size > 0
32673 && size <= g_switch_value
32674 /* If it's not public, and we're not going to reference it there,
32675 there's no need to put it in the small data section. */
32676 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32677 return true;
32680 return false;
32683 #endif /* USING_ELFOS_H */
32685 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32687 static bool
32688 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32690 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32693 /* Do not place thread-local symbols refs in the object blocks. */
32695 static bool
32696 rs6000_use_blocks_for_decl_p (const_tree decl)
32698 return !DECL_THREAD_LOCAL_P (decl);
32701 /* Return a REG that occurs in ADDR with coefficient 1.
32702 ADDR can be effectively incremented by incrementing REG.
32704 r0 is special and we must not select it as an address
32705 register by this routine since our caller will try to
32706 increment the returned register via an "la" instruction. */
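/* E.g. (illustrative): for ADDR = (plus (reg 9) (const_int 16)) this
   returns (reg 9), which the caller can then step with an insn such
   as "la 9,4(9)".  */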
32709 find_addr_reg (rtx addr)
32711 while (GET_CODE (addr) == PLUS)
32713 if (GET_CODE (XEXP (addr, 0)) == REG
32714 && REGNO (XEXP (addr, 0)) != 0)
32715 addr = XEXP (addr, 0);
32716 else if (GET_CODE (XEXP (addr, 1)) == REG
32717 && REGNO (XEXP (addr, 1)) != 0)
32718 addr = XEXP (addr, 1);
32719 else if (CONSTANT_P (XEXP (addr, 0)))
32720 addr = XEXP (addr, 1);
32721 else if (CONSTANT_P (XEXP (addr, 1)))
32722 addr = XEXP (addr, 0);
32723 else
32724 gcc_unreachable ();
32726 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
32727 return addr;
32730 void
32731 rs6000_fatal_bad_address (rtx op)
32733 fatal_insn ("bad address", op);
32736 #if TARGET_MACHO
32738 typedef struct branch_island_d {
32739 tree function_name;
32740 tree label_name;
32741 int line_number;
32742 } branch_island;
32745 static vec<branch_island, va_gc> *branch_islands;
32747 /* Remember to generate a branch island for far calls to the given
32748 function. */
32750 static void
32751 add_compiler_branch_island (tree label_name, tree function_name,
32752 int line_number)
32754 branch_island bi = {function_name, label_name, line_number};
32755 vec_safe_push (branch_islands, bi);
32758 /* Generate far-jump branch islands for everything recorded in
32759 branch_islands. Invoked immediately after the last instruction of
32760 the epilogue has been emitted; the branch islands must be appended
32761 to, and contiguous with, the function body. Mach-O stubs are
32762 generated in machopic_output_stub(). */
32764 static void
32765 macho_branch_islands (void)
32767 char tmp_buf[512];
32769 while (!vec_safe_is_empty (branch_islands))
32771 branch_island *bi = &branch_islands->last ();
32772 const char *label = IDENTIFIER_POINTER (bi->label_name);
32773 const char *name = IDENTIFIER_POINTER (bi->function_name);
32774 char name_buf[512];
32775 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32776 if (name[0] == '*' || name[0] == '&')
32777 strcpy (name_buf, name+1);
32778 else
32780 name_buf[0] = '_';
32781 strcpy (name_buf+1, name);
32783 strcpy (tmp_buf, "\n");
32784 strcat (tmp_buf, label);
32785 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32786 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32787 dbxout_stabd (N_SLINE, bi->line_number);
32788 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32789 if (flag_pic)
32791 if (TARGET_LINK_STACK)
32793 char name[32];
32794 get_ppc476_thunk_name (name);
32795 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32796 strcat (tmp_buf, name);
32797 strcat (tmp_buf, "\n");
32798 strcat (tmp_buf, label);
32799 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32801 else
32803 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32804 strcat (tmp_buf, label);
32805 strcat (tmp_buf, "_pic\n");
32806 strcat (tmp_buf, label);
32807 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32810 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32811 strcat (tmp_buf, name_buf);
32812 strcat (tmp_buf, " - ");
32813 strcat (tmp_buf, label);
32814 strcat (tmp_buf, "_pic)\n");
32816 strcat (tmp_buf, "\tmtlr r0\n");
32818 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32819 strcat (tmp_buf, name_buf);
32820 strcat (tmp_buf, " - ");
32821 strcat (tmp_buf, label);
32822 strcat (tmp_buf, "_pic)\n");
32824 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
32826 else
32828 strcat (tmp_buf, ":\nlis r12,hi16(");
32829 strcat (tmp_buf, name_buf);
32830 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
32831 strcat (tmp_buf, name_buf);
32832 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
32834 output_asm_insn (tmp_buf, 0);
32835 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32836 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32837 dbxout_stabd (N_SLINE, bi->line_number);
32838 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32839 branch_islands->pop ();
32843 /* NO_PREVIOUS_DEF checks whether the function name is already in the
32844 branch-island list. */
32846 static int
32847 no_previous_def (tree function_name)
32849 branch_island *bi;
32850 unsigned ix;
32852 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32853 if (function_name == bi->function_name)
32854 return 0;
32855 return 1;
32858 /* GET_PREV_LABEL gets the label name from the previous definition of
32859 the function. */
32861 static tree
32862 get_prev_label (tree function_name)
32864 branch_island *bi;
32865 unsigned ix;
32867 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32868 if (function_name == bi->function_name)
32869 return bi->label_name;
32870 return NULL_TREE;
32873 /* INSN is either a function call or a millicode call. It may have an
32874 unconditional jump in its delay slot.
32876 CALL_DEST is the routine we are calling. */
32878 char *
32879 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
32880 int cookie_operand_number)
32882 static char buf[256];
32883 if (darwin_emit_branch_islands
32884 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
32885 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
32887 tree labelname;
32888 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
32890 if (no_previous_def (funname))
32892 rtx label_rtx = gen_label_rtx ();
32893 char *label_buf, temp_buf[256];
32894 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32895 CODE_LABEL_NUMBER (label_rtx));
32896 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32897 labelname = get_identifier (label_buf);
32898 add_compiler_branch_island (labelname, funname, insn_line (insn));
32900 else
32901 labelname = get_prev_label (funname);
32903 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
32904 instruction will reach 'foo', otherwise link as 'bl L42'".
32905 "L42" should be a 'branch island', that will do a far jump to
32906 'foo'. Branch islands are generated in
32907 macho_branch_islands(). */
32908 sprintf (buf, "jbsr %%z%d,%.246s",
32909 dest_operand_number, IDENTIFIER_POINTER (labelname));
32911 else
32912 sprintf (buf, "bl %%z%d", dest_operand_number);
32913 return buf;
32916 /* Generate PIC and indirect symbol stubs. */
32918 void
32919 machopic_output_stub (FILE *file, const char *symb, const char *stub)
32921 unsigned int length;
32922 char *symbol_name, *lazy_ptr_name;
32923 char *local_label_0;
32924 static int label = 0;
32926 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32927 symb = (*targetm.strip_name_encoding) (symb);
32930 length = strlen (symb);
32931 symbol_name = XALLOCAVEC (char, length + 32);
32932 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
32934 lazy_ptr_name = XALLOCAVEC (char, length + 32);
32935 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
32937 if (flag_pic == 2)
32938 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
32939 else
32940 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
32942 if (flag_pic == 2)
32944 fprintf (file, "\t.align 5\n");
32946 fprintf (file, "%s:\n", stub);
32947 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32949 label++;
32950 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
32951 sprintf (local_label_0, "\"L%011d$spb\"", label);
32953 fprintf (file, "\tmflr r0\n");
32954 if (TARGET_LINK_STACK)
32956 char name[32];
32957 get_ppc476_thunk_name (name);
32958 fprintf (file, "\tbl %s\n", name);
32959 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32961 else
32963 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
32964 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32966 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
32967 lazy_ptr_name, local_label_0);
32968 fprintf (file, "\tmtlr r0\n");
32969 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
32970 (TARGET_64BIT ? "ldu" : "lwzu"),
32971 lazy_ptr_name, local_label_0);
32972 fprintf (file, "\tmtctr r12\n");
32973 fprintf (file, "\tbctr\n");
32975 else
32977 fprintf (file, "\t.align 4\n");
32979 fprintf (file, "%s:\n", stub);
32980 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32982 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
32983 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
32984 (TARGET_64BIT ? "ldu" : "lwzu"),
32985 lazy_ptr_name);
32986 fprintf (file, "\tmtctr r12\n");
32987 fprintf (file, "\tbctr\n");
32990 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
32991 fprintf (file, "%s:\n", lazy_ptr_name);
32992 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32993 fprintf (file, "%sdyld_stub_binding_helper\n",
32994 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
32997 /* Legitimize PIC addresses. If the address is already
32998 position-independent, we return ORIG. Newly generated
32999 position-independent addresses go into a reg. This is REG if
33000 nonzero; otherwise we allocate register(s) as necessary. */
33002 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
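/* (A note on the macro above: it is true exactly when INTVAL (X) fits
   in a signed 16-bit immediate, i.e. lies in [-0x8000, 0x7fff]; adding
   0x8000 biases that range into [0, 0x10000) so a single unsigned
   compare suffices.)  */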
33005 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33006 rtx reg)
33008 rtx base, offset;
33010 if (reg == NULL && ! reload_in_progress && ! reload_completed)
33011 reg = gen_reg_rtx (Pmode);
33013 if (GET_CODE (orig) == CONST)
33015 rtx reg_temp;
33017 if (GET_CODE (XEXP (orig, 0)) == PLUS
33018 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33019 return orig;
33021 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33023 /* Use a different reg for the intermediate value, as
33024 it will be marked UNCHANGING. */
33025 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33026 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33027 Pmode, reg_temp);
33028 offset =
33029 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33030 Pmode, reg);
33032 if (GET_CODE (offset) == CONST_INT)
33034 if (SMALL_INT (offset))
33035 return plus_constant (Pmode, base, INTVAL (offset));
33036 else if (! reload_in_progress && ! reload_completed)
33037 offset = force_reg (Pmode, offset);
33038 else
33040 rtx mem = force_const_mem (Pmode, orig);
33041 return machopic_legitimize_pic_address (mem, Pmode, reg);
33044 return gen_rtx_PLUS (Pmode, base, offset);
33047 /* Fall back on generic machopic code. */
33048 return machopic_legitimize_pic_address (orig, mode, reg);
33051 /* Output a .machine directive for the Darwin assembler, and call
33052 the generic start_file routine. */
33054 static void
33055 rs6000_darwin_file_start (void)
33057 static const struct
33059 const char *arg;
33060 const char *name;
33061 HOST_WIDE_INT if_set;
33062 } mapping[] = {
33063 { "ppc64", "ppc64", MASK_64BIT },
33064 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33065 { "power4", "ppc970", 0 },
33066 { "G5", "ppc970", 0 },
33067 { "7450", "ppc7450", 0 },
33068 { "7400", "ppc7400", MASK_ALTIVEC },
33069 { "G4", "ppc7400", 0 },
33070 { "750", "ppc750", 0 },
33071 { "740", "ppc750", 0 },
33072 { "G3", "ppc750", 0 },
33073 { "604e", "ppc604e", 0 },
33074 { "604", "ppc604", 0 },
33075 { "603e", "ppc603", 0 },
33076 { "603", "ppc603", 0 },
33077 { "601", "ppc601", 0 },
33078 { NULL, "ppc", 0 } };
33079 const char *cpu_id = "";
33080 size_t i;
33082 rs6000_file_start ();
33083 darwin_file_start ();
33085 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33087 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33088 cpu_id = rs6000_default_cpu;
33090 if (global_options_set.x_rs6000_cpu_index)
33091 cpu_id = processor_target_table[rs6000_cpu_index].name;
33093 /* Look through the mapping array. Pick the first name that either
33094 matches the argument, has a bit set in IF_SET that is also set
33095 in the target flags, or has a NULL name. */
33097 i = 0;
33098 while (mapping[i].arg != NULL
33099 && strcmp (mapping[i].arg, cpu_id) != 0
33100 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33101 i++;
33103 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
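/* For example (a sketch, not an exhaustive list): -mcpu=G5 matches the
   "G5" row of the mapping above and emits ".machine ppc970", while an
   unrecognized or empty cpu string falls through to the terminating
   NULL row and emits ".machine ppc".  */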
33106 #endif /* TARGET_MACHO */
33108 #if TARGET_ELF
33109 static int
33110 rs6000_elf_reloc_rw_mask (void)
33112 if (flag_pic)
33113 return 3;
33114 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33115 return 2;
33116 else
33117 return 0;
33120 /* Record an element in the table of global constructors. SYMBOL is
33121 a SYMBOL_REF of the function to be called; PRIORITY is a number
33122 between 0 and MAX_INIT_PRIORITY.
33124 This differs from default_named_section_asm_out_constructor in
33125 that we have special handling for -mrelocatable. */
33127 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33128 static void
33129 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33131 const char *section = ".ctors";
33132 char buf[16];
33134 if (priority != DEFAULT_INIT_PRIORITY)
33136 sprintf (buf, ".ctors.%.5u",
33137 /* Invert the numbering so the linker puts us in the proper
33138 order; constructors are run from right to left, and the
33139 linker sorts in increasing order. */
33140 MAX_INIT_PRIORITY - priority);
33141 section = buf;
33144 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33145 assemble_align (POINTER_SIZE);
33147 if (DEFAULT_ABI == ABI_V4
33148 && (TARGET_RELOCATABLE || flag_pic > 1))
33150 fputs ("\t.long (", asm_out_file);
33151 output_addr_const (asm_out_file, symbol);
33152 fputs (")@fixup\n", asm_out_file);
33154 else
33155 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
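/* Worked example of the inversion above, assuming the usual
   MAX_INIT_PRIORITY of 65535: a constructor with priority 101 (which
   must run before one with priority 102) lands in ".ctors.65434",
   which the linker sorts after ".ctors.65433"; since the table is run
   right to left, that later entry executes first, as required.  */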
33158 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33159 static void
33160 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33162 const char *section = ".dtors";
33163 char buf[16];
33165 if (priority != DEFAULT_INIT_PRIORITY)
33167 sprintf (buf, ".dtors.%.5u",
33168 /* Invert the numbering so the linker puts us in the proper
33169 order; constructors are run from right to left, and the
33170 linker sorts in increasing order. */
33171 MAX_INIT_PRIORITY - priority);
33172 section = buf;
33175 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33176 assemble_align (POINTER_SIZE);
33178 if (DEFAULT_ABI == ABI_V4
33179 && (TARGET_RELOCATABLE || flag_pic > 1))
33181 fputs ("\t.long (", asm_out_file);
33182 output_addr_const (asm_out_file, symbol);
33183 fputs (")@fixup\n", asm_out_file);
33185 else
33186 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33189 void
33190 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33192 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33194 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33195 ASM_OUTPUT_LABEL (file, name);
33196 fputs (DOUBLE_INT_ASM_OP, file);
33197 rs6000_output_function_entry (file, name);
33198 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33199 if (DOT_SYMBOLS)
33201 fputs ("\t.size\t", file);
33202 assemble_name (file, name);
33203 fputs (",24\n\t.type\t.", file);
33204 assemble_name (file, name);
33205 fputs (",@function\n", file);
33206 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33208 fputs ("\t.globl\t.", file);
33209 assemble_name (file, name);
33210 putc ('\n', file);
33213 else
33214 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33215 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33216 rs6000_output_function_entry (file, name);
33217 fputs (":\n", file);
33218 return;
33221 if (DEFAULT_ABI == ABI_V4
33222 && (TARGET_RELOCATABLE || flag_pic > 1)
33223 && !TARGET_SECURE_PLT
33224 && (get_pool_size () != 0 || crtl->profile)
33225 && uses_TOC ())
33227 char buf[256];
33229 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33231 fprintf (file, "\t.long ");
33232 assemble_name (file, toc_label_name);
33233 need_toc_init = 1;
33234 putc ('-', file);
33235 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33236 assemble_name (file, buf);
33237 putc ('\n', file);
33240 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33241 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33243 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33245 char buf[256];
33247 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33249 fprintf (file, "\t.quad .TOC.-");
33250 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33251 assemble_name (file, buf);
33252 putc ('\n', file);
33255 if (DEFAULT_ABI == ABI_AIX)
33257 const char *desc_name, *orig_name;
33259 orig_name = (*targetm.strip_name_encoding) (name);
33260 desc_name = orig_name;
33261 while (*desc_name == '.')
33262 desc_name++;
33264 if (TREE_PUBLIC (decl))
33265 fprintf (file, "\t.globl %s\n", desc_name);
33267 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33268 fprintf (file, "%s:\n", desc_name);
33269 fprintf (file, "\t.long %s\n", orig_name);
33270 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33271 fputs ("\t.long 0\n", file);
33272 fprintf (file, "\t.previous\n");
33274 ASM_OUTPUT_LABEL (file, name);
33277 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33278 static void
33279 rs6000_elf_file_end (void)
33281 #ifdef HAVE_AS_GNU_ATTRIBUTE
33282 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33284 if (rs6000_passes_float)
33285 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
33286 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
33287 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
33288 : 2));
33289 if (rs6000_passes_vector)
33290 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33291 (TARGET_ALTIVEC_ABI ? 2
33292 : TARGET_SPE_ABI ? 3
33293 : 1));
33294 if (rs6000_returns_struct)
33295 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33296 aix_struct_return ? 2 : 1);
33298 #endif
33299 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33300 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33301 file_end_indicate_exec_stack ();
33302 #endif
33304 if (flag_split_stack)
33305 file_end_indicate_split_stack ();
33307 if (cpu_builtin_p)
33309 /* We have expanded a CPU builtin, so we need to emit a reference to
33310 the special symbol that LIBC uses to declare that it supports the
33311 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33312 switch_to_section (data_section);
33313 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33314 fprintf (asm_out_file, "\t%s %s\n",
33315 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33318 #endif
33320 #if TARGET_XCOFF
33322 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33323 #define HAVE_XCOFF_DWARF_EXTRAS 0
33324 #endif
33326 static enum unwind_info_type
33327 rs6000_xcoff_debug_unwind_info (void)
33329 return UI_NONE;
33332 static void
33333 rs6000_xcoff_asm_output_anchor (rtx symbol)
33335 char buffer[100];
33337 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33338 SYMBOL_REF_BLOCK_OFFSET (symbol));
33339 fprintf (asm_out_file, "%s", SET_ASM_OP);
33340 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33341 fprintf (asm_out_file, ",");
33342 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33343 fprintf (asm_out_file, "\n");
33346 static void
33347 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33349 fputs (GLOBAL_ASM_OP, stream);
33350 RS6000_OUTPUT_BASENAME (stream, name);
33351 putc ('\n', stream);
33354 /* A get_unnamed_decl callback, used for read-only sections. PTR
33355 points to the section string variable. */
33357 static void
33358 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33360 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33361 *(const char *const *) directive,
33362 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33365 /* Likewise for read-write sections. */
33367 static void
33368 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33370 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33371 *(const char *const *) directive,
33372 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33375 static void
33376 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33378 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33379 *(const char *const *) directive,
33380 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33383 /* A get_unnamed_section callback, used for switching to toc_section. */
33385 static void
33386 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33388 if (TARGET_MINIMAL_TOC)
33390 /* toc_section is always selected at least once from
33391 rs6000_xcoff_file_start, so this is guaranteed to be
33392 defined exactly once in each file. */
33393 if (!toc_initialized)
33395 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33396 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33397 toc_initialized = 1;
33399 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33400 (TARGET_32BIT ? "" : ",3"));
33402 else
33403 fputs ("\t.toc\n", asm_out_file);
33406 /* Implement TARGET_ASM_INIT_SECTIONS. */
33408 static void
33409 rs6000_xcoff_asm_init_sections (void)
33411 read_only_data_section
33412 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33413 &xcoff_read_only_section_name);
33415 private_data_section
33416 = get_unnamed_section (SECTION_WRITE,
33417 rs6000_xcoff_output_readwrite_section_asm_op,
33418 &xcoff_private_data_section_name);
33420 tls_data_section
33421 = get_unnamed_section (SECTION_TLS,
33422 rs6000_xcoff_output_tls_section_asm_op,
33423 &xcoff_tls_data_section_name);
33425 tls_private_data_section
33426 = get_unnamed_section (SECTION_TLS,
33427 rs6000_xcoff_output_tls_section_asm_op,
33428 &xcoff_private_data_section_name);
33430 read_only_private_data_section
33431 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33432 &xcoff_private_data_section_name);
33434 toc_section
33435 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33437 readonly_data_section = read_only_data_section;
33440 static int
33441 rs6000_xcoff_reloc_rw_mask (void)
33443 return 3;
33446 static void
33447 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33448 tree decl ATTRIBUTE_UNUSED)
33450 int smclass;
33451 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33453 if (flags & SECTION_EXCLUDE)
33454 smclass = 4;
33455 else if (flags & SECTION_DEBUG)
33457 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33458 return;
33460 else if (flags & SECTION_CODE)
33461 smclass = 0;
33462 else if (flags & SECTION_TLS)
33463 smclass = 3;
33464 else if (flags & SECTION_WRITE)
33465 smclass = 2;
33466 else
33467 smclass = 1;
33469 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33470 (flags & SECTION_CODE) ? "." : "",
33471 name, suffix[smclass], flags & SECTION_ENTSIZE);
33474 #define IN_NAMED_SECTION(DECL) \
33475 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33476 && DECL_SECTION_NAME (DECL) != NULL)
33478 static section *
33479 rs6000_xcoff_select_section (tree decl, int reloc,
33480 unsigned HOST_WIDE_INT align)
33482 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33483 a named section. */
33484 if (align > BIGGEST_ALIGNMENT)
33486 resolve_unique_section (decl, reloc, true);
33487 if (IN_NAMED_SECTION (decl))
33488 return get_named_section (decl, NULL, reloc);
33491 if (decl_readonly_section (decl, reloc))
33493 if (TREE_PUBLIC (decl))
33494 return read_only_data_section;
33495 else
33496 return read_only_private_data_section;
33498 else
33500 #if HAVE_AS_TLS
33501 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33503 if (TREE_PUBLIC (decl))
33504 return tls_data_section;
33505 else if (bss_initializer_p (decl))
33507 /* Convert to COMMON to emit in BSS. */
33508 DECL_COMMON (decl) = 1;
33509 return tls_comm_section;
33511 else
33512 return tls_private_data_section;
33514 else
33515 #endif
33516 if (TREE_PUBLIC (decl))
33517 return data_section;
33518 else
33519 return private_data_section;
33523 static void
33524 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33526 const char *name;
33528 /* Use select_section for private data and uninitialized data with
33529 alignment <= BIGGEST_ALIGNMENT. */
33530 if (!TREE_PUBLIC (decl)
33531 || DECL_COMMON (decl)
33532 || (DECL_INITIAL (decl) == NULL_TREE
33533 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33534 || DECL_INITIAL (decl) == error_mark_node
33535 || (flag_zero_initialized_in_bss
33536 && initializer_zerop (DECL_INITIAL (decl))))
33537 return;
33539 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33540 name = (*targetm.strip_name_encoding) (name);
33541 set_decl_section_name (decl, name);
33544 /* Select section for constant in constant pool.
33546 On RS/6000, all constants are in the private read-only data area.
33547 However, if this is being placed in the TOC it must be output as a
33548 toc entry. */
33550 static section *
33551 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33552 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33554 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33555 return toc_section;
33556 else
33557 return read_only_private_data_section;
33560 /* Remove any trailing [DS] or the like from the symbol name. */
33562 static const char *
33563 rs6000_xcoff_strip_name_encoding (const char *name)
33565 size_t len;
33566 if (*name == '*')
33567 name++;
33568 len = strlen (name);
33569 if (name[len - 1] == ']')
33570 return ggc_alloc_string (name, len - 4);
33571 else
33572 return name;
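/* (The "len - 4" above relies on XCOFF mapping-class suffixes being
   exactly two characters inside brackets, e.g. "[DS]", "[RW]" or
   "[TL]", which holds for every suffix emitted in this file.)  */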
33575 /* Section attributes. AIX is always PIC. */
33577 static unsigned int
33578 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33580 unsigned int align;
33581 unsigned int flags = default_section_type_flags (decl, name, reloc);
33583 /* Align to at least UNIT size. */
33584 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33585 align = MIN_UNITS_PER_WORD;
33586 else
33587 /* Increase alignment of large objects if not already stricter. */
33588 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33589 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33590 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33592 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
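/* (The alignment is stored in the SECTION_ENTSIZE bits as a log2
   value; rs6000_xcoff_asm_named_section above extracts those same
   bits and prints them as the alignment operand of the .csect
   directive.)  */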
33595 /* Output at beginning of assembler file.
33597 Initialize the section names for the RS/6000 at this point.
33599 Specify filename, including full path, to assembler.
33601 We want to go into the TOC section so at least one .toc will be emitted.
33602 Also, in order to output proper .bs/.es pairs, we need at least one static
33603 [RW] section emitted.
33605 Finally, declare mcount when profiling to make the assembler happy. */
33607 static void
33608 rs6000_xcoff_file_start (void)
33610 rs6000_gen_section_name (&xcoff_bss_section_name,
33611 main_input_filename, ".bss_");
33612 rs6000_gen_section_name (&xcoff_private_data_section_name,
33613 main_input_filename, ".rw_");
33614 rs6000_gen_section_name (&xcoff_read_only_section_name,
33615 main_input_filename, ".ro_");
33616 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33617 main_input_filename, ".tls_");
33618 rs6000_gen_section_name (&xcoff_tbss_section_name,
33619 main_input_filename, ".tbss_[UL]");
33621 fputs ("\t.file\t", asm_out_file);
33622 output_quoted_string (asm_out_file, main_input_filename);
33623 fputc ('\n', asm_out_file);
33624 if (write_symbols != NO_DEBUG)
33625 switch_to_section (private_data_section);
33626 switch_to_section (toc_section);
33627 switch_to_section (text_section);
33628 if (profile_flag)
33629 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33630 rs6000_file_start ();
33633 /* Output at end of assembler file.
33634 On the RS/6000, referencing data should automatically pull in text. */
33636 static void
33637 rs6000_xcoff_file_end (void)
33639 switch_to_section (text_section);
33640 fputs ("_section_.text:\n", asm_out_file);
33641 switch_to_section (data_section);
33642 fputs (TARGET_32BIT
33643 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33644 asm_out_file);
33647 struct declare_alias_data
33649 FILE *file;
33650 bool function_descriptor;
33653 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
33655 static bool
33656 rs6000_declare_alias (struct symtab_node *n, void *d)
33658 struct declare_alias_data *data = (struct declare_alias_data *)d;
33659 /* Main symbol is output specially, because the varasm machinery does part
33660 of the job for us; we do not need to declare .globl/.lglobl and such. */
33661 if (!n->alias || n->weakref)
33662 return false;
33664 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33665 return false;
33667 /* Prevent assemble_alias from trying to use the .set pseudo operation,
33668 which does not behave as expected by the middle-end. */
33669 TREE_ASM_WRITTEN (n->decl) = true;
33671 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33672 char *buffer = (char *) alloca (strlen (name) + 2);
33673 char *p;
33674 int dollar_inside = 0;
33676 strcpy (buffer, name);
33677 p = strchr (buffer, '$');
33678 while (p) {
33679 *p = '_';
33680 dollar_inside++;
33681 p = strchr (p + 1, '$');
33683 if (TREE_PUBLIC (n->decl))
33685 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
33687 if (dollar_inside) {
33688 if (data->function_descriptor)
33689 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33690 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33692 if (data->function_descriptor)
33694 fputs ("\t.globl .", data->file);
33695 RS6000_OUTPUT_BASENAME (data->file, buffer);
33696 putc ('\n', data->file);
33698 fputs ("\t.globl ", data->file);
33699 RS6000_OUTPUT_BASENAME (data->file, buffer);
33700 putc ('\n', data->file);
33702 #ifdef ASM_WEAKEN_DECL
33703 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33704 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
33705 #endif
33707 else
33709 if (dollar_inside)
33711 if (data->function_descriptor)
33712 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33713 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33715 if (data->function_descriptor)
33717 fputs ("\t.lglobl .", data->file);
33718 RS6000_OUTPUT_BASENAME (data->file, buffer);
33719 putc ('\n', data->file);
33721 fputs ("\t.lglobl ", data->file);
33722 RS6000_OUTPUT_BASENAME (data->file, buffer);
33723 putc ('\n', data->file);
33725 if (data->function_descriptor)
33726 fputs (".", data->file);
33727 RS6000_OUTPUT_BASENAME (data->file, buffer);
33728 fputs (":\n", data->file);
33729 return false;
33732 /* This macro produces the initial definition of a function name.
33733 On the RS/6000, we need to place an extra '.' in the function name and
33734 output the function descriptor.
33735 Dollar signs are converted to underscores.
33737 The csect for the function will have already been created when
33738 text_section was selected. We do have to go back to that csect, however.
33740 The third and fourth parameters to the .function pseudo-op (16 and 044)
33741 are placeholders which no longer have any use.
33743 Because the AIX assembler's .set command has unexpected semantics, we output
33744 all aliases as alternative labels in front of the definition. */
33746 void
33747 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33749 char *buffer = (char *) alloca (strlen (name) + 1);
33750 char *p;
33751 int dollar_inside = 0;
33752 struct declare_alias_data data = {file, false};
33754 strcpy (buffer, name);
33755 p = strchr (buffer, '$');
33756 while (p) {
33757 *p = '_';
33758 dollar_inside++;
33759 p = strchr (p + 1, '$');
33761 if (TREE_PUBLIC (decl))
33763 if (!RS6000_WEAK || !DECL_WEAK (decl))
33765 if (dollar_inside) {
33766 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33767 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33769 fputs ("\t.globl .", file);
33770 RS6000_OUTPUT_BASENAME (file, buffer);
33771 putc ('\n', file);
33774 else
33776 if (dollar_inside) {
33777 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33778 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33780 fputs ("\t.lglobl .", file);
33781 RS6000_OUTPUT_BASENAME (file, buffer);
33782 putc ('\n', file);
33784 fputs ("\t.csect ", file);
33785 RS6000_OUTPUT_BASENAME (file, buffer);
33786 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
33787 RS6000_OUTPUT_BASENAME (file, buffer);
33788 fputs (":\n", file);
33789 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
33790 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
33791 RS6000_OUTPUT_BASENAME (file, buffer);
33792 fputs (", TOC[tc0], 0\n", file);
33793 in_section = NULL;
33794 switch_to_section (function_section (decl));
33795 putc ('.', file);
33796 RS6000_OUTPUT_BASENAME (file, buffer);
33797 fputs (":\n", file);
33798 data.function_descriptor = true;
33799 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
33800 if (!DECL_IGNORED_P (decl))
33802 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33803 xcoffout_declare_function (file, decl, buffer);
33804 else if (write_symbols == DWARF2_DEBUG)
33806 name = (*targetm.strip_name_encoding) (name);
33807 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
33810 return;
33813 /* This macro produces the initial definition of an object (variable) name.
33814 Because the AIX assembler's .set command has unexpected semantics, we output
33815 all aliases as alternative labels in front of the definition. */
33817 void
33818 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
33820 struct declare_alias_data data = {file, false};
33821 RS6000_OUTPUT_BASENAME (file, name);
33822 fputs (":\n", file);
33823 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
33826 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
33828 void
33829 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
33831 fputs (integer_asm_op (size, FALSE), file);
33832 assemble_name (file, label);
33833 fputs ("-$", file);
33836 /* Output a symbol offset relative to the dbase for the current object.
33837 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
33838 signed offsets.
33840 __gcc_unwind_dbase is embedded in all executables/libraries through
33841 libgcc/config/rs6000/crtdbase.S. */
33843 void
33844 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
33846 fputs (integer_asm_op (size, FALSE), file);
33847 assemble_name (file, label);
33848 fputs("-__gcc_unwind_dbase", file);
33851 #ifdef HAVE_AS_TLS
33852 static void
33853 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
33855 rtx symbol;
33856 int flags;
33858 default_encode_section_info (decl, rtl, first);
33860 /* Careful not to prod global register variables. */
33861 if (!MEM_P (rtl))
33862 return;
33863 symbol = XEXP (rtl, 0);
33864 if (GET_CODE (symbol) != SYMBOL_REF)
33865 return;
33867 flags = SYMBOL_REF_FLAGS (symbol);
33869 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33870 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
33872 SYMBOL_REF_FLAGS (symbol) = flags;
33874 #endif /* HAVE_AS_TLS */
33875 #endif /* TARGET_XCOFF */
33877 /* Return true if INSN should not be copied. */
33879 static bool
33880 rs6000_cannot_copy_insn_p (rtx_insn *insn)
33882 return recog_memoized (insn) >= 0
33883 && get_attr_cannot_copy (insn);
33886 /* Compute a (partial) cost for rtx X. Return true if the complete
33887 cost has been computed, and false if subexpressions should be
33888 scanned. In either case, *TOTAL contains the cost result. */
33890 static bool
33891 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
33892 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
33894 int code = GET_CODE (x);
33896 switch (code)
33898 /* On the RS/6000, if it is valid in the insn, it is free. */
33899 case CONST_INT:
33900 if (((outer_code == SET
33901 || outer_code == PLUS
33902 || outer_code == MINUS)
33903 && (satisfies_constraint_I (x)
33904 || satisfies_constraint_L (x)))
33905 || (outer_code == AND
33906 && (satisfies_constraint_K (x)
33907 || (mode == SImode
33908 ? satisfies_constraint_L (x)
33909 : satisfies_constraint_J (x))))
33910 || ((outer_code == IOR || outer_code == XOR)
33911 && (satisfies_constraint_K (x)
33912 || (mode == SImode
33913 ? satisfies_constraint_L (x)
33914 : satisfies_constraint_J (x))))
33915 || outer_code == ASHIFT
33916 || outer_code == ASHIFTRT
33917 || outer_code == LSHIFTRT
33918 || outer_code == ROTATE
33919 || outer_code == ROTATERT
33920 || outer_code == ZERO_EXTRACT
33921 || (outer_code == MULT
33922 && satisfies_constraint_I (x))
33923 || ((outer_code == DIV || outer_code == UDIV
33924 || outer_code == MOD || outer_code == UMOD)
33925 && exact_log2 (INTVAL (x)) >= 0)
33926 || (outer_code == COMPARE
33927 && (satisfies_constraint_I (x)
33928 || satisfies_constraint_K (x)))
33929 || ((outer_code == EQ || outer_code == NE)
33930 && (satisfies_constraint_I (x)
33931 || satisfies_constraint_K (x)
33932 || (mode == SImode
33933 ? satisfies_constraint_L (x)
33934 : satisfies_constraint_J (x))))
33935 || (outer_code == GTU
33936 && satisfies_constraint_I (x))
33937 || (outer_code == LTU
33938 && satisfies_constraint_P (x)))
33940 *total = 0;
33941 return true;
33943 else if ((outer_code == PLUS
33944 && reg_or_add_cint_operand (x, VOIDmode))
33945 || (outer_code == MINUS
33946 && reg_or_sub_cint_operand (x, VOIDmode))
33947 || ((outer_code == SET
33948 || outer_code == IOR
33949 || outer_code == XOR)
33950 && (INTVAL (x)
33951 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
33953 *total = COSTS_N_INSNS (1);
33954 return true;
33956 /* FALLTHRU */
33958 case CONST_DOUBLE:
33959 case CONST_WIDE_INT:
33960 case CONST:
33961 case HIGH:
33962 case SYMBOL_REF:
33963 case MEM:
33964 /* When optimizing for size, MEM should be slightly more expensive
33965 than generating the address, e.g., (plus (reg) (const)).
33966 L1 cache latency is about two instructions. */
33967 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33968 return true;
33970 case LABEL_REF:
33971 *total = 0;
33972 return true;
33974 case PLUS:
33975 case MINUS:
33976 if (FLOAT_MODE_P (mode))
33977 *total = rs6000_cost->fp;
33978 else
33979 *total = COSTS_N_INSNS (1);
33980 return false;
33982 case MULT:
33983 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33984 && satisfies_constraint_I (XEXP (x, 1)))
33986 if (INTVAL (XEXP (x, 1)) >= -256
33987 && INTVAL (XEXP (x, 1)) <= 255)
33988 *total = rs6000_cost->mulsi_const9;
33989 else
33990 *total = rs6000_cost->mulsi_const;
33992 else if (mode == SFmode)
33993 *total = rs6000_cost->fp;
33994 else if (FLOAT_MODE_P (mode))
33995 *total = rs6000_cost->dmul;
33996 else if (mode == DImode)
33997 *total = rs6000_cost->muldi;
33998 else
33999 *total = rs6000_cost->mulsi;
34000 return false;
34002 case FMA:
34003 if (mode == SFmode)
34004 *total = rs6000_cost->fp;
34005 else
34006 *total = rs6000_cost->dmul;
34007 break;
34009 case DIV:
34010 case MOD:
34011 if (FLOAT_MODE_P (mode))
34013 *total = mode == DFmode ? rs6000_cost->ddiv
34014 : rs6000_cost->sdiv;
34015 return false;
34017 /* FALLTHRU */
34019 case UDIV:
34020 case UMOD:
34021 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34022 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34024 if (code == DIV || code == MOD)
34025 /* Shift, addze */
34026 *total = COSTS_N_INSNS (2);
34027 else
34028 /* Shift */
34029 *total = COSTS_N_INSNS (1);
34031 else
34033 if (GET_MODE (XEXP (x, 1)) == DImode)
34034 *total = rs6000_cost->divdi;
34035 else
34036 *total = rs6000_cost->divsi;
34038 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34039 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34040 *total += COSTS_N_INSNS (2);
34041 return false;
34043 case CTZ:
34044 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34045 return false;
34047 case FFS:
34048 *total = COSTS_N_INSNS (4);
34049 return false;
34051 case POPCOUNT:
34052 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34053 return false;
34055 case PARITY:
34056 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34057 return false;
34059 case NOT:
34060 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34061 *total = 0;
34062 else
34063 *total = COSTS_N_INSNS (1);
34064 return false;
34066 case AND:
34067 if (CONST_INT_P (XEXP (x, 1)))
34069 rtx left = XEXP (x, 0);
34070 rtx_code left_code = GET_CODE (left);
34072 /* rotate-and-mask: 1 insn. */
34073 if ((left_code == ROTATE
34074 || left_code == ASHIFT
34075 || left_code == LSHIFTRT)
34076 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34078 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34079 if (!CONST_INT_P (XEXP (left, 1)))
34080 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34081 *total += COSTS_N_INSNS (1);
34082 return true;
34085 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34086 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34087 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34088 || (val & 0xffff) == val
34089 || (val & 0xffff0000) == val
34090 || ((val & 0xffff) == 0 && mode == SImode))
34092 *total = rtx_cost (left, mode, AND, 0, speed);
34093 *total += COSTS_N_INSNS (1);
34094 return true;
34097 /* 2 insns. */
34098 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34100 *total = rtx_cost (left, mode, AND, 0, speed);
34101 *total += COSTS_N_INSNS (2);
34102 return true;
34106 *total = COSTS_N_INSNS (1);
34107 return false;
34109 case IOR:
34110 /* FIXME */
34111 *total = COSTS_N_INSNS (1);
34112 return true;
34114 case CLZ:
34115 case XOR:
34116 case ZERO_EXTRACT:
34117 *total = COSTS_N_INSNS (1);
34118 return false;
34120 case ASHIFT:
34121 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34122 the sign extend and shift separately within the insn. */
34123 if (TARGET_EXTSWSLI && mode == DImode
34124 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34125 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34127 *total = 0;
34128 return false;
34130 /* fall through */
34132 case ASHIFTRT:
34133 case LSHIFTRT:
34134 case ROTATE:
34135 case ROTATERT:
34136 /* Handle mul_highpart. */
34137 if (outer_code == TRUNCATE
34138 && GET_CODE (XEXP (x, 0)) == MULT)
34140 if (mode == DImode)
34141 *total = rs6000_cost->muldi;
34142 else
34143 *total = rs6000_cost->mulsi;
34144 return true;
34146 else if (outer_code == AND)
34147 *total = 0;
34148 else
34149 *total = COSTS_N_INSNS (1);
34150 return false;
34152 case SIGN_EXTEND:
34153 case ZERO_EXTEND:
34154 if (GET_CODE (XEXP (x, 0)) == MEM)
34155 *total = 0;
34156 else
34157 *total = COSTS_N_INSNS (1);
34158 return false;
34160 case COMPARE:
34161 case NEG:
34162 case ABS:
34163 if (!FLOAT_MODE_P (mode))
34165 *total = COSTS_N_INSNS (1);
34166 return false;
34168 /* FALLTHRU */
34170 case FLOAT:
34171 case UNSIGNED_FLOAT:
34172 case FIX:
34173 case UNSIGNED_FIX:
34174 case FLOAT_TRUNCATE:
34175 *total = rs6000_cost->fp;
34176 return false;
34178 case FLOAT_EXTEND:
34179 if (mode == DFmode)
34180 *total = rs6000_cost->sfdf_convert;
34181 else
34182 *total = rs6000_cost->fp;
34183 return false;
34185 case UNSPEC:
34186 switch (XINT (x, 1))
34188 case UNSPEC_FRSP:
34189 *total = rs6000_cost->fp;
34190 return true;
34192 default:
34193 break;
34195 break;
34197 case CALL:
34198 case IF_THEN_ELSE:
34199 if (!speed)
34201 *total = COSTS_N_INSNS (1);
34202 return true;
34204 else if (FLOAT_MODE_P (mode)
34205 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
34207 *total = rs6000_cost->fp;
34208 return false;
34210 break;
34212 case NE:
34213 case EQ:
34214 case GTU:
34215 case LTU:
34216 /* Carry bit requires mode == Pmode.
34217 The NEG or PLUS is already counted, so only add one. */
34218 if (mode == Pmode
34219 && (outer_code == NEG || outer_code == PLUS))
34221 *total = COSTS_N_INSNS (1);
34222 return true;
34224 if (outer_code == SET)
34226 if (XEXP (x, 1) == const0_rtx)
34228 if (TARGET_ISEL && !TARGET_MFCRF)
34229 *total = COSTS_N_INSNS (8);
34230 else
34231 *total = COSTS_N_INSNS (2);
34232 return true;
34234 else
34236 *total = COSTS_N_INSNS (3);
34237 return false;
34240 /* FALLTHRU */
34242 case GT:
34243 case LT:
34244 case UNORDERED:
34245 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
34247 if (TARGET_ISEL && !TARGET_MFCRF)
34248 *total = COSTS_N_INSNS (8);
34249 else
34250 *total = COSTS_N_INSNS (2);
34251 return true;
34253 /* CC COMPARE. */
34254 if (outer_code == COMPARE)
34256 *total = 0;
34257 return true;
34259 break;
34261 default:
34262 break;
34265 return false;
34268 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34270 static bool
34271 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34272 int opno, int *total, bool speed)
34274 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34276 fprintf (stderr,
34277 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34278 "opno = %d, total = %d, speed = %s, x:\n",
34279 ret ? "complete" : "scan inner",
34280 GET_MODE_NAME (mode),
34281 GET_RTX_NAME (outer_code),
34282 opno,
34283 *total,
34284 speed ? "true" : "false");
34286 debug_rtx (x);
34288 return ret;
34291 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34293 static int
34294 rs6000_debug_address_cost (rtx x, machine_mode mode,
34295 addr_space_t as, bool speed)
34297 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34299 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34300 ret, speed ? "true" : "false");
34301 debug_rtx (x);
34303 return ret;
34307 /* A C expression returning the cost of moving data from a register of class
34308 CLASS1 to one of CLASS2. */
34310 static int
34311 rs6000_register_move_cost (machine_mode mode,
34312 reg_class_t from, reg_class_t to)
34314 int ret;
34316 if (TARGET_DEBUG_COST)
34317 dbg_cost_ctrl++;
34319 /* Moves from/to GENERAL_REGS. */
34320 if (reg_classes_intersect_p (to, GENERAL_REGS)
34321 || reg_classes_intersect_p (from, GENERAL_REGS))
34323 reg_class_t rclass = from;
34325 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34326 rclass = to;
34328 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34329 ret = (rs6000_memory_move_cost (mode, rclass, false)
34330 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34332 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34333 shift. */
34334 else if (rclass == CR_REGS)
34335 ret = 4;
34337 /* For those processors that have slow LR/CTR moves, make them more
34338 expensive than memory in order to bias spills to memory. */
34339 else if ((rs6000_cpu == PROCESSOR_POWER6
34340 || rs6000_cpu == PROCESSOR_POWER7
34341 || rs6000_cpu == PROCESSOR_POWER8
34342 || rs6000_cpu == PROCESSOR_POWER9)
34343 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34344 ret = 6 * hard_regno_nregs[0][mode];
34346 else
34347 /* A move will cost one instruction per GPR moved. */
34348 ret = 2 * hard_regno_nregs[0][mode];
34351 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34352 else if (VECTOR_MEM_VSX_P (mode)
34353 && reg_classes_intersect_p (to, VSX_REGS)
34354 && reg_classes_intersect_p (from, VSX_REGS))
34355 ret = 2 * hard_regno_nregs[32][mode];
34357 /* Moving between two similar registers is just one instruction. */
34358 else if (reg_classes_intersect_p (to, from))
34359 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34361 /* Everything else has to go through GENERAL_REGS. */
34362 else
34363 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34364 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34366 if (TARGET_DEBUG_COST)
34368 if (dbg_cost_ctrl == 1)
34369 fprintf (stderr,
34370 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34371 ret, GET_MODE_NAME (mode), reg_class_names[from],
34372 reg_class_names[to]);
34373 dbg_cost_ctrl--;
34376 return ret;
34379 /* A C expression returning the cost of moving data of MODE from a register to
34380 or from memory. */
34382 static int
34383 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34384 bool in ATTRIBUTE_UNUSED)
34386 int ret;
34388 if (TARGET_DEBUG_COST)
34389 dbg_cost_ctrl++;
34391 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34392 ret = 4 * hard_regno_nregs[0][mode];
34393 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34394 || reg_classes_intersect_p (rclass, VSX_REGS)))
34395 ret = 4 * hard_regno_nregs[32][mode];
34396 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34397 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
34398 else
34399 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34401 if (TARGET_DEBUG_COST)
34403 if (dbg_cost_ctrl == 1)
34404 fprintf (stderr,
34405 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34406 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34407 dbg_cost_ctrl--;
34410 return ret;
34413 /* Returns a code for a target-specific builtin that implements the
34414 reciprocal of the function, or NULL_TREE if not available. */
34416 static tree
34417 rs6000_builtin_reciprocal (tree fndecl)
34419 switch (DECL_FUNCTION_CODE (fndecl))
34421 case VSX_BUILTIN_XVSQRTDP:
34422 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34423 return NULL_TREE;
34425 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34427 case VSX_BUILTIN_XVSQRTSP:
34428 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34429 return NULL_TREE;
34431 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34433 default:
34434 return NULL_TREE;
34438 /* Load up a constant. If the mode is a vector mode, splat the value across
34439 all of the vector elements. */
34441 static rtx
34442 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34444 rtx reg;
34446 if (mode == SFmode || mode == DFmode)
34448 rtx d = const_double_from_real_value (dconst, mode);
34449 reg = force_reg (mode, d);
34451 else if (mode == V4SFmode)
34453 rtx d = const_double_from_real_value (dconst, SFmode);
34454 rtvec v = gen_rtvec (4, d, d, d, d);
34455 reg = gen_reg_rtx (mode);
34456 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34458 else if (mode == V2DFmode)
34460 rtx d = const_double_from_real_value (dconst, DFmode);
34461 rtvec v = gen_rtvec (2, d, d);
34462 reg = gen_reg_rtx (mode);
34463 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34465 else
34466 gcc_unreachable ();
34468 return reg;
34471 /* Generate an FMA instruction. */
34473 static void
34474 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34476 machine_mode mode = GET_MODE (target);
34477 rtx dst;
34479 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34480 gcc_assert (dst != NULL);
34482 if (dst != target)
34483 emit_move_insn (target, dst);
34486 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34488 static void
34489 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34491 machine_mode mode = GET_MODE (dst);
34492 rtx r;
34494 /* This is a tad more complicated, since the fnma_optab is for
34495 a different expression: fma(-m1, m2, a), which is the same
34496 thing except in the case of signed zeros.
34498 Fortunately we know that if FMA is supported, then FNMSUB is
34499 also supported in the ISA. Just expand it directly. */
34501 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34503 r = gen_rtx_NEG (mode, a);
34504 r = gen_rtx_FMA (mode, m1, m2, r);
34505 r = gen_rtx_NEG (mode, r);
34506 emit_insn (gen_rtx_SET (dst, r));
34509 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34510 add a reg_note saying that this was a division. Support both scalar and
34511 vector divide. Assumes no trapping math and finite arguments. */
34513 void
34514 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34516 machine_mode mode = GET_MODE (dst);
34517 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34518 int i;
34520 /* Low precision estimates guarantee 5 bits of accuracy. High
34521 precision estimates guarantee 14 bits of accuracy. SFmode
34522 requires 23 bits of accuracy. DFmode requires 52 bits of
34523 accuracy. Each pass at least doubles the accuracy, leading
34524 to the following. */
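/* Spelling that out: from a 5-bit estimate the accuracy doubles to 10,
   20 and then 40 bits after three passes, covering SFmode's 23 bits;
   a fourth pass reaches 80 bits for DFmode's 52. From a 14-bit
   estimate, one pass gives 28 bits (enough for SFmode) and two give
   56 (enough for DFmode), hence the counts computed below. */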
34525 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34526 if (mode == DFmode || mode == V2DFmode)
34527 passes++;
34529 enum insn_code code = optab_handler (smul_optab, mode);
34530 insn_gen_fn gen_mul = GEN_FCN (code);
34532 gcc_assert (code != CODE_FOR_nothing);
34534 one = rs6000_load_constant_and_splat (mode, dconst1);
34536 /* x0 = 1./d estimate */
34537 x0 = gen_reg_rtx (mode);
34538 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34539 UNSPEC_FRES)));
34541 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34542 if (passes > 1) {
34544 /* e0 = 1. - d * x0 */
34545 e0 = gen_reg_rtx (mode);
34546 rs6000_emit_nmsub (e0, d, x0, one);
34548 /* x1 = x0 + e0 * x0 */
34549 x1 = gen_reg_rtx (mode);
34550 rs6000_emit_madd (x1, e0, x0, x0);
34552 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34553 ++i, xprev = xnext, eprev = enext) {
34555 /* enext = eprev * eprev */
34556 enext = gen_reg_rtx (mode);
34557 emit_insn (gen_mul (enext, eprev, eprev));
34559 /* xnext = xprev + enext * xprev */
34560 xnext = gen_reg_rtx (mode);
34561 rs6000_emit_madd (xnext, enext, xprev, xprev);
34564 } else
34565 xprev = x0;
34567 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34569 /* u = n * xprev */
34570 u = gen_reg_rtx (mode);
34571 emit_insn (gen_mul (u, n, xprev));
34573 /* v = n - (d * u) */
34574 v = gen_reg_rtx (mode);
34575 rs6000_emit_nmsub (v, d, u, n);
34577 /* dst = (v * xprev) + u */
34578 rs6000_emit_madd (dst, v, xprev, u);
34580 if (note_p)
34581 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
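/* To see that the final steps above compute the last Newton-Raphson
   iteration, substitute u = n*xprev: dst = v*xprev + u
   = (n - d*n*xprev)*xprev + n*xprev = n * xprev * (2 - d*xprev),
   i.e. x_(i+1) scaled by the numerator. */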
34584 /* Goldschmidt's Algorithm for single/double-precision floating point
34585 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
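/* In outline (a sketch of what the code below emits): starting from a
   reciprocal-square-root estimate e, set g = src*e (~ sqrt(src)) and
   h = e/2 (~ 1/(2*sqrt(src))); each pass computes t = 1/2 - g*h and
   refines g += g*t, h += h*t. The sqrt result is g; the rsqrt result
   is 2*h. */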
34587 void
34588 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34590 machine_mode mode = GET_MODE (src);
34591 rtx e = gen_reg_rtx (mode);
34592 rtx g = gen_reg_rtx (mode);
34593 rtx h = gen_reg_rtx (mode);
34595 /* Low precision estimates guarantee 5 bits of accuracy. High
34596 precision estimates guarantee 14 bits of accuracy. SFmode
34597 requires 23 bits of accuracy. DFmode requires 52 bits of
34598 accuracy. Each pass at least doubles the accuracy, leading
34599 to the following. */
34600 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34601 if (mode == DFmode || mode == V2DFmode)
34602 passes++;
34604 int i;
34605 rtx mhalf;
34606 enum insn_code code = optab_handler (smul_optab, mode);
34607 insn_gen_fn gen_mul = GEN_FCN (code);
34609 gcc_assert (code != CODE_FOR_nothing);
34611 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34613 /* e = rsqrt estimate */
34614 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34615 UNSPEC_RSQRT)));
34617 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34618 if (!recip)
34620 rtx zero = force_reg (mode, CONST0_RTX (mode));
34622 if (mode == SFmode)
34624 rtx target = emit_conditional_move (e, GT, src, zero, mode,
34625 e, zero, mode, 0);
34626 if (target != e)
34627 emit_move_insn (e, target);
34629 else
34631 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
34632 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
34636 /* g = sqrt estimate. */
34637 emit_insn (gen_mul (g, e, src));
34638 /* h = 1/(2*sqrt) estimate. */
34639 emit_insn (gen_mul (h, e, mhalf));
34641 if (recip)
34643 if (passes == 1)
34645 rtx t = gen_reg_rtx (mode);
34646 rs6000_emit_nmsub (t, g, h, mhalf);
34647 /* Apply correction directly to 1/rsqrt estimate. */
34648 rs6000_emit_madd (dst, e, t, e);
34650 else
34652 for (i = 0; i < passes; i++)
34654 rtx t1 = gen_reg_rtx (mode);
34655 rtx g1 = gen_reg_rtx (mode);
34656 rtx h1 = gen_reg_rtx (mode);
34658 rs6000_emit_nmsub (t1, g, h, mhalf);
34659 rs6000_emit_madd (g1, g, t1, g);
34660 rs6000_emit_madd (h1, h, t1, h);
34662 g = g1;
34663 h = h1;
34665 /* Multiply by 2 for 1/rsqrt. */
34666 emit_insn (gen_add3_insn (dst, h, h));
34669 else
34671 rtx t = gen_reg_rtx (mode);
34672 rs6000_emit_nmsub (t, g, h, mhalf);
34673 rs6000_emit_madd (dst, g, t, g);
34676 return;
34679 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
34680 (Power7) targets. DST is the target, and SRC is the argument operand. */
34682 void
34683 rs6000_emit_popcount (rtx dst, rtx src)
34685 machine_mode mode = GET_MODE (dst);
34686 rtx tmp1, tmp2;
34688 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
34689 if (TARGET_POPCNTD)
34691 if (mode == SImode)
34692 emit_insn (gen_popcntdsi2 (dst, src));
34693 else
34694 emit_insn (gen_popcntddi2 (dst, src));
34695 return;
34698 tmp1 = gen_reg_rtx (mode);
34700 if (mode == SImode)
34702 emit_insn (gen_popcntbsi2 (tmp1, src));
34703 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
34704 NULL_RTX, 0);
34705 tmp2 = force_reg (SImode, tmp2);
34706 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
34708 else
34710 emit_insn (gen_popcntbdi2 (tmp1, src));
34711 tmp2 = expand_mult (DImode, tmp1,
34712 GEN_INT ((HOST_WIDE_INT)
34713 0x01010101 << 32 | 0x01010101),
34714 NULL_RTX, 0);
34715 tmp2 = force_reg (DImode, tmp2);
34716 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
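/* The multiply above is the classic byte-summing trick: popcntb
   leaves a population count in each byte, and multiplying by
   0x01010101 (or its 64-bit analogue) accumulates all of those byte
   counts into the most significant byte, which the final right shift
   extracts. */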
34721 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
34722 target, and SRC is the argument operand. */
34724 void
34725 rs6000_emit_parity (rtx dst, rtx src)
34727 machine_mode mode = GET_MODE (dst);
34728 rtx tmp;
34730 tmp = gen_reg_rtx (mode);
34732 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
34733 if (TARGET_CMPB)
34735 if (mode == SImode)
34737 emit_insn (gen_popcntbsi2 (tmp, src));
34738 emit_insn (gen_paritysi2_cmpb (dst, tmp));
34740 else
34742 emit_insn (gen_popcntbdi2 (tmp, src));
34743 emit_insn (gen_paritydi2_cmpb (dst, tmp));
34745 return;
34748 if (mode == SImode)
34750 /* Is mult+shift >= shift+xor+shift+xor? */
34751 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
34753 rtx tmp1, tmp2, tmp3, tmp4;
34755 tmp1 = gen_reg_rtx (SImode);
34756 emit_insn (gen_popcntbsi2 (tmp1, src));
34758 tmp2 = gen_reg_rtx (SImode);
34759 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
34760 tmp3 = gen_reg_rtx (SImode);
34761 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
34763 tmp4 = gen_reg_rtx (SImode);
34764 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
34765 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
34767 else
34768 rs6000_emit_popcount (tmp, src);
34769 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
34771 else
34773 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
34774 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
34776 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
34778 tmp1 = gen_reg_rtx (DImode);
34779 emit_insn (gen_popcntbdi2 (tmp1, src));
34781 tmp2 = gen_reg_rtx (DImode);
34782 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
34783 tmp3 = gen_reg_rtx (DImode);
34784 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
34786 tmp4 = gen_reg_rtx (DImode);
34787 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
34788 tmp5 = gen_reg_rtx (DImode);
34789 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
34791 tmp6 = gen_reg_rtx (DImode);
34792 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
34793 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
34795 else
34796 rs6000_emit_popcount (tmp, src);
34797 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
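/* In the shift+xor sequences above, each fold xors the upper half of
   the byte-count word into the lower half; parity is preserved by
   xor, so once everything has been folded into the low byte, its
   least significant bit (isolated by the final and) is the parity of
   the source. */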
34801 /* Expand an Altivec constant permutation for little endian mode.
34802 There are two issues: First, the two input operands must be
34803 swapped so that together they form a double-wide array in LE
34804 order. Second, the vperm instruction has surprising behavior
34805 in LE mode: it interprets the elements of the source vectors
34806 in BE mode ("left to right") and interprets the elements of
34807 the destination vector in LE mode ("right to left"). To
34808 correct for this, we must subtract each element of the permute
34809 control vector from 31.
34811 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
34812 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
34813 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
34814 serve as the permute control vector. Then, in BE mode,
34816 vperm 9,10,11,12
34818 places the desired result in vr9. However, in LE mode the
34819 vector contents will be
34821 vr10 = 00000003 00000002 00000001 00000000
34822 vr11 = 00000007 00000006 00000005 00000004
34824 The result of the vperm using the same permute control vector is
34826 vr9 = 05000000 07000000 01000000 03000000
34828 That is, the leftmost 4 bytes of vr10 are interpreted as the
34829 source for the rightmost 4 bytes of vr9, and so on.
34831 If we change the permute control vector to
34833 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
34835 and issue
34837 vperm 9,11,10,12
34839 we get the desired
34841 vr9 = 00000006 00000004 00000002 00000000. */
34843 void
34844 altivec_expand_vec_perm_const_le (rtx operands[4])
34846 unsigned int i;
34847 rtx perm[16];
34848 rtx constv, unspec;
34849 rtx target = operands[0];
34850 rtx op0 = operands[1];
34851 rtx op1 = operands[2];
34852 rtx sel = operands[3];
34854 /* Unpack and adjust the constant selector. */
34855 for (i = 0; i < 16; ++i)
34857 rtx e = XVECEXP (sel, 0, i);
34858 unsigned int elt = 31 - (INTVAL (e) & 31);
34859 perm[i] = GEN_INT (elt);
34862 /* Expand to a permute, swapping the inputs and using the
34863 adjusted selector. */
34864 if (!REG_P (op0))
34865 op0 = force_reg (V16QImode, op0);
34866 if (!REG_P (op1))
34867 op1 = force_reg (V16QImode, op1);
34869 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
34870 constv = force_reg (V16QImode, constv);
34871 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
34872 UNSPEC_VPERM);
34873 if (!REG_P (target))
34875 rtx tmp = gen_reg_rtx (V16QImode);
34876 emit_move_insn (tmp, unspec);
34877 unspec = tmp;
34880 emit_move_insn (target, unspec);
34883 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
34884 permute control vector. But here it's not a constant, so we must
34885 generate a vector NAND or NOR to do the adjustment. */
34887 void
34888 altivec_expand_vec_perm_le (rtx operands[4])
34890 rtx notx, iorx, unspec;
34891 rtx target = operands[0];
34892 rtx op0 = operands[1];
34893 rtx op1 = operands[2];
34894 rtx sel = operands[3];
34895 rtx tmp = target;
34896 rtx norreg = gen_reg_rtx (V16QImode);
34897 machine_mode mode = GET_MODE (target);
34899 /* Get everything in regs so the pattern matches. */
34900 if (!REG_P (op0))
34901 op0 = force_reg (mode, op0);
34902 if (!REG_P (op1))
34903 op1 = force_reg (mode, op1);
34904 if (!REG_P (sel))
34905 sel = force_reg (V16QImode, sel);
34906 if (!REG_P (target))
34907 tmp = gen_reg_rtx (mode);
34909 if (TARGET_P9_VECTOR)
34911 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
34912 UNSPEC_VPERMR);
34914 else
34916 /* Invert the selector with a VNAND if available, else a VNOR.
34917 The VNAND is preferred for future fusion opportunities. */
34918 notx = gen_rtx_NOT (V16QImode, sel);
34919 iorx = (TARGET_P8_VECTOR
34920 ? gen_rtx_IOR (V16QImode, notx, notx)
34921 : gen_rtx_AND (V16QImode, notx, notx));
34922 emit_insn (gen_rtx_SET (norreg, iorx));
34924 /* Permute with operands reversed and adjusted selector. */
34925 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
34926 UNSPEC_VPERM);
34929 /* Copy into target, possibly by way of a register. */
34930 if (!REG_P (target))
34932 emit_move_insn (tmp, unspec);
34933 unspec = tmp;
34936 emit_move_insn (target, unspec);
34939 /* Expand an Altivec constant permutation. Return true if we match
34940 an efficient implementation; false to fall back to VPERM. */
34942 bool
34943 altivec_expand_vec_perm_const (rtx operands[4])
34945 struct altivec_perm_insn {
34946 HOST_WIDE_INT mask;
34947 enum insn_code impl;
34948 unsigned char perm[16];
34950 static const struct altivec_perm_insn patterns[] = {
34951 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
34952 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
34953 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
34954 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
34955 { OPTION_MASK_ALTIVEC,
34956 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
34957 : CODE_FOR_altivec_vmrglb_direct),
34958 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
34959 { OPTION_MASK_ALTIVEC,
34960 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
34961 : CODE_FOR_altivec_vmrglh_direct),
34962 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
34963 { OPTION_MASK_ALTIVEC,
34964 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
34965 : CODE_FOR_altivec_vmrglw_direct),
34966 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
34967 { OPTION_MASK_ALTIVEC,
34968 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
34969 : CODE_FOR_altivec_vmrghb_direct),
34970 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
34971 { OPTION_MASK_ALTIVEC,
34972 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
34973 : CODE_FOR_altivec_vmrghh_direct),
34974 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
34975 { OPTION_MASK_ALTIVEC,
34976 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
34977 : CODE_FOR_altivec_vmrghw_direct),
34978 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
34979 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
34980 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
34981 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
34982 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
34983 };
34985 unsigned int i, j, elt, which;
34986 unsigned char perm[16];
34987 rtx target, op0, op1, sel, x;
34988 bool one_vec;
34990 target = operands[0];
34991 op0 = operands[1];
34992 op1 = operands[2];
34993 sel = operands[3];
34995 /* Unpack the constant selector. */
34996 for (i = which = 0; i < 16; ++i)
34998 rtx e = XVECEXP (sel, 0, i);
34999 elt = INTVAL (e) & 31;
35000 which |= (elt < 16 ? 1 : 2);
35001 perm[i] = elt;
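/* For example (illustrative selectors): {1,3,5,...,31} draws odd bytes
   from both inputs, so WHICH becomes 3; a selector taken entirely from
   op0, such as sixteen copies of 5, leaves WHICH == 1 and is a splat
   candidate handled below.  */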
35004 /* Simplify the constant selector based on operands. */
35005 switch (which)
35007 default:
35008 gcc_unreachable ();
35010 case 3:
35011 one_vec = false;
35012 if (!rtx_equal_p (op0, op1))
35013 break;
35014 /* FALLTHRU */
35016 case 2:
35017 for (i = 0; i < 16; ++i)
35018 perm[i] &= 15;
35019 op0 = op1;
35020 one_vec = true;
35021 break;
35023 case 1:
35024 op1 = op0;
35025 one_vec = true;
35026 break;
35029 /* Look for splat patterns. */
35030 if (one_vec)
35032 elt = perm[0];
35034 for (i = 0; i < 16; ++i)
35035 if (perm[i] != elt)
35036 break;
35037 if (i == 16)
35039 if (!BYTES_BIG_ENDIAN)
35040 elt = 15 - elt;
35041 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35042 return true;
35045 if (elt % 2 == 0)
35047 for (i = 0; i < 16; i += 2)
35048 if (perm[i] != elt || perm[i + 1] != elt + 1)
35049 break;
35050 if (i == 16)
35052 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35053 x = gen_reg_rtx (V8HImode);
35054 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35055 GEN_INT (field)));
35056 emit_move_insn (target, gen_lowpart (V16QImode, x));
35057 return true;
35061 if (elt % 4 == 0)
35063 for (i = 0; i < 16; i += 4)
35064 if (perm[i] != elt
35065 || perm[i + 1] != elt + 1
35066 || perm[i + 2] != elt + 2
35067 || perm[i + 3] != elt + 3)
35068 break;
35069 if (i == 16)
35071 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35072 x = gen_reg_rtx (V4SImode);
35073 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35074 GEN_INT (field)));
35075 emit_move_insn (target, gen_lowpart (V16QImode, x));
35076 return true;
35081 /* Look for merge and pack patterns. */
35082 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35084 bool swapped;
35086 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35087 continue;
35089 elt = patterns[j].perm[0];
35090 if (perm[0] == elt)
35091 swapped = false;
35092 else if (perm[0] == elt + 16)
35093 swapped = true;
35094 else
35095 continue;
35096 for (i = 1; i < 16; ++i)
35098 elt = patterns[j].perm[i];
35099 if (swapped)
35100 elt = (elt >= 16 ? elt - 16 : elt + 16);
35101 else if (one_vec && elt >= 16)
35102 elt -= 16;
35103 if (perm[i] != elt)
35104 break;
35106 if (i == 16)
35108 enum insn_code icode = patterns[j].impl;
35109 machine_mode omode = insn_data[icode].operand[0].mode;
35110 machine_mode imode = insn_data[icode].operand[1].mode;
35112 /* For little-endian, don't use vpkuwum and vpkuhum if the
35113 underlying vector type is not V4SI and V8HI, respectively.
35114 For example, using vpkuwum with a V8HI picks up the even
35115 halfwords (BE numbering) when the even halfwords (LE
35116 numbering) are what we need. */
35117 if (!BYTES_BIG_ENDIAN
35118 && icode == CODE_FOR_altivec_vpkuwum_direct
35119 && ((GET_CODE (op0) == REG
35120 && GET_MODE (op0) != V4SImode)
35121 || (GET_CODE (op0) == SUBREG
35122 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35123 continue;
35124 if (!BYTES_BIG_ENDIAN
35125 && icode == CODE_FOR_altivec_vpkuhum_direct
35126 && ((GET_CODE (op0) == REG
35127 && GET_MODE (op0) != V8HImode)
35128 || (GET_CODE (op0) == SUBREG
35129 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35130 continue;
35132 /* For little-endian, the two input operands must be swapped
35133 (or swapped back) to ensure proper right-to-left numbering
35134 from 0 to 2N-1. */
35135 if (swapped ^ !BYTES_BIG_ENDIAN)
35136 std::swap (op0, op1);
35137 if (imode != V16QImode)
35139 op0 = gen_lowpart (imode, op0);
35140 op1 = gen_lowpart (imode, op1);
35142 if (omode == V16QImode)
35143 x = target;
35144 else
35145 x = gen_reg_rtx (omode);
35146 emit_insn (GEN_FCN (icode) (x, op0, op1));
35147 if (omode != V16QImode)
35148 emit_move_insn (target, gen_lowpart (V16QImode, x));
35149 return true;
35153 if (!BYTES_BIG_ENDIAN)
35155 altivec_expand_vec_perm_const_le (operands);
35156 return true;
35159 return false;
35162 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
35163 Return true if we match an efficient implementation. */
35165 static bool
35166 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35167 unsigned char perm0, unsigned char perm1)
35169 rtx x;
35171 /* If both selectors come from the same operand, fold to single op. */
35172 if ((perm0 & 2) == (perm1 & 2))
35174 if (perm0 & 2)
35175 op0 = op1;
35176 else
35177 op1 = op0;
35179 /* If both operands are equal, fold to simpler permutation. */
35180 if (rtx_equal_p (op0, op1))
35182 perm0 = perm0 & 1;
35183 perm1 = (perm1 & 1) + 2;
35185 /* If the first selector comes from the second operand, swap. */
35186 else if (perm0 & 2)
35188 if (perm1 & 2)
35189 return false;
35190 perm0 -= 2;
35191 perm1 += 2;
35192 std::swap (op0, op1);
35194 /* If the second selector does not come from the second operand, fail. */
35195 else if ((perm1 & 2) == 0)
35196 return false;
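/* At this point perm0 indexes op0 (values 0-1) and perm1 indexes op1
   (values 2-3).  For example, perm0 == 1 with perm1 == 2 selects
   { op0[1], op1[0] } from the four-element concatenation built below.  */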
35198 /* Success! */
35199 if (target != NULL)
35201 machine_mode vmode, dmode;
35202 rtvec v;
35204 vmode = GET_MODE (target);
35205 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35206 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
35207 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35208 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35209 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35210 emit_insn (gen_rtx_SET (target, x));
35212 return true;
35215 bool
35216 rs6000_expand_vec_perm_const (rtx operands[4])
35218 rtx target, op0, op1, sel;
35219 unsigned char perm0, perm1;
35221 target = operands[0];
35222 op0 = operands[1];
35223 op1 = operands[2];
35224 sel = operands[3];
35226 /* Unpack the constant selector. */
35227 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
35228 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
35230 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
35233 /* Test whether a constant permutation is supported. */
35235 static bool
35236 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
35237 const unsigned char *sel)
35239 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35240 if (TARGET_ALTIVEC)
35241 return true;
35243 /* Check for ps_merge* or evmerge* insns. */
35244 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
35245 || (TARGET_SPE && vmode == V2SImode))
35247 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35248 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35249 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
35252 return false;
35255 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
35257 static void
35258 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35259 machine_mode vmode, unsigned nelt, rtx perm[])
35261 machine_mode imode;
35262 rtx x;
35264 imode = vmode;
35265 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
35267 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
35268 imode = mode_for_vector (imode, nelt);
35271 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
35272 x = expand_vec_perm (vmode, op0, op1, x, target);
35273 if (x != target)
35274 emit_move_insn (target, x);
35277 /* Expand an extract even operation. */
35279 void
35280 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35282 machine_mode vmode = GET_MODE (target);
35283 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35284 rtx perm[16];
35286 for (i = 0; i < nelt; i++)
35287 perm[i] = GEN_INT (i * 2);
35289 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
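/* E.g. for V4SImode this builds the selector {0, 2, 4, 6}, picking the
   even-numbered elements of the double-wide concatenation of op0 and
   op1.  */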
35292 /* Expand a vector interleave operation. */
35294 void
35295 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35297 machine_mode vmode = GET_MODE (target);
35298 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35299 rtx perm[16];
35301 high = (highp ? 0 : nelt / 2);
35302 for (i = 0; i < nelt / 2; i++)
35304 perm[i * 2] = GEN_INT (i + high);
35305 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
35308 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
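/* E.g. for V4SImode with HIGHP set this builds {0, 4, 1, 5}, pairing
   element i of op0 with element i of op1; with HIGHP clear it builds
   {2, 6, 3, 7} for the remaining halves.  */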
35311 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
35312 void
35313 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35315 HOST_WIDE_INT hwi_scale (scale);
35316 REAL_VALUE_TYPE r_pow;
35317 rtvec v = rtvec_alloc (2);
35318 rtx elt;
35319 rtx scale_vec = gen_reg_rtx (V2DFmode);
35320 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35321 elt = const_double_from_real_value (r_pow, DFmode);
35322 RTVEC_ELT (v, 0) = elt;
35323 RTVEC_ELT (v, 1) = elt;
35324 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35325 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
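/* E.g. a SCALE of 4 splats 2**4 == 16.0 into both lanes of scale_vec,
   so each element of TGT is the corresponding element of SRC times
   16.0.  */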
35328 /* Return an RTX representing where to find the function value of a
35329 function returning MODE. */
35330 static rtx
35331 rs6000_complex_function_value (machine_mode mode)
35333 unsigned int regno;
35334 rtx r1, r2;
35335 machine_mode inner = GET_MODE_INNER (mode);
35336 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35338 if (TARGET_FLOAT128
35339 && (mode == KCmode
35340 || (mode == TCmode && TARGET_IEEEQUAD)))
35341 regno = ALTIVEC_ARG_RETURN;
35343 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35344 regno = FP_ARG_RETURN;
35346 else
35348 regno = GP_ARG_RETURN;
35350 /* 32-bit is OK since it'll go in r3/r4. */
35351 if (TARGET_32BIT && inner_bytes >= 4)
35352 return gen_rtx_REG (mode, regno);
35355 if (inner_bytes >= 8)
35356 return gen_rtx_REG (mode, regno);
35358 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35359 const0_rtx);
35360 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35361 GEN_INT (inner_bytes));
35362 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35365 /* Return an rtx describing a return value of MODE as a PARALLEL
35366 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35367 stride REG_STRIDE. */
35369 static rtx
35370 rs6000_parallel_return (machine_mode mode,
35371 int n_elts, machine_mode elt_mode,
35372 unsigned int regno, unsigned int reg_stride)
35374 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35376 int i;
35377 for (i = 0; i < n_elts; i++)
35379 rtx r = gen_rtx_REG (elt_mode, regno);
35380 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35381 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35382 regno += reg_stride;
35385 return par;
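/* E.g. a DImode value returned in two SImode halves starting at
   GP_ARG_RETURN with stride 1 yields r3 at byte offset 0 and r4 at
   byte offset 4.  */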
35388 /* Target hook for TARGET_FUNCTION_VALUE.
35390 On the SPE, both FPs and vectors are returned in r3.
35392 On RS/6000 an integer value is in r3 and a floating-point value is in
35393 fp1, unless -msoft-float. */
35395 static rtx
35396 rs6000_function_value (const_tree valtype,
35397 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35398 bool outgoing ATTRIBUTE_UNUSED)
35400 machine_mode mode;
35401 unsigned int regno;
35402 machine_mode elt_mode;
35403 int n_elts;
35405 /* Special handling for structs in darwin64. */
35406 if (TARGET_MACHO
35407 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35409 CUMULATIVE_ARGS valcum;
35410 rtx valret;
35412 valcum.words = 0;
35413 valcum.fregno = FP_ARG_MIN_REG;
35414 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35415 /* Do a trial code generation as if this were going to be passed as
35416 an argument; if any part goes in memory, we return NULL. */
35417 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35418 if (valret)
35419 return valret;
35420 /* Otherwise fall through to standard ABI rules. */
35423 mode = TYPE_MODE (valtype);
35425 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35426 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35428 int first_reg, n_regs;
35430 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35432 /* _Decimal128 must use even/odd register pairs. */
35433 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35434 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35436 else
35438 first_reg = ALTIVEC_ARG_RETURN;
35439 n_regs = 1;
35442 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
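/* E.g. under ELFv2 a homogeneous aggregate of four floats comes back in
   FP_ARG_RETURN through FP_ARG_RETURN+3 (one SFmode register per
   element), while vector elements come back in consecutive AltiVec
   registers starting at ALTIVEC_ARG_RETURN.  */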
35445 /* Some return value types need to be split in -mpowerpc64, 32-bit ABI. */
35446 if (TARGET_32BIT && TARGET_POWERPC64)
35447 switch (mode)
35449 default:
35450 break;
35451 case DImode:
35452 case SCmode:
35453 case DCmode:
35454 case TCmode:
35455 int count = GET_MODE_SIZE (mode) / 4;
35456 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35459 if ((INTEGRAL_TYPE_P (valtype)
35460 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35461 || POINTER_TYPE_P (valtype))
35462 mode = TARGET_32BIT ? SImode : DImode;
35464 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35465 /* _Decimal128 must use an even/odd register pair. */
35466 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35467 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
35468 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
35469 regno = FP_ARG_RETURN;
35470 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35471 && targetm.calls.split_complex_arg)
35472 return rs6000_complex_function_value (mode);
35473 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35474 return register is used in both cases, and we won't see V2DImode/V2DFmode
35475 for pure altivec, combine the two cases. */
35476 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35477 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35478 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35479 regno = ALTIVEC_ARG_RETURN;
35480 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
35481 && (mode == DFmode || mode == DCmode
35482 || FLOAT128_IBM_P (mode) || mode == TCmode))
35483 return spe_build_register_parallel (mode, GP_ARG_RETURN);
35484 else
35485 regno = GP_ARG_RETURN;
35487 return gen_rtx_REG (mode, regno);
35490 /* Define how to find the value returned by a library function
35491 assuming the value has mode MODE. */
35492 rtx
35493 rs6000_libcall_value (machine_mode mode)
35495 unsigned int regno;
35497 /* A long long return value needs to be split in -mpowerpc64, 32-bit ABI. */
35498 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35499 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35501 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35502 /* _Decimal128 must use an even/odd register pair. */
35503 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35504 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
35505 && TARGET_HARD_FLOAT && TARGET_FPRS
35506 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
35507 regno = FP_ARG_RETURN;
35508 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35509 return register is used in both cases, and we won't see V2DImode/V2DFmode
35510 for pure altivec, combine the two cases. */
35511 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35512 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35513 regno = ALTIVEC_ARG_RETURN;
35514 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35515 return rs6000_complex_function_value (mode);
35516 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
35517 && (mode == DFmode || mode == DCmode
35518 || FLOAT128_IBM_P (mode) || mode == TCmode))
35519 return spe_build_register_parallel (mode, GP_ARG_RETURN);
35520 else
35521 regno = GP_ARG_RETURN;
35523 return gen_rtx_REG (mode, regno);
35527 /* Return true if we use LRA instead of the reload pass. */
35528 static bool
35529 rs6000_lra_p (void)
35531 return TARGET_LRA;
35534 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35535 Frame pointer elimination is automatically handled.
35537 For the RS/6000, if frame pointer elimination is being done, we would like
35538 to convert ap into fp, not sp.
35540 We need r30 if -mminimal-toc was specified, and there are constant pool
35541 references. */
35543 static bool
35544 rs6000_can_eliminate (const int from, const int to)
35546 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35547 ? ! frame_pointer_needed
35548 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35549 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
35550 : true);
35553 /* Define the offset between two registers, FROM to be eliminated and its
35554 replacement TO, at the start of a routine. */
35555 HOST_WIDE_INT
35556 rs6000_initial_elimination_offset (int from, int to)
35558 rs6000_stack_t *info = rs6000_stack_info ();
35559 HOST_WIDE_INT offset;
35561 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35562 offset = info->push_p ? 0 : -info->total_size;
35563 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35565 offset = info->push_p ? 0 : -info->total_size;
35566 if (FRAME_GROWS_DOWNWARD)
35567 offset += info->fixed_size + info->vars_size + info->parm_size;
35569 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35570 offset = FRAME_GROWS_DOWNWARD
35571 ? info->fixed_size + info->vars_size + info->parm_size
35572 : 0;
35573 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35574 offset = info->total_size;
35575 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35576 offset = info->push_p ? info->total_size : 0;
35577 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35578 offset = 0;
35579 else
35580 gcc_unreachable ();
35582 return offset;
35585 static rtx
35586 rs6000_dwarf_register_span (rtx reg)
35588 rtx parts[8];
35589 int i, words;
35590 unsigned regno = REGNO (reg);
35591 machine_mode mode = GET_MODE (reg);
35593 if (TARGET_SPE
35594 && regno < 32
35595 && (SPE_VECTOR_MODE (GET_MODE (reg))
35596 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
35597 && mode != SFmode && mode != SDmode && mode != SCmode)))
35599 else
35600 return NULL_RTX;
35602 regno = REGNO (reg);
35604 /* The duality of the SPE register size wreaks all kinds of havoc.
35605 This is a way of distinguishing r0 in 32-bits from r0 in
35606 64-bits. */
35607 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
35608 gcc_assert (words <= 4);
35609 for (i = 0; i < words; i++, regno++)
35611 if (BYTES_BIG_ENDIAN)
35613 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
35614 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
35616 else
35618 parts[2 * i] = gen_rtx_REG (SImode, regno);
35619 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
35623 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
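/* E.g. an 8-byte E500 double in r5 is described on big-endian as the
   pair (SPE high half of r5, r5), both as SImode, so the unwinder can
   restore both 32-bit halves.  */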
35626 /* Fill in sizes for SPE register high parts in table used by unwinder. */
35628 static void
35629 rs6000_init_dwarf_reg_sizes_extra (tree address)
35631 if (TARGET_SPE)
35633 int i;
35634 machine_mode mode = TYPE_MODE (char_type_node);
35635 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35636 rtx mem = gen_rtx_MEM (BLKmode, addr);
35637 rtx value = gen_int_mode (4, mode);
35639 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
35641 int column = DWARF_REG_TO_UNWIND_COLUMN
35642 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35643 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35645 emit_move_insn (adjust_address (mem, mode, offset), value);
35649 if (TARGET_MACHO && ! TARGET_ALTIVEC)
35651 int i;
35652 machine_mode mode = TYPE_MODE (char_type_node);
35653 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35654 rtx mem = gen_rtx_MEM (BLKmode, addr);
35655 rtx value = gen_int_mode (16, mode);
35657 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
35658 The unwinder still needs to know the size of Altivec registers. */
35660 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
35662 int column = DWARF_REG_TO_UNWIND_COLUMN
35663 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35664 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35666 emit_move_insn (adjust_address (mem, mode, offset), value);
35671 /* Map internal gcc register numbers to debug format register numbers.
35672 FORMAT specifies the type of debug register number to use:
35673 0 -- debug information, except for frame-related sections
35674 1 -- DWARF .debug_frame section
35675 2 -- DWARF .eh_frame section */
35677 unsigned int
35678 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
35680 /* We never use the GCC internal number for SPE high registers.
35681 Those are mapped to the 1200..1231 range for all debug formats. */
35682 if (SPE_HIGH_REGNO_P (regno))
35683 return regno - FIRST_SPE_HIGH_REGNO + 1200;
35685 /* Except for the above, we use the internal number for non-DWARF
35686 debug information, and also for .eh_frame. */
35687 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
35688 return regno;
35690 /* On some platforms, we use the standard DWARF register
35691 numbering for .debug_info and .debug_frame. */
35692 #ifdef RS6000_USE_DWARF_NUMBERING
35693 if (regno <= 63)
35694 return regno;
35695 if (regno == LR_REGNO)
35696 return 108;
35697 if (regno == CTR_REGNO)
35698 return 109;
35699 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
35700 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
35701 The actual code emitted saves the whole of CR, so we map CR2_REGNO
35702 to the DWARF reg for CR. */
35703 if (format == 1 && regno == CR2_REGNO)
35704 return 64;
35705 if (CR_REGNO_P (regno))
35706 return regno - CR0_REGNO + 86;
35707 if (regno == CA_REGNO)
35708 return 101; /* XER */
35709 if (ALTIVEC_REGNO_P (regno))
35710 return regno - FIRST_ALTIVEC_REGNO + 1124;
35711 if (regno == VRSAVE_REGNO)
35712 return 356;
35713 if (regno == VSCR_REGNO)
35714 return 67;
35715 if (regno == SPE_ACC_REGNO)
35716 return 99;
35717 if (regno == SPEFSCR_REGNO)
35718 return 612;
35719 #endif
35720 return regno;
35723 /* target hook eh_return_filter_mode */
35724 static machine_mode
35725 rs6000_eh_return_filter_mode (void)
35727 return TARGET_32BIT ? SImode : word_mode;
35730 /* Target hook for scalar_mode_supported_p. */
35731 static bool
35732 rs6000_scalar_mode_supported_p (machine_mode mode)
35734 /* -m32 does not support TImode. This is the default, from
35735 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
35736 same ABI as for -m32. But default_scalar_mode_supported_p allows
35737 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
35738 for -mpowerpc64. */
35739 if (TARGET_32BIT && mode == TImode)
35740 return false;
35742 if (DECIMAL_FLOAT_MODE_P (mode))
35743 return default_decimal_float_supported_p ();
35744 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
35745 return true;
35746 else
35747 return default_scalar_mode_supported_p (mode);
35750 /* Target hook for vector_mode_supported_p. */
35751 static bool
35752 rs6000_vector_mode_supported_p (machine_mode mode)
35755 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
35756 return true;
35758 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
35759 return true;
35761 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
35762 128-bit, the compiler might try to widen IEEE 128-bit to IBM
35763 double-double. */
35764 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
35765 return true;
35767 else
35768 return false;
35771 /* Target hook for c_mode_for_suffix. */
35772 static machine_mode
35773 rs6000_c_mode_for_suffix (char suffix)
35775 if (TARGET_FLOAT128)
35777 if (suffix == 'q' || suffix == 'Q')
35778 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35780 /* At the moment, we are not defining a suffix for IBM extended double.
35781 If/when the default for -mabi=ieeelongdouble is changed, and we want
35782 to support __ibm128 constants in legacy library code, we may need to
35783 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
35784 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
35785 __float80 constants. */
35788 return VOIDmode;
35791 /* Target hook for invalid_arg_for_unprototyped_fn. */
35792 static const char *
35793 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
35795 return (!rs6000_darwin64_abi
35796 && typelist == 0
35797 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
35798 && (funcdecl == NULL_TREE
35799 || (TREE_CODE (funcdecl) == FUNCTION_DECL
35800 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
35801 ? N_("AltiVec argument passed to unprototyped function")
35802 : NULL;
35805 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
35806 setup by using __stack_chk_fail_local hidden function instead of
35807 calling __stack_chk_fail directly. Otherwise it is better to call
35808 __stack_chk_fail directly. */
35810 static tree ATTRIBUTE_UNUSED
35811 rs6000_stack_protect_fail (void)
35813 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
35814 ? default_hidden_stack_protect_fail ()
35815 : default_external_stack_protect_fail ();
35818 void
35819 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
35820 int num_operands ATTRIBUTE_UNUSED)
35822 if (rs6000_warn_cell_microcode)
35824 const char *temp;
35825 int insn_code_number = recog_memoized (insn);
35826 location_t location = INSN_LOCATION (insn);
35828 /* Punt on insns we cannot recognize. */
35829 if (insn_code_number < 0)
35830 return;
35832 temp = get_insn_template (insn_code_number, insn);
35834 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
35835 warning_at (location, OPT_mwarn_cell_microcode,
35836 "emitting microcode insn %s\t[%s] #%d",
35837 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
35838 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
35839 warning_at (location, OPT_mwarn_cell_microcode,
35840 "emitting conditional microcode insn %s\t[%s] #%d",
35841 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
35845 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
35847 #if TARGET_ELF
35848 static unsigned HOST_WIDE_INT
35849 rs6000_asan_shadow_offset (void)
35851 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
35853 #endif
35855 /* Mask options that we want to support inside of attribute((target)) and
35856 #pragma GCC target operations. Note, we do not include things like
35857 64/32-bit, endianness, hard/soft floating point, etc. that would have
35858 different calling sequences. */
35860 struct rs6000_opt_mask {
35861 const char *name; /* option name */
35862 HOST_WIDE_INT mask; /* mask to set */
35863 bool invert; /* invert sense of mask */
35864 bool valid_target; /* option is a target option */
35865 };
35867 static struct rs6000_opt_mask const rs6000_opt_masks[] =
35869 { "altivec", OPTION_MASK_ALTIVEC, false, true },
35870 { "cmpb", OPTION_MASK_CMPB, false, true },
35871 { "crypto", OPTION_MASK_CRYPTO, false, true },
35872 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
35873 { "dlmzb", OPTION_MASK_DLMZB, false, true },
35874 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
35875 false, true },
35876 { "float128", OPTION_MASK_FLOAT128, false, false },
35877 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
35878 { "fprnd", OPTION_MASK_FPRND, false, true },
35879 { "hard-dfp", OPTION_MASK_DFP, false, true },
35880 { "htm", OPTION_MASK_HTM, false, true },
35881 { "isel", OPTION_MASK_ISEL, false, true },
35882 { "mfcrf", OPTION_MASK_MFCRF, false, true },
35883 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
35884 { "modulo", OPTION_MASK_MODULO, false, true },
35885 { "mulhw", OPTION_MASK_MULHW, false, true },
35886 { "multiple", OPTION_MASK_MULTIPLE, false, true },
35887 { "popcntb", OPTION_MASK_POPCNTB, false, true },
35888 { "popcntd", OPTION_MASK_POPCNTD, false, true },
35889 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
35890 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
35891 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
35892 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
35893 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
35894 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
35895 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
35896 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
35897 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
35898 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
35899 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
35900 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
35901 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
35902 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
35903 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
35904 { "string", OPTION_MASK_STRING, false, true },
35905 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
35906 { "update", OPTION_MASK_NO_UPDATE, true , true },
35907 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
35908 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
35909 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
35910 { "vsx", OPTION_MASK_VSX, false, true },
35911 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
35912 #ifdef OPTION_MASK_64BIT
35913 #if TARGET_AIX_OS
35914 { "aix64", OPTION_MASK_64BIT, false, false },
35915 { "aix32", OPTION_MASK_64BIT, true, false },
35916 #else
35917 { "64", OPTION_MASK_64BIT, false, false },
35918 { "32", OPTION_MASK_64BIT, true, false },
35919 #endif
35920 #endif
35921 #ifdef OPTION_MASK_EABI
35922 { "eabi", OPTION_MASK_EABI, false, false },
35923 #endif
35924 #ifdef OPTION_MASK_LITTLE_ENDIAN
35925 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
35926 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
35927 #endif
35928 #ifdef OPTION_MASK_RELOCATABLE
35929 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
35930 #endif
35931 #ifdef OPTION_MASK_STRICT_ALIGN
35932 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
35933 #endif
35934 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
35935 { "string", OPTION_MASK_STRING, false, false },
35938 /* Builtin mask mapping for printing the flags. */
35939 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
35941 { "altivec", RS6000_BTM_ALTIVEC, false, false },
35942 { "vsx", RS6000_BTM_VSX, false, false },
35943 { "spe", RS6000_BTM_SPE, false, false },
35944 { "paired", RS6000_BTM_PAIRED, false, false },
35945 { "fre", RS6000_BTM_FRE, false, false },
35946 { "fres", RS6000_BTM_FRES, false, false },
35947 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
35948 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
35949 { "popcntd", RS6000_BTM_POPCNTD, false, false },
35950 { "cell", RS6000_BTM_CELL, false, false },
35951 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
35952 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
35953 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
35954 { "crypto", RS6000_BTM_CRYPTO, false, false },
35955 { "htm", RS6000_BTM_HTM, false, false },
35956 { "hard-dfp", RS6000_BTM_DFP, false, false },
35957 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
35958 { "long-double-128", RS6000_BTM_LDBL128, false, false },
35959 { "float128", RS6000_BTM_FLOAT128, false, false },
35962 /* Option variables that we want to support inside attribute((target)) and
35963 #pragma GCC target operations. */
35965 struct rs6000_opt_var {
35966 const char *name; /* option name */
35967 size_t global_offset; /* offset of the option in global_options. */
35968 size_t target_offset; /* offset of the option in target options. */
35969 };
35971 static struct rs6000_opt_var const rs6000_opt_vars[] =
35973 { "friz",
35974 offsetof (struct gcc_options, x_TARGET_FRIZ),
35975 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
35976 { "avoid-indexed-addresses",
35977 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
35978 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
35979 { "paired",
35980 offsetof (struct gcc_options, x_rs6000_paired_float),
35981 offsetof (struct cl_target_option, x_rs6000_paired_float), },
35982 { "longcall",
35983 offsetof (struct gcc_options, x_rs6000_default_long_calls),
35984 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
35985 { "optimize-swaps",
35986 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
35987 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
35988 { "allow-movmisalign",
35989 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
35990 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
35991 { "allow-df-permute",
35992 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
35993 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
35994 { "sched-groups",
35995 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
35996 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
35997 { "always-hint",
35998 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
35999 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36000 { "align-branch-targets",
36001 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36002 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36003 { "vectorize-builtins",
36004 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
36005 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
36006 { "tls-markers",
36007 offsetof (struct gcc_options, x_tls_markers),
36008 offsetof (struct cl_target_option, x_tls_markers), },
36009 { "sched-prolog",
36010 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36011 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36012 { "sched-epilog",
36013 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36014 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36015 { "gen-cell-microcode",
36016 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
36017 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
36018 { "warn-cell-microcode",
36019 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
36020 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
36021 };
36023 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36024 parsing. Return true if there were no errors. */
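/* For example, both of the following end up here:
     #pragma GCC target ("cpu=power8,no-vsx")
     __attribute__((__target__("altivec,tune=power9")))
   Each comma-separated token is either cpu=/tune= or a name from
   rs6000_opt_masks or rs6000_opt_vars, optionally prefixed with "no-".  */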
36026 static bool
36027 rs6000_inner_target_options (tree args, bool attr_p)
36029 bool ret = true;
36031 if (args == NULL_TREE)
36034 else if (TREE_CODE (args) == STRING_CST)
36036 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36037 char *q;
36039 while ((q = strtok (p, ",")) != NULL)
36041 bool error_p = false;
36042 bool not_valid_p = false;
36043 const char *cpu_opt = NULL;
36045 p = NULL;
36046 if (strncmp (q, "cpu=", 4) == 0)
36048 int cpu_index = rs6000_cpu_name_lookup (q+4);
36049 if (cpu_index >= 0)
36050 rs6000_cpu_index = cpu_index;
36051 else
36053 error_p = true;
36054 cpu_opt = q+4;
36057 else if (strncmp (q, "tune=", 5) == 0)
36059 int tune_index = rs6000_cpu_name_lookup (q+5);
36060 if (tune_index >= 0)
36061 rs6000_tune_index = tune_index;
36062 else
36064 error_p = true;
36065 cpu_opt = q+5;
36068 else
36070 size_t i;
36071 bool invert = false;
36072 char *r = q;
36074 error_p = true;
36075 if (strncmp (r, "no-", 3) == 0)
36077 invert = true;
36078 r += 3;
36081 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36082 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36084 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36086 if (!rs6000_opt_masks[i].valid_target)
36087 not_valid_p = true;
36088 else
36090 error_p = false;
36091 rs6000_isa_flags_explicit |= mask;
36093 /* VSX needs altivec, so -mvsx automagically sets
36094 altivec and disables -mavoid-indexed-addresses. */
36095 if (!invert)
36097 if (mask == OPTION_MASK_VSX)
36099 mask |= OPTION_MASK_ALTIVEC;
36100 TARGET_AVOID_XFORM = 0;
36104 if (rs6000_opt_masks[i].invert)
36105 invert = !invert;
36107 if (invert)
36108 rs6000_isa_flags &= ~mask;
36109 else
36110 rs6000_isa_flags |= mask;
36112 break;
36115 if (error_p && !not_valid_p)
36117 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36118 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36120 size_t j = rs6000_opt_vars[i].global_offset;
36121 *((int *) ((char *)&global_options + j)) = !invert;
36122 error_p = false;
36123 not_valid_p = false;
36124 break;
36129 if (error_p)
36131 const char *eprefix, *esuffix;
36133 ret = false;
36134 if (attr_p)
36136 eprefix = "__attribute__((__target__(";
36137 esuffix = ")))";
36139 else
36141 eprefix = "#pragma GCC target ";
36142 esuffix = "";
36145 if (cpu_opt)
36146 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
36147 q, esuffix);
36148 else if (not_valid_p)
36149 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
36150 else
36151 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
36156 else if (TREE_CODE (args) == TREE_LIST)
36160 tree value = TREE_VALUE (args);
36161 if (value)
36163 bool ret2 = rs6000_inner_target_options (value, attr_p);
36164 if (!ret2)
36165 ret = false;
36167 args = TREE_CHAIN (args);
36169 while (args != NULL_TREE);
36172 else
36173 gcc_unreachable ();
36175 return ret;
36178 /* Print out the target options as a list for -mdebug=target. */
36180 static void
36181 rs6000_debug_target_options (tree args, const char *prefix)
36183 if (args == NULL_TREE)
36184 fprintf (stderr, "%s<NULL>", prefix);
36186 else if (TREE_CODE (args) == STRING_CST)
36188 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36189 char *q;
36191 while ((q = strtok (p, ",")) != NULL)
36193 p = NULL;
36194 fprintf (stderr, "%s\"%s\"", prefix, q);
36195 prefix = ", ";
36199 else if (TREE_CODE (args) == TREE_LIST)
36203 tree value = TREE_VALUE (args);
36204 if (value)
36206 rs6000_debug_target_options (value, prefix);
36207 prefix = ", ";
36209 args = TREE_CHAIN (args);
36211 while (args != NULL_TREE);
36214 else
36215 gcc_unreachable ();
36217 return;
36221 /* Hook to validate attribute((target("..."))). */
36223 static bool
36224 rs6000_valid_attribute_p (tree fndecl,
36225 tree ARG_UNUSED (name),
36226 tree args,
36227 int flags)
36229 struct cl_target_option cur_target;
36230 bool ret;
36231 tree old_optimize = build_optimization_node (&global_options);
36232 tree new_target, new_optimize;
36233 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36235 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36237 if (TARGET_DEBUG_TARGET)
36239 tree tname = DECL_NAME (fndecl);
36240 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36241 if (tname)
36242 fprintf (stderr, "function: %.*s\n",
36243 (int) IDENTIFIER_LENGTH (tname),
36244 IDENTIFIER_POINTER (tname));
36245 else
36246 fprintf (stderr, "function: unknown\n");
36248 fprintf (stderr, "args:");
36249 rs6000_debug_target_options (args, " ");
36250 fprintf (stderr, "\n");
36252 if (flags)
36253 fprintf (stderr, "flags: 0x%x\n", flags);
36255 fprintf (stderr, "--------------------\n");
36258 old_optimize = build_optimization_node (&global_options);
36259 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36261 /* If the function changed the optimization levels as well as setting target
36262 options, start with the optimizations specified. */
36263 if (func_optimize && func_optimize != old_optimize)
36264 cl_optimization_restore (&global_options,
36265 TREE_OPTIMIZATION (func_optimize));
36267 /* The target attributes may also change some optimization flags, so update
36268 the optimization options if necessary. */
36269 cl_target_option_save (&cur_target, &global_options);
36270 rs6000_cpu_index = rs6000_tune_index = -1;
36271 ret = rs6000_inner_target_options (args, true);
36273 /* Set up any additional state. */
36274 if (ret)
36276 ret = rs6000_option_override_internal (false);
36277 new_target = build_target_option_node (&global_options);
36279 else
36280 new_target = NULL;
36282 new_optimize = build_optimization_node (&global_options);
36284 if (!new_target)
36285 ret = false;
36287 else if (fndecl)
36289 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36291 if (old_optimize != new_optimize)
36292 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36295 cl_target_option_restore (&global_options, &cur_target);
36297 if (old_optimize != new_optimize)
36298 cl_optimization_restore (&global_options,
36299 TREE_OPTIMIZATION (old_optimize));
36301 return ret;
36305 /* Hook to validate the current #pragma GCC target and set the state, and
36306 update the macros based on what was changed. If ARGS is NULL, then
36307 POP_TARGET is used to reset the options. */
36309 bool
36310 rs6000_pragma_target_parse (tree args, tree pop_target)
36312 tree prev_tree = build_target_option_node (&global_options);
36313 tree cur_tree;
36314 struct cl_target_option *prev_opt, *cur_opt;
36315 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36316 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36318 if (TARGET_DEBUG_TARGET)
36320 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36321 fprintf (stderr, "args:");
36322 rs6000_debug_target_options (args, " ");
36323 fprintf (stderr, "\n");
36325 if (pop_target)
36327 fprintf (stderr, "pop_target:\n");
36328 debug_tree (pop_target);
36330 else
36331 fprintf (stderr, "pop_target: <NULL>\n");
36333 fprintf (stderr, "--------------------\n");
36336 if (! args)
36338 cur_tree = ((pop_target)
36339 ? pop_target
36340 : target_option_default_node);
36341 cl_target_option_restore (&global_options,
36342 TREE_TARGET_OPTION (cur_tree));
36344 else
36346 rs6000_cpu_index = rs6000_tune_index = -1;
36347 if (!rs6000_inner_target_options (args, false)
36348 || !rs6000_option_override_internal (false)
36349 || (cur_tree = build_target_option_node (&global_options))
36350 == NULL_TREE)
36352 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36353 fprintf (stderr, "invalid pragma\n");
36355 return false;
36359 target_option_current_node = cur_tree;
36361 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36362 change the macros that are defined. */
36363 if (rs6000_target_modify_macros_ptr)
36365 prev_opt = TREE_TARGET_OPTION (prev_tree);
36366 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36367 prev_flags = prev_opt->x_rs6000_isa_flags;
36369 cur_opt = TREE_TARGET_OPTION (cur_tree);
36370 cur_flags = cur_opt->x_rs6000_isa_flags;
36371 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36373 diff_bumask = (prev_bumask ^ cur_bumask);
36374 diff_flags = (prev_flags ^ cur_flags);
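/* E.g. if the pragma turns VSX off, the VSX bit is set in diff_flags;
   the first call below then undefines the macros the old flags provided
   (such as __VSX__) and the second defines those the new flags enable.  */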
36376 if ((diff_flags != 0) || (diff_bumask != 0))
36378 /* Delete old macros. */
36379 rs6000_target_modify_macros_ptr (false,
36380 prev_flags & diff_flags,
36381 prev_bumask & diff_bumask);
36383 /* Define new macros. */
36384 rs6000_target_modify_macros_ptr (true,
36385 cur_flags & diff_flags,
36386 cur_bumask & diff_bumask);
36390 return true;
36394 /* Remember the last target of rs6000_set_current_function. */
36395 static GTY(()) tree rs6000_previous_fndecl;
36397 /* Establish appropriate back-end context for processing the function
36398 FNDECL. The argument might be NULL to indicate processing at top
36399 level, outside of any function scope. */
36400 static void
36401 rs6000_set_current_function (tree fndecl)
36403 tree old_tree = (rs6000_previous_fndecl
36404 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
36405 : NULL_TREE);
36407 tree new_tree = (fndecl
36408 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
36409 : NULL_TREE);
36411 if (TARGET_DEBUG_TARGET)
36413 bool print_final = false;
36414 fprintf (stderr, "\n==================== rs6000_set_current_function");
36416 if (fndecl)
36417 fprintf (stderr, ", fndecl %s (%p)",
36418 (DECL_NAME (fndecl)
36419 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36420 : "<unknown>"), (void *)fndecl);
36422 if (rs6000_previous_fndecl)
36423 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36425 fprintf (stderr, "\n");
36426 if (new_tree)
36428 fprintf (stderr, "\nnew fndecl target specific options:\n");
36429 debug_tree (new_tree);
36430 print_final = true;
36433 if (old_tree)
36435 fprintf (stderr, "\nold fndecl target specific options:\n");
36436 debug_tree (old_tree);
36437 print_final = true;
36440 if (print_final)
36441 fprintf (stderr, "--------------------\n");
36444 /* Only change the context if the function changes. This hook is called
36445 several times in the course of compiling a function, and we don't want to
36446 slow things down too much or call target_reinit when it isn't safe. */
36447 if (fndecl && fndecl != rs6000_previous_fndecl)
36449 rs6000_previous_fndecl = fndecl;
36450 if (old_tree == new_tree)
36453 else if (new_tree && new_tree != target_option_default_node)
36455 cl_target_option_restore (&global_options,
36456 TREE_TARGET_OPTION (new_tree));
36457 if (TREE_TARGET_GLOBALS (new_tree))
36458 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36459 else
36460 TREE_TARGET_GLOBALS (new_tree)
36461 = save_target_globals_default_opts ();
36464 else if (old_tree && old_tree != target_option_default_node)
36466 new_tree = target_option_current_node;
36467 cl_target_option_restore (&global_options,
36468 TREE_TARGET_OPTION (new_tree));
36469 if (TREE_TARGET_GLOBALS (new_tree))
36470 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36471 else if (new_tree == target_option_default_node)
36472 restore_target_globals (&default_target_globals);
36473 else
36474 TREE_TARGET_GLOBALS (new_tree)
36475 = save_target_globals_default_opts ();
36481 /* Save the current options */
36483 static void
36484 rs6000_function_specific_save (struct cl_target_option *ptr,
36485 struct gcc_options *opts)
36487 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36488 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36491 /* Restore the current options */
36493 static void
36494 rs6000_function_specific_restore (struct gcc_options *opts,
36495 struct cl_target_option *ptr)
36498 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36499 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36500 (void) rs6000_option_override_internal (false);
36503 /* Print the current options */
36505 static void
36506 rs6000_function_specific_print (FILE *file, int indent,
36507 struct cl_target_option *ptr)
36509 rs6000_print_isa_options (file, indent, "Isa options set",
36510 ptr->x_rs6000_isa_flags);
36512 rs6000_print_isa_options (file, indent, "Isa options explicit",
36513 ptr->x_rs6000_isa_flags_explicit);
36516 /* Helper function to print the current isa or misc options on a line. */
36518 static void
36519 rs6000_print_options_internal (FILE *file,
36520 int indent,
36521 const char *string,
36522 HOST_WIDE_INT flags,
36523 const char *prefix,
36524 const struct rs6000_opt_mask *opts,
36525 size_t num_elements)
36527 size_t i;
36528 size_t start_column = 0;
36529 size_t cur_column;
36530 size_t max_column = 120;
36531 size_t prefix_len = strlen (prefix);
36532 size_t comma_len = 0;
36533 const char *comma = "";
36535 if (indent)
36536 start_column += fprintf (file, "%*s", indent, "");
36538 if (!flags)
36540 fprintf (file, DEBUG_FMT_S, string, "<none>");
36541 return;
36544 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
36546 /* Print the various mask options. */
36547 cur_column = start_column;
36548 for (i = 0; i < num_elements; i++)
36550 bool invert = opts[i].invert;
36551 const char *name = opts[i].name;
36552 const char *no_str = "";
36553 HOST_WIDE_INT mask = opts[i].mask;
36554 size_t len = comma_len + prefix_len + strlen (name);
36556 if (!invert)
36558 if ((flags & mask) == 0)
36560 no_str = "no-";
36561 len += sizeof ("no-") - 1;
36564 flags &= ~mask;
36567 else
36569 if ((flags & mask) != 0)
36571 no_str = "no-";
36572 len += sizeof ("no-") - 1;
36575 flags |= mask;
36578 cur_column += len;
36579 if (cur_column > max_column)
36581 fprintf (file, ", \\\n%*s", (int)start_column, "");
36582 cur_column = start_column + len;
36583 comma = "";
36586 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36587 comma = ", ";
36588 comma_len = sizeof (", ") - 1;
36591 fputs ("\n", file);
36594 /* Helper function to print the current isa options on a line. */
36596 static void
36597 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36598 HOST_WIDE_INT flags)
36600 rs6000_print_options_internal (file, indent, string, flags, "-m",
36601 &rs6000_opt_masks[0],
36602 ARRAY_SIZE (rs6000_opt_masks));
36605 static void
36606 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36607 HOST_WIDE_INT flags)
36609 rs6000_print_options_internal (file, indent, string, flags, "",
36610 &rs6000_builtin_mask_names[0],
36611 ARRAY_SIZE (rs6000_builtin_mask_names));
36615 /* Hook to determine if one function can safely inline another. */
36617 static bool
36618 rs6000_can_inline_p (tree caller, tree callee)
36620 bool ret = false;
36621 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
36622 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
36624 /* If callee has no option attributes, then it is ok to inline. */
36625 if (!callee_tree)
36626 ret = true;
36628 /* If caller has no option attributes, but callee does then it is not ok to
36629 inline. */
36630 else if (!caller_tree)
36631 ret = false;
36633 else
36635 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
36636 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
36638 /* Callee's options should be a subset of the caller's, i.e. a vsx function
36639 can inline an altivec function but a non-vsx function can't inline a
36640 vsx function. */
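/* The subset test below relies on -mvsx implying -maltivec: bitwise
   ANDing the caller's mask with the callee's reproduces the callee's
   mask exactly when the callee needs nothing the caller lacks.  */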
36641 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
36642 == callee_opts->x_rs6000_isa_flags)
36643 ret = true;
36646 if (TARGET_DEBUG_TARGET)
36647 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
36648 (DECL_NAME (caller)
36649 ? IDENTIFIER_POINTER (DECL_NAME (caller))
36650 : "<unknown>"),
36651 (DECL_NAME (callee)
36652 ? IDENTIFIER_POINTER (DECL_NAME (callee))
36653 : "<unknown>"),
36654 (ret ? "can" : "cannot"));
36656 return ret;
36659 /* Allocate a stack temp and fixup the address so it meets the particular
36660 memory requirements (either offsettable or REG+REG addressing). */
36662 rtx
36663 rs6000_allocate_stack_temp (machine_mode mode,
36664 bool offsettable_p,
36665 bool reg_reg_p)
36667 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
36668 rtx addr = XEXP (stack, 0);
36669 int strict_p = (reload_in_progress || reload_completed);
36671 if (!legitimate_indirect_address_p (addr, strict_p))
36673 if (offsettable_p
36674 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
36675 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
36677 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
36678 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
36681 return stack;
36684 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
36685 to such a form to deal with memory reference instructions like STFIWX that
36686 only take reg+reg addressing. */
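/* E.g. a PRE_INC address (mem (pre_inc (reg R))) is rewritten by
   emitting the register update "R = R + size" up front and then using
   the plain (mem (reg R)), which reg+reg instructions accept.  */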
36688 rtx
36689 rs6000_address_for_fpconvert (rtx x)
36691 int strict_p = (reload_in_progress || reload_completed);
36692 rtx addr;
36694 gcc_assert (MEM_P (x));
36695 addr = XEXP (x, 0);
36696 if (! legitimate_indirect_address_p (addr, strict_p)
36697 && ! legitimate_indexed_address_p (addr, strict_p))
36699 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
36701 rtx reg = XEXP (addr, 0);
36702 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
36703 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
36704 gcc_assert (REG_P (reg));
36705 emit_insn (gen_add3_insn (reg, reg, size_rtx));
36706 addr = reg;
36708 else if (GET_CODE (addr) == PRE_MODIFY)
36710 rtx reg = XEXP (addr, 0);
36711 rtx expr = XEXP (addr, 1);
36712 gcc_assert (REG_P (reg));
36713 gcc_assert (GET_CODE (expr) == PLUS);
36714 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
36715 addr = reg;
36718 x = replace_equiv_address (x, copy_addr_to_reg (addr));
36721 return x;
36724 /* Given a memory reference, if it is not in the form for altivec memory
36725 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
36726 convert to the altivec format. */
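/* The AND with -16 below clears the low four address bits, mirroring
   what lvx/stvx do in hardware; e.g. an address of 0x1003 is treated
   as 0x1000.  */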
36728 rtx
36729 rs6000_address_for_altivec (rtx x)
36731 gcc_assert (MEM_P (x));
36732 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
36734 rtx addr = XEXP (x, 0);
36735 int strict_p = (reload_in_progress || reload_completed);
36737 if (!legitimate_indexed_address_p (addr, strict_p)
36738 && !legitimate_indirect_address_p (addr, strict_p))
36739 addr = copy_to_mode_reg (Pmode, addr);
36741 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
36742 x = change_address (x, GET_MODE (x), addr);
36745 return x;
36748 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
36750 On the RS/6000, all integer constants are acceptable, most won't be valid
36751 for particular insns, though. Only easy FP constants are acceptable. */
36753 static bool
36754 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
36756 if (TARGET_ELF && tls_referenced_p (x))
36757 return false;
36759 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
36760 || GET_MODE (x) == VOIDmode
36761 || (TARGET_POWERPC64 && mode == DImode)
36762 || easy_fp_constant (x, mode)
36763 || easy_vector_constant (x, mode));
36767 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
36769 static bool
36770 chain_already_loaded (rtx_insn *last)
36772 for (; last != NULL; last = PREV_INSN (last))
36774 if (NONJUMP_INSN_P (last))
36776 rtx patt = PATTERN (last);
36778 if (GET_CODE (patt) == SET)
36780 rtx lhs = XEXP (patt, 0);
36782 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
36783 return true;
36787 return false;
36790 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
36792 void
36793 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
36795 const bool direct_call_p
36796 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
36797 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
36798 rtx toc_load = NULL_RTX;
36799 rtx toc_restore = NULL_RTX;
36800 rtx func_addr;
36801 rtx abi_reg = NULL_RTX;
36802 rtx call[4];
36803 int n_call;
36804 rtx insn;
36806 /* Handle longcall attributes. */
36807 if (INTVAL (cookie) & CALL_LONG)
36808 func_desc = rs6000_longcall_ref (func_desc);
36810 /* Handle indirect calls. */
36811 if (GET_CODE (func_desc) != SYMBOL_REF
36812 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
36814 /* Save the TOC into its reserved slot before the call,
36815 and prepare to restore it after the call. */
36816 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
36817 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
36818 rtx stack_toc_mem = gen_frame_mem (Pmode,
36819 gen_rtx_PLUS (Pmode, stack_ptr,
36820 stack_toc_offset));
36821 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
36822 gen_rtvec (1, stack_toc_offset),
36823 UNSPEC_TOCSLOT);
36824 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
36826 /* Can we optimize saving the TOC in the prologue or
36827 do we need to do it at every call? */
36828 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
36829 cfun->machine->save_toc_in_prologue = true;
36830 else
36832 MEM_VOLATILE_P (stack_toc_mem) = 1;
36833 emit_move_insn (stack_toc_mem, toc_reg);
36836 if (DEFAULT_ABI == ABI_ELFv2)
36838 /* A function pointer in the ELFv2 ABI is just a plain address, but
36839 the ABI requires it to be loaded into r12 before the call. */
36840 func_addr = gen_rtx_REG (Pmode, 12);
36841 emit_move_insn (func_addr, func_desc);
36842 abi_reg = func_addr;
36844 else
36846 /* A function pointer under AIX is a pointer to a data area whose
36847 first word contains the actual address of the function, whose
36848 second word contains a pointer to its TOC, and whose third word
36849 contains a value to place in the static chain register (r11).
36850 Note that if we load the static chain, our "trampoline" need
36851 not have any executable code. */
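/* For illustration only, a hypothetical C picture of a 64-bit AIX function
   descriptor (not a declaration used anywhere in GCC):

     struct aix_func_desc
     {
       void *entry;         -- code address of the function
       void *toc;           -- TOC pointer for the callee
       void *static_chain;  -- value for r11 when calling a nested function
     };

   The loads below read these fields at offsets 0, GET_MODE_SIZE (Pmode),
   and 2 * GET_MODE_SIZE (Pmode) respectively. */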
36853 /* Load up address of the actual function. */
36854 func_desc = force_reg (Pmode, func_desc);
36855 func_addr = gen_reg_rtx (Pmode);
36856 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
36858 /* Prepare to load the TOC of the called function. Note that the
36859 TOC load must happen immediately before the actual call so
36860 that unwinding the TOC registers works correctly. See the
36861 comment in frob_update_context. */
36862 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
36863 rtx func_toc_mem = gen_rtx_MEM (Pmode,
36864 gen_rtx_PLUS (Pmode, func_desc,
36865 func_toc_offset));
36866 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
36868 /* If we have a static chain, load it up. But, if the call was
36869 originally direct, the 3rd word has not been written since no
36870 trampoline has been built, so we ought not to load it, lest we
36871 overwrite a static chain value. */
36872 if (!direct_call_p
36873 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
36874 && !chain_already_loaded (get_current_sequence ()->next->last))
36876 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
36877 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
36878 rtx func_sc_mem = gen_rtx_MEM (Pmode,
36879 gen_rtx_PLUS (Pmode, func_desc,
36880 func_sc_offset));
36881 emit_move_insn (sc_reg, func_sc_mem);
36882 abi_reg = sc_reg;
36886 else
36888 /* Direct calls use the TOC: for local calls, the callee will
36889 assume the TOC register is set; for non-local calls, the
36890 PLT stub needs the TOC register. */
36891 abi_reg = toc_reg;
36892 func_addr = func_desc;
36895 /* Create the call. */
36896 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
36897 if (value != NULL_RTX)
36898 call[0] = gen_rtx_SET (value, call[0]);
36899 n_call = 1;
36901 if (toc_load)
36902 call[n_call++] = toc_load;
36903 if (toc_restore)
36904 call[n_call++] = toc_restore;
36906 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
36908 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
36909 insn = emit_call_insn (insn);
36911 /* Mention all registers defined by the ABI to hold information
36912 as uses in CALL_INSN_FUNCTION_USAGE. */
36913 if (abi_reg)
36914 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
36917 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
36919 void
36920 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
36922 rtx call[2];
36923 rtx insn;
36925 gcc_assert (INTVAL (cookie) == 0);
36927 /* Create the call. */
36928 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
36929 if (value != NULL_RTX)
36930 call[0] = gen_rtx_SET (value, call[0]);
36932 call[1] = simple_return_rtx;
36934 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
36935 insn = emit_call_insn (insn);
36937 /* Note use of the TOC register. */
36938 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
36939 /* We also need to mark a use of the link register, since the function we
36940 sibling-call to will use it to return to our caller. */
36941 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
36944 /* Return whether we need to always update the saved TOC pointer when we update
36945 the stack pointer. */
36947 static bool
36948 rs6000_save_toc_in_prologue_p (void)
36950 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
36953 #ifdef HAVE_GAS_HIDDEN
36954 # define USE_HIDDEN_LINKONCE 1
36955 #else
36956 # define USE_HIDDEN_LINKONCE 0
36957 #endif
36959 /* Fills in the label name that should be used for a 476 link stack thunk. */
36961 void
36962 get_ppc476_thunk_name (char name[32])
36964 gcc_assert (TARGET_LINK_STACK);
36966 if (USE_HIDDEN_LINKONCE)
36967 sprintf (name, "__ppc476.get_thunk");
36968 else
36969 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
36972 /* This function emits the simple thunk routine that is used to preserve
36973 the link stack on the 476 cpu. */
36975 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
36976 static void
36977 rs6000_code_end (void)
36979 char name[32];
36980 tree decl;
36982 if (!TARGET_LINK_STACK)
36983 return;
36985 get_ppc476_thunk_name (name);
36987 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
36988 build_function_type_list (void_type_node, NULL_TREE));
36989 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
36990 NULL_TREE, void_type_node);
36991 TREE_PUBLIC (decl) = 1;
36992 TREE_STATIC (decl) = 1;
36994 #if RS6000_WEAK
36995 if (USE_HIDDEN_LINKONCE)
36997 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
36998 targetm.asm_out.unique_section (decl, 0);
36999 switch_to_section (get_named_section (decl, NULL, 0));
37000 DECL_WEAK (decl) = 1;
37001 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37002 targetm.asm_out.globalize_label (asm_out_file, name);
37003 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37004 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37006 else
37007 #endif
37009 switch_to_section (text_section);
37010 ASM_OUTPUT_LABEL (asm_out_file, name);
37013 DECL_INITIAL (decl) = make_node (BLOCK);
37014 current_function_decl = decl;
37015 allocate_struct_function (decl, false);
37016 init_function_start (decl);
37017 first_function_block_is_cold = false;
37018 /* Make sure unwind info is emitted for the thunk if needed. */
37019 final_start_function (emit_barrier (), asm_out_file, 1);
37021 fputs ("\tblr\n", asm_out_file);
37023 final_end_function ();
37024 init_insn_lengths ();
37025 free_after_compilation (cfun);
37026 set_cfun (NULL);
37027 current_function_decl = NULL;
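/* Illustrative usage (a sketch): code that needs its own address can do

     bl __ppc476.get_thunk
     mflr 3

   instead of the usual "bcl 20,31,$+4; mflr 3"; since the thunk really
   returns, the 476's branch link stack sees a matched call/return pair
   and stays balanced. */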
37030 /* Add r30 to hard reg set if the prologue sets it up and it is not
37031 pic_offset_table_rtx. */
37033 static void
37034 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37036 if (!TARGET_SINGLE_PIC_BASE
37037 && TARGET_TOC
37038 && TARGET_MINIMAL_TOC
37039 && get_pool_size () != 0)
37040 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37041 if (cfun->machine->split_stack_argp_used)
37042 add_to_hard_reg_set (&set->set, Pmode, 12);
37046 /* Helper function for rs6000_split_logical to emit a logical instruction after
37047 splitting the operation into individual GPR registers.
37049 DEST is the destination register.
37050 OP1 and OP2 are the input source registers.
37051 CODE is the base operation (AND, IOR, XOR, NOT).
37052 MODE is the machine mode.
37053 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37054 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37055 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37057 static void
37058 rs6000_split_logical_inner (rtx dest,
37059 rtx op1,
37060 rtx op2,
37061 enum rtx_code code,
37062 machine_mode mode,
37063 bool complement_final_p,
37064 bool complement_op1_p,
37065 bool complement_op2_p)
37067 rtx bool_rtx;
37069 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37070 if (op2 && GET_CODE (op2) == CONST_INT
37071 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37072 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37074 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37075 HOST_WIDE_INT value = INTVAL (op2) & mask;
37077 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37078 if (code == AND)
37080 if (value == 0)
37082 emit_insn (gen_rtx_SET (dest, const0_rtx));
37083 return;
37086 else if (value == mask)
37088 if (!rtx_equal_p (dest, op1))
37089 emit_insn (gen_rtx_SET (dest, op1));
37090 return;
37094 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37095 into separate ORI/ORIS or XORI/XORIS instructions. */
37096 else if (code == IOR || code == XOR)
37098 if (value == 0)
37100 if (!rtx_equal_p (dest, op1))
37101 emit_insn (gen_rtx_SET (dest, op1));
37102 return;
37107 if (code == AND && mode == SImode
37108 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37110 emit_insn (gen_andsi3 (dest, op1, op2));
37111 return;
37114 if (complement_op1_p)
37115 op1 = gen_rtx_NOT (mode, op1);
37117 if (complement_op2_p)
37118 op2 = gen_rtx_NOT (mode, op2);
37120 /* For canonical RTL, if only one arm is inverted it is the first. */
37121 if (!complement_op1_p && complement_op2_p)
37122 std::swap (op1, op2);
37124 bool_rtx = ((code == NOT)
37125 ? gen_rtx_NOT (mode, op1)
37126 : gen_rtx_fmt_ee (code, mode, op1, op2));
37128 if (complement_final_p)
37129 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37131 emit_insn (gen_rtx_SET (dest, bool_rtx));
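/* Example (a sketch): with CODE == AND and COMPLEMENT_OP2_P, the operands
   are swapped to keep the RTL canonical, giving

     (set dest (and (not op2) op1))

   which the PowerPC "andc" pattern matches; IOR with one complemented
   operand maps to "orc" the same way, and COMPLEMENT_FINAL_P wraps the
   whole result, e.g. (not (and op1 op2)) for the "nand" form. */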
37134 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37135 operations are split immediately during RTL generation to allow for more
37136 optimizations of the AND/IOR/XOR.
37138 OPERANDS is an array containing the destination and two input operands.
37139 CODE is the base operation (AND, IOR, XOR, NOT).
37140 MODE is the machine mode.
37141 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37142 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37143 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37147 static void
37148 rs6000_split_logical_di (rtx operands[3],
37149 enum rtx_code code,
37150 bool complement_final_p,
37151 bool complement_op1_p,
37152 bool complement_op2_p)
37154 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37155 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37156 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37157 enum hi_lo { hi = 0, lo = 1 };
37158 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37159 size_t i;
37161 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37162 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37163 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37164 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37166 if (code == NOT)
37167 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37168 else
37170 if (GET_CODE (operands[2]) != CONST_INT)
37172 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37173 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37175 else
37177 HOST_WIDE_INT value = INTVAL (operands[2]);
37178 HOST_WIDE_INT value_hi_lo[2];
37180 gcc_assert (!complement_final_p);
37181 gcc_assert (!complement_op1_p);
37182 gcc_assert (!complement_op2_p);
37184 value_hi_lo[hi] = value >> 32;
37185 value_hi_lo[lo] = value & lower_32bits;
37187 for (i = 0; i < 2; i++)
37189 HOST_WIDE_INT sub_value = value_hi_lo[i];
37191 if (sub_value & sign_bit)
37192 sub_value |= upper_32bits;
37194 op2_hi_lo[i] = GEN_INT (sub_value);
37196 /* If this is an AND instruction, check to see if we need to load
37197 the value in a register. */
37198 if (code == AND && sub_value != -1 && sub_value != 0
37199 && !and_operand (op2_hi_lo[i], SImode))
37200 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37205 for (i = 0; i < 2; i++)
37207 /* Split large IOR/XOR operations. */
37208 if ((code == IOR || code == XOR)
37209 && GET_CODE (op2_hi_lo[i]) == CONST_INT
37210 && !complement_final_p
37211 && !complement_op1_p
37212 && !complement_op2_p
37213 && !logical_const_operand (op2_hi_lo[i], SImode))
37215 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
37216 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
37217 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
37218 rtx tmp = gen_reg_rtx (SImode);
37220 /* Make sure the constant is sign extended. */
37221 if ((hi_16bits & sign_bit) != 0)
37222 hi_16bits |= upper_32bits;
37224 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
37225 code, SImode, false, false, false);
37227 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
37228 code, SImode, false, false, false);
37230 else
37231 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
37232 code, SImode, complement_final_p,
37233 complement_op1_p, complement_op2_p);
37236 return;
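/* Worked example of the split above (illustrative): for a 32-bit IOR with
   the constant 0x12345678, which is not a single logical_const_operand,
   the two rs6000_split_logical_inner calls emit the equivalent of

     oris tmp,src,0x1234    -- high 16 bits
     ori  dst,tmp,0x5678    -- low 16 bits

   XOR is handled the same way with xoris/xori. */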
37239 /* Split the insns that make up boolean operations operating on multiple GPR
37240 registers. The boolean MD patterns ensure that the inputs either are
37241 exactly the same as the output registers, or there is no overlap.
37243 OPERANDS is an array containing the destination and two input operands.
37244 CODE is the base operation (AND, IOR, XOR, NOT).
37245 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37246 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37247 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37249 void
37250 rs6000_split_logical (rtx operands[3],
37251 enum rtx_code code,
37252 bool complement_final_p,
37253 bool complement_op1_p,
37254 bool complement_op2_p)
37256 machine_mode mode = GET_MODE (operands[0]);
37257 machine_mode sub_mode;
37258 rtx op0, op1, op2;
37259 int sub_size, regno0, regno1, nregs, i;
37261 /* If this is DImode, use the specialized version that can run before
37262 register allocation. */
37263 if (mode == DImode && !TARGET_POWERPC64)
37265 rs6000_split_logical_di (operands, code, complement_final_p,
37266 complement_op1_p, complement_op2_p);
37267 return;
37270 op0 = operands[0];
37271 op1 = operands[1];
37272 op2 = (code == NOT) ? NULL_RTX : operands[2];
37273 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
37274 sub_size = GET_MODE_SIZE (sub_mode);
37275 regno0 = REGNO (op0);
37276 regno1 = REGNO (op1);
37278 gcc_assert (reload_completed);
37279 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37280 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37282 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
37283 gcc_assert (nregs > 1);
37285 if (op2 && REG_P (op2))
37286 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
37288 for (i = 0; i < nregs; i++)
37290 int offset = i * sub_size;
37291 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
37292 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
37293 rtx sub_op2 = ((code == NOT)
37294 ? NULL_RTX
37295 : simplify_subreg (sub_mode, op2, mode, offset));
37297 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
37298 complement_final_p, complement_op1_p,
37299 complement_op2_p);
37302 return;
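/* Example (a sketch): a TImode AND on a 64-bit target occupies two GPRs, so
   the loop above emits two independent DImode operations, one per subword
   register. The MD patterns guarantee that the inputs either coincide with
   the outputs or do not overlap, so the per-register operations can be
   emitted in any order. */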
37306 /* Return true if the peephole2 can combine an addis instruction and a load
37307 with an offset into a single sequence that can be fused together on
37308 a power8. */
37310 bool
37311 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
37312 rtx addis_value, /* addis value. */
37313 rtx target, /* target register that is loaded. */
37314 rtx mem) /* bottom part of the memory addr. */
37316 rtx addr;
37317 rtx base_reg;
37319 /* Validate arguments. */
37320 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37321 return false;
37323 if (!base_reg_operand (target, GET_MODE (target)))
37324 return false;
37326 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37327 return false;
37329 /* Allow sign/zero extension. */
37330 if (GET_CODE (mem) == ZERO_EXTEND
37331 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
37332 mem = XEXP (mem, 0);
37334 if (!MEM_P (mem))
37335 return false;
37337 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
37338 return false;
37340 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37341 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
37342 return false;
37344 /* Validate that the register used to load the high value is either the
37345 register being loaded, or we can safely replace its use.
37347 This function is only called from the peephole2 pass, and we assume that
37348 there are 2 instructions in the peephole (addis and load), so we check
37349 that the target register is not used in the memory address and that the
37350 register holding the addis result is dead after the peephole. */
37351 if (REGNO (addis_reg) != REGNO (target))
37353 if (reg_mentioned_p (target, mem))
37354 return false;
37356 if (!peep2_reg_dead_p (2, addis_reg))
37357 return false;
37359 /* If the target register being loaded is the stack pointer, we must
37360 avoid loading any other value into it, even temporarily. */
37361 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
37362 return false;
37365 base_reg = XEXP (addr, 0);
37366 return REGNO (addis_reg) == REGNO (base_reg);
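/* For illustration (register numbers arbitrary), the kind of pair the
   peephole2 using this predicate matches is

     addis 9,2,.LC0@toc@ha
     lwz   9,.LC0@toc@l(9)

   i.e. the addis result feeds the base of the load and, in the common case,
   is also the register being loaded, which is the shape power8 fuses in
   hardware. */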
37369 /* During the peephole2 pass, adjust and expand the insns for a load fusion
37370 sequence. We adjust the addis register to use the target register. If the
37371 load sign extends, we adjust the code to do a zero-extending load followed
37372 by an explicit sign extension, since the fusion only covers zero-extending
37373 loads.
37375 The operands are:
37376 operands[0] register set with addis (to be replaced with target)
37377 operands[1] value set via addis
37378 operands[2] target register being loaded
37379 operands[3] D-form memory reference using operands[0]. */
37381 void
37382 expand_fusion_gpr_load (rtx *operands)
37384 rtx addis_value = operands[1];
37385 rtx target = operands[2];
37386 rtx orig_mem = operands[3];
37387 rtx new_addr, new_mem, orig_addr, offset;
37388 enum rtx_code plus_or_lo_sum;
37389 machine_mode target_mode = GET_MODE (target);
37390 machine_mode extend_mode = target_mode;
37391 machine_mode ptr_mode = Pmode;
37392 enum rtx_code extend = UNKNOWN;
37394 if (GET_CODE (orig_mem) == ZERO_EXTEND
37395 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
37397 extend = GET_CODE (orig_mem);
37398 orig_mem = XEXP (orig_mem, 0);
37399 target_mode = GET_MODE (orig_mem);
37402 gcc_assert (MEM_P (orig_mem));
37404 orig_addr = XEXP (orig_mem, 0);
37405 plus_or_lo_sum = GET_CODE (orig_addr);
37406 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37408 offset = XEXP (orig_addr, 1);
37409 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37410 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37412 if (extend != UNKNOWN)
37413 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
37415 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37416 UNSPEC_FUSION_GPR);
37417 emit_insn (gen_rtx_SET (target, new_mem));
37419 if (extend == SIGN_EXTEND)
37421 int sub_off = ((BYTES_BIG_ENDIAN)
37422 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
37423 : 0);
37424 rtx sign_reg
37425 = simplify_subreg (target_mode, target, extend_mode, sub_off);
37427 emit_insn (gen_rtx_SET (target,
37428 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
37431 return;
37434 /* Emit the addis instruction that will be part of a fused instruction
37435 sequence. */
37437 void
37438 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
37439 const char *mode_name)
37441 rtx fuse_ops[10];
37442 char insn_template[80];
37443 const char *addis_str = NULL;
37444 const char *comment_str = ASM_COMMENT_START;
37446 if (*comment_str == ' ')
37447 comment_str++;
37449 /* Emit the addis instruction. */
37450 fuse_ops[0] = target;
37451 if (satisfies_constraint_L (addis_value))
37453 fuse_ops[1] = addis_value;
37454 addis_str = "lis %0,%v1";
37457 else if (GET_CODE (addis_value) == PLUS)
37459 rtx op0 = XEXP (addis_value, 0);
37460 rtx op1 = XEXP (addis_value, 1);
37462 if (REG_P (op0) && CONST_INT_P (op1)
37463 && satisfies_constraint_L (op1))
37465 fuse_ops[1] = op0;
37466 fuse_ops[2] = op1;
37467 addis_str = "addis %0,%1,%v2";
37471 else if (GET_CODE (addis_value) == HIGH)
37473 rtx value = XEXP (addis_value, 0);
37474 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
37476 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
37477 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
37478 if (TARGET_ELF)
37479 addis_str = "addis %0,%2,%1@toc@ha";
37481 else if (TARGET_XCOFF)
37482 addis_str = "addis %0,%1@u(%2)";
37484 else
37485 gcc_unreachable ();
37488 else if (GET_CODE (value) == PLUS)
37490 rtx op0 = XEXP (value, 0);
37491 rtx op1 = XEXP (value, 1);
37493 if (GET_CODE (op0) == UNSPEC
37494 && XINT (op0, 1) == UNSPEC_TOCREL
37495 && CONST_INT_P (op1))
37497 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
37498 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
37499 fuse_ops[3] = op1;
37500 if (TARGET_ELF)
37501 addis_str = "addis %0,%2,%1+%3@toc@ha";
37503 else if (TARGET_XCOFF)
37504 addis_str = "addis %0,%1+%3@u(%2)";
37506 else
37507 gcc_unreachable ();
37511 else if (satisfies_constraint_L (value))
37513 fuse_ops[1] = value;
37514 addis_str = "lis %0,%v1";
37517 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
37519 fuse_ops[1] = value;
37520 addis_str = "lis %0,%1@ha";
37524 if (!addis_str)
37525 fatal_insn ("Could not generate addis value for fusion", addis_value);
37527 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
37528 comment, mode_name);
37529 output_asm_insn (insn_template, fuse_ops);
37532 /* Emit a D-form load or store instruction that is the second instruction
37533 of a fusion sequence. */
37535 void
37536 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
37537 const char *insn_str)
37539 rtx fuse_ops[10];
37540 char insn_template[80];
37542 fuse_ops[0] = load_store_reg;
37543 fuse_ops[1] = addis_reg;
37545 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
37547 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
37548 fuse_ops[2] = offset;
37549 output_asm_insn (insn_template, fuse_ops);
37552 else if (GET_CODE (offset) == UNSPEC
37553 && XINT (offset, 1) == UNSPEC_TOCREL)
37555 if (TARGET_ELF)
37556 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
37558 else if (TARGET_XCOFF)
37559 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37561 else
37562 gcc_unreachable ();
37564 fuse_ops[2] = XVECEXP (offset, 0, 0);
37565 output_asm_insn (insn_template, fuse_ops);
37568 else if (GET_CODE (offset) == PLUS
37569 && GET_CODE (XEXP (offset, 0)) == UNSPEC
37570 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
37571 && CONST_INT_P (XEXP (offset, 1)))
37573 rtx tocrel_unspec = XEXP (offset, 0);
37574 if (TARGET_ELF)
37575 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
37577 else if (TARGET_XCOFF)
37578 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
37580 else
37581 gcc_unreachable ();
37583 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
37584 fuse_ops[3] = XEXP (offset, 1);
37585 output_asm_insn (insn_template, fuse_ops);
37588 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
37590 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37592 fuse_ops[2] = offset;
37593 output_asm_insn (insn_template, fuse_ops);
37596 else
37597 fatal_insn ("Unable to generate load/store offset for fusion", offset);
37599 return;
37602 /* Wrap a TOC address that can be fused to indicate that special fusion
37603 processing is needed. */
37605 static rtx
37606 fusion_wrap_memory_address (rtx old_mem)
37608 rtx old_addr = XEXP (old_mem, 0);
37609 rtvec v = gen_rtvec (1, old_addr);
37610 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
37611 return replace_equiv_address_nv (old_mem, new_addr, false);
37614 /* Given an address, convert it into the addis and load offset parts. Addresses
37615 created during the peephole2 process look like:
37616 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
37617 (unspec [(...)] UNSPEC_TOCREL))
37619 Addresses created via toc fusion look like:
37620 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
37622 static void
37623 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
37625 rtx hi, lo;
37627 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
37629 lo = XVECEXP (addr, 0, 0);
37630 hi = gen_rtx_HIGH (Pmode, lo);
37632 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
37634 hi = XEXP (addr, 0);
37635 lo = XEXP (addr, 1);
37637 else
37638 gcc_unreachable ();
37640 *p_hi = hi;
37641 *p_lo = lo;
37644 /* Return a string to fuse an addis instruction with a gpr load into the same
37645 register that the addis instruction set. The address that is used
37646 is the logical address that was formed during peephole2:
37647 (lo_sum (high) (low-part))
37649 Or the address is the TOC address that is wrapped before register allocation:
37650 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
37652 The code is complicated, so we call output_asm_insn directly, and just
37653 return "". */
37655 const char *
37656 emit_fusion_gpr_load (rtx target, rtx mem)
37658 rtx addis_value;
37659 rtx addr;
37660 rtx load_offset;
37661 const char *load_str = NULL;
37662 const char *mode_name = NULL;
37663 machine_mode mode;
37665 if (GET_CODE (mem) == ZERO_EXTEND)
37666 mem = XEXP (mem, 0);
37668 gcc_assert (REG_P (target) && MEM_P (mem));
37670 addr = XEXP (mem, 0);
37671 fusion_split_address (addr, &addis_value, &load_offset);
37673 /* Now emit the load instruction to the same register. */
37674 mode = GET_MODE (mem);
37675 switch (mode)
37677 case QImode:
37678 mode_name = "char";
37679 load_str = "lbz";
37680 break;
37682 case HImode:
37683 mode_name = "short";
37684 load_str = "lhz";
37685 break;
37687 case SImode:
37688 case SFmode:
37689 mode_name = (mode == SFmode) ? "float" : "int";
37690 load_str = "lwz";
37691 break;
37693 case DImode:
37694 case DFmode:
37695 gcc_assert (TARGET_POWERPC64);
37696 mode_name = (mode == DFmode) ? "double" : "long";
37697 load_str = "ld";
37698 break;
37700 default:
37701 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
37704 /* Emit the addis instruction. */
37705 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
37707 /* Emit the D-form load instruction. */
37708 emit_fusion_load_store (target, target, load_offset, load_str);
37710 return "";
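/* Example output (a sketch; the exact operands depend on the address form):
   for a QImode load this might print

     addis 10,2,sym@toc@ha		# gpr load fusion, type char
     lbz 10,sym@toc@l(10)

   where the trailing comment comes from the comment string and mode_name
   passed to emit_fusion_addis above. */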
37714 /* Return true if the peephole2 can combine an addis instruction and a
37715 load or store into a single fused sequence. This form of fusion was
37716 added in ISA 3.0 (power9) hardware. */
37718 bool
37719 fusion_p9_p (rtx addis_reg, /* register set via addis. */
37720 rtx addis_value, /* addis value. */
37721 rtx dest, /* destination (memory or register). */
37722 rtx src) /* source (register or memory). */
37724 rtx addr, mem, offset;
37725 enum machine_mode mode = GET_MODE (src);
37727 /* Validate arguments. */
37728 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37729 return false;
37731 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37732 return false;
37734 /* Ignore extend operations that are part of the load. */
37735 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
37736 src = XEXP (src, 0);
37738 /* Test for memory<-register or register<-memory. */
37739 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
37741 if (!MEM_P (dest))
37742 return false;
37744 mem = dest;
37747 else if (MEM_P (src))
37749 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
37750 return false;
37752 mem = src;
37755 else
37756 return false;
37758 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37759 if (GET_CODE (addr) == PLUS)
37761 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
37762 return false;
37764 return satisfies_constraint_I (XEXP (addr, 1));
37767 else if (GET_CODE (addr) == LO_SUM)
37769 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
37770 return false;
37772 offset = XEXP (addr, 1);
37773 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
37774 return small_toc_ref (offset, GET_MODE (offset));
37776 else if (TARGET_ELF && !TARGET_POWERPC64)
37777 return CONSTANT_P (offset);
37780 return false;
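/* Example of a pair accepted here (illustrative): unlike power8 fusion, the
   register loaded or stored need not be the addis target, and stores
   qualify too, e.g.

     addis 9,2,sym@toc@ha
     stw   10,sym@toc@l(9)

   provided r9, the addis result, is dead after the store. */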
37783 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
37784 load sequence.
37786 The operands are:
37787 operands[0] register set with addis
37788 operands[1] value set via addis
37789 operands[2] target register being loaded
37790 operands[3] D-form memory reference using operands[0].
37792 This is similar to the fusion introduced with power8, except it generalizes
37793 to both loads and stores and does not require the result register to be the
37794 same as the base register. At the moment, we only do this if the register
37795 set with addis is dead. */
37797 void
37798 expand_fusion_p9_load (rtx *operands)
37800 rtx tmp_reg = operands[0];
37801 rtx addis_value = operands[1];
37802 rtx target = operands[2];
37803 rtx orig_mem = operands[3];
37804 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
37805 enum rtx_code plus_or_lo_sum;
37806 machine_mode target_mode = GET_MODE (target);
37807 machine_mode extend_mode = target_mode;
37808 machine_mode ptr_mode = Pmode;
37809 enum rtx_code extend = UNKNOWN;
37811 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
37813 extend = GET_CODE (orig_mem);
37814 orig_mem = XEXP (orig_mem, 0);
37815 target_mode = GET_MODE (orig_mem);
37818 gcc_assert (MEM_P (orig_mem));
37820 orig_addr = XEXP (orig_mem, 0);
37821 plus_or_lo_sum = GET_CODE (orig_addr);
37822 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37824 offset = XEXP (orig_addr, 1);
37825 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37826 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37828 if (extend != UNKNOWN)
37829 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
37831 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37832 UNSPEC_FUSION_P9);
37834 set = gen_rtx_SET (target, new_mem);
37835 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
37836 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
37837 emit_insn (insn);
37839 return;
37842 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
37843 store sequence.
37845 The operands are:
37846 operands[0] register set with addis
37847 operands[1] value set via addis
37848 operands[2] target D-form memory being stored to
37849 operands[3] register being stored
37851 This is similar to the fusion introduced with power8, except it generalizes
37852 to both loads and stores and does not require the result register to be the
37853 same as the base register. At the moment, we only do this if the register
37854 set with addis is dead. */
37856 void
37857 expand_fusion_p9_store (rtx *operands)
37859 rtx tmp_reg = operands[0];
37860 rtx addis_value = operands[1];
37861 rtx orig_mem = operands[2];
37862 rtx src = operands[3];
37863 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
37864 enum rtx_code plus_or_lo_sum;
37865 machine_mode target_mode = GET_MODE (orig_mem);
37866 machine_mode ptr_mode = Pmode;
37868 gcc_assert (MEM_P (orig_mem));
37870 orig_addr = XEXP (orig_mem, 0);
37871 plus_or_lo_sum = GET_CODE (orig_addr);
37872 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37874 offset = XEXP (orig_addr, 1);
37875 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37876 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37878 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
37879 UNSPEC_FUSION_P9);
37881 set = gen_rtx_SET (new_mem, new_src);
37882 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
37883 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
37884 emit_insn (insn);
37886 return;
37889 /* Return a string to fuse an addis instruction with a load using extended
37890 fusion. The address that is used is the logical address that was formed
37891 during peephole2: (lo_sum (high) (low-part))
37893 The code is complicated, so we call output_asm_insn directly, and just
37894 return "". */
37896 const char *
37897 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
37899 enum machine_mode mode = GET_MODE (reg);
37900 rtx hi;
37901 rtx lo;
37902 rtx addr;
37903 const char *load_string;
37904 int r;
37906 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
37908 mem = XEXP (mem, 0);
37909 mode = GET_MODE (mem);
37912 if (GET_CODE (reg) == SUBREG)
37914 gcc_assert (SUBREG_BYTE (reg) == 0);
37915 reg = SUBREG_REG (reg);
37918 if (!REG_P (reg))
37919 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
37921 r = REGNO (reg);
37922 if (FP_REGNO_P (r))
37924 if (mode == SFmode)
37925 load_string = "lfs";
37926 else if (mode == DFmode || mode == DImode)
37927 load_string = "lfd";
37928 else
37929 gcc_unreachable ();
37931 else if (INT_REGNO_P (r))
37933 switch (mode)
37935 case QImode:
37936 load_string = "lbz";
37937 break;
37938 case HImode:
37939 load_string = "lhz";
37940 break;
37941 case SImode:
37942 case SFmode:
37943 load_string = "lwz";
37944 break;
37945 case DImode:
37946 case DFmode:
37947 if (!TARGET_POWERPC64)
37948 gcc_unreachable ();
37949 load_string = "ld";
37950 break;
37951 default:
37952 gcc_unreachable ();
37955 else
37956 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
37958 if (!MEM_P (mem))
37959 fatal_insn ("emit_fusion_p9_load not MEM", mem);
37961 addr = XEXP (mem, 0);
37962 fusion_split_address (addr, &hi, &lo);
37964 /* Emit the addis instruction. */
37965 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
37967 /* Emit the D-form load instruction. */
37968 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
37970 return "";
37973 /* Return a string to fuse an addis instruction with a store using extended
37974 fusion. The address that is used is the logical address that was formed
37975 during peephole2: (lo_sum (high) (low-part))
37977 The code is complicated, so we call output_asm_insn directly, and just
37978 return "". */
37980 const char *
37981 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
37983 enum machine_mode mode = GET_MODE (reg);
37984 rtx hi;
37985 rtx lo;
37986 rtx addr;
37987 const char *store_string;
37988 int r;
37990 if (GET_CODE (reg) == SUBREG)
37992 gcc_assert (SUBREG_BYTE (reg) == 0);
37993 reg = SUBREG_REG (reg);
37996 if (!REG_P (reg))
37997 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
37999 r = REGNO (reg);
38000 if (FP_REGNO_P (r))
38002 if (mode == SFmode)
38003 store_string = "stfs";
38004 else if (mode == DFmode)
38005 store_string = "stfd";
38006 else
38007 gcc_unreachable ();
38009 else if (INT_REGNO_P (r))
38011 switch (mode)
38013 case QImode:
38014 store_string = "stb";
38015 break;
38016 case HImode:
38017 store_string = "sth";
38018 break;
38019 case SImode:
38020 case SFmode:
38021 store_string = "stw";
38022 break;
38023 case DImode:
38024 case DFmode:
38025 if (!TARGET_POWERPC64)
38026 gcc_unreachable ();
38027 store_string = "std";
38028 break;
38029 default:
38030 gcc_unreachable ();
38033 else
38034 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
38036 if (!MEM_P (mem))
38037 fatal_insn ("emit_fusion_p9_store not MEM", mem);
38039 addr = XEXP (mem, 0);
38040 fusion_split_address (addr, &hi, &lo);
38042 /* Emit the addis instruction. */
38043 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
38045 /* Emit the D-form store instruction. */
38046 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
38048 return "";
38052 /* Analyze vector computations and remove unnecessary doubleword
38053 swaps (xxswapdi instructions). This pass is performed only
38054 for little-endian VSX code generation.
38056 For this specific case, loads and stores of 4x32 and 2x64 vectors
38057 are inefficient. These are implemented using the lxvd2x and
38058 stxvd2x instructions, which invert the order of doublewords in
38059 a vector register. Thus the code generation inserts an xxswapdi
38060 after each such load, and prior to each such store. (For spill
38061 code after register assignment, an additional xxswapdi is inserted
38062 following each store in order to return a hard register to its
38063 unpermuted value.)
38065 The extra xxswapdi instructions reduce performance. This can be
38066 particularly bad for vectorized code. The purpose of this pass
38067 is to reduce the number of xxswapdi instructions required for
38068 correctness.
38070 The primary insight is that much code that operates on vectors
38071 does not care about the relative order of elements in a register,
38072 so long as the correct memory order is preserved. If we have
38073 a computation where all input values are provided by lxvd2x/xxswapdi
38074 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
38075 and all intermediate computations are pure SIMD (independent of
38076 element order), then all the xxswapdi's associated with the loads
38077 and stores may be removed.
38079 This pass uses some of the infrastructure and logical ideas from
38080 the "web" pass in web.c. We create maximal webs of computations
38081 fitting the description above using union-find. Each such web is
38082 then optimized by removing its unnecessary xxswapdi instructions.
38084 The pass is placed prior to global optimization so that we can
38085 perform the optimization in the safest and simplest way possible;
38086 that is, by replacing each xxswapdi insn with a register copy insn.
38087 Subsequent forward propagation will remove copies where possible.
38089 There are some operations sensitive to element order for which we
38090 can still allow the operation, provided we modify those operations.
38091 These include CONST_VECTORs, for which we must swap the first and
38092 second halves of the constant vector; and SUBREGs, for which we
38093 must adjust the byte offset to account for the swapped doublewords.
38094 A remaining opportunity would be non-immediate-form splats, for
38095 which we should adjust the selected lane of the input. We should
38096 also make code generation adjustments for sum-across operations,
38097 since this is a common vectorizer reduction.
38099 Because we run prior to the first split, we can see loads and stores
38100 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
38101 vector loads and stores that have not yet been split into a permuting
38102 load/store and a swap. (One way this can happen is with a builtin
38103 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
38104 than deleting a swap, we convert the load/store into a permuting
38105 load/store (which effectively removes the swap). */
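/* A concrete picture (illustrative): on little-endian VSX, a simple vector
   load such as

     (set (reg:V4SI vr) (mem:V4SI addr))

   is normally split into an lxvd2x, which loads the doublewords in the
   wrong order for LE, followed by an xxswapdi to put the elements right.
   When every producer and consumer in a web is order-indifferent, this
   pass removes the xxswapdi (or converts an unsplit load or store directly
   into the permuting form, as described above). */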
38107 /* Notes on Permutes
38109 We do not currently handle computations that contain permutes. There
38110 is a general transformation that can be performed correctly, but it
38111 may introduce more expensive code than it replaces. To handle these
38112 would require a cost model to determine when to perform the optimization.
38113 This commentary records how this could be done if desired.
38115 The most general permute is something like this (example for V16QI):
38117 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
38118 (parallel [(const_int a0) (const_int a1)
38119 ...
38120 (const_int a14) (const_int a15)]))
38122 where a0,...,a15 are in [0,31] and select which elements of op1 and op2
38123 appear in the result.
38125 Regardless of mode, we can convert the PARALLEL to a mask of 16
38126 byte-element selectors. Let's call this M, with M[i] representing
38127 the ith byte-element selector value. Then if we swap doublewords
38128 throughout the computation, we can get correct behavior by replacing
38129 M with M' as follows:
38131 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
38132 { ((M[i]+8)%16)+16 : M[i] in [16,31]
38134 This seems promising at first, since we are just replacing one mask
38135 with another. But certain masks are preferable to others. If M
38136 is a mask that matches a vmrghh pattern, for example, M' certainly
38137 will not. Instead of a single vmrghh, we would generate a load of
38138 M' and a vperm. So we would need to know how many xxswapd's we can
38139 remove as a result of this transformation to determine if it's
38140 profitable; and preferably the logic would need to be aware of all
38141 the special preferable masks.
38143 Another form of permute is an UNSPEC_VPERM, in which the mask is
38144 already in a register. In some cases, this mask may be a constant
38145 that we can discover with ud-chains, in which case the above
38146 transformation is ok. However, the common usage here is for the
38147 mask to be produced by an UNSPEC_LVSL, in which case the mask
38148 cannot be known at compile time. In such a case we would have to
38149 generate several instructions to compute M' as above at run time,
38150 and a cost model is needed again.
38152 However, when the mask M for an UNSPEC_VPERM is loaded from the
38153 constant pool, we can replace M with M' as above at no cost
38154 beyond adding a constant pool entry. */
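/* A small worked example of the M -> M' rule above: the identity mask
   M = [0 1 2 ... 15] selects op1 unchanged; the formula gives
   M' = [8 9 ... 15 0 1 ... 7], i.e. the same selection with the two
   doublewords exchanged, which is exactly what is required once all
   inputs are kept doubleword-swapped in registers. */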
38156 /* This is based on the union-find logic in web.c. web_entry_base is
38157 defined in df.h. */
38158 class swap_web_entry : public web_entry_base
38160 public:
38161 /* Pointer to the insn. */
38162 rtx_insn *insn;
38163 /* Set if insn contains a mention of a vector register. All other
38164 fields are undefined if this field is unset. */
38165 unsigned int is_relevant : 1;
38166 /* Set if insn is a load. */
38167 unsigned int is_load : 1;
38168 /* Set if insn is a store. */
38169 unsigned int is_store : 1;
38170 /* Set if insn is a doubleword swap. This can either be a register swap
38171 or a permuting load or store (test is_load and is_store for this). */
38172 unsigned int is_swap : 1;
38173 /* Set if the insn has a live-in use of a parameter register. */
38174 unsigned int is_live_in : 1;
38175 /* Set if the insn has a live-out def of a return register. */
38176 unsigned int is_live_out : 1;
38177 /* Set if the insn contains a subreg reference of a vector register. */
38178 unsigned int contains_subreg : 1;
38179 /* Set if the insn contains a 128-bit integer operand. */
38180 unsigned int is_128_int : 1;
38181 /* Set if this is a call-insn. */
38182 unsigned int is_call : 1;
38183 /* Set if this insn does not perform a vector operation for which
38184 element order matters, or if we know how to fix it up if it does.
38185 Undefined if is_swap is set. */
38186 unsigned int is_swappable : 1;
38187 /* A nonzero value indicates what kind of special handling for this
38188 insn is required if doublewords are swapped. Undefined if
38189 is_swappable is not set. */
38190 unsigned int special_handling : 4;
38191 /* Set if the web represented by this entry cannot be optimized. */
38192 unsigned int web_not_optimizable : 1;
38193 /* Set if this insn should be deleted. */
38194 unsigned int will_delete : 1;
38197 enum special_handling_values {
38198 SH_NONE = 0,
38199 SH_CONST_VECTOR,
38200 SH_SUBREG,
38201 SH_NOSWAP_LD,
38202 SH_NOSWAP_ST,
38203 SH_EXTRACT,
38204 SH_SPLAT,
38205 SH_XXPERMDI,
38206 SH_CONCAT,
38207 SH_VPERM
38210 /* Union INSN with all insns containing definitions that reach USE.
38211 Detect whether USE is live-in to the current function. */
38212 static void
38213 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
38215 struct df_link *link = DF_REF_CHAIN (use);
38217 if (!link)
38218 insn_entry[INSN_UID (insn)].is_live_in = 1;
38220 while (link)
38222 if (DF_REF_IS_ARTIFICIAL (link->ref))
38223 insn_entry[INSN_UID (insn)].is_live_in = 1;
38225 if (DF_REF_INSN_INFO (link->ref))
38227 rtx def_insn = DF_REF_INSN (link->ref);
38228 (void)unionfind_union (insn_entry + INSN_UID (insn),
38229 insn_entry + INSN_UID (def_insn));
38232 link = link->next;
38236 /* Union INSN with all insns containing uses reached from DEF.
38237 Detect whether DEF is live-out from the current function. */
38238 static void
38239 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
38241 struct df_link *link = DF_REF_CHAIN (def);
38243 if (!link)
38244 insn_entry[INSN_UID (insn)].is_live_out = 1;
38246 while (link)
38248 /* This could be an eh use or some other artificial use;
38249 we treat these all the same (killing the optimization). */
38250 if (DF_REF_IS_ARTIFICIAL (link->ref))
38251 insn_entry[INSN_UID (insn)].is_live_out = 1;
38253 if (DF_REF_INSN_INFO (link->ref))
38255 rtx use_insn = DF_REF_INSN (link->ref);
38256 (void)unionfind_union (insn_entry + INSN_UID (insn),
38257 insn_entry + INSN_UID (use_insn));
38260 link = link->next;
38264 /* Return 1 iff INSN is a load insn, including permuting loads that
38265 represent an lxvd2x instruction; else return 0. */
38266 static unsigned int
38267 insn_is_load_p (rtx insn)
38269 rtx body = PATTERN (insn);
38271 if (GET_CODE (body) == SET)
38273 if (GET_CODE (SET_SRC (body)) == MEM)
38274 return 1;
38276 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
38277 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
38278 return 1;
38280 return 0;
38283 if (GET_CODE (body) != PARALLEL)
38284 return 0;
38286 rtx set = XVECEXP (body, 0, 0);
38288 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
38289 return 1;
38291 return 0;
38294 /* Return 1 iff INSN is a store insn, including permuting stores that
38295 represent an stxvd2x instruction; else return 0. */
38296 static unsigned int
38297 insn_is_store_p (rtx insn)
38299 rtx body = PATTERN (insn);
38300 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
38301 return 1;
38302 if (GET_CODE (body) != PARALLEL)
38303 return 0;
38304 rtx set = XVECEXP (body, 0, 0);
38305 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
38306 return 1;
38307 return 0;
38310 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
38311 a permuting load, or a permuting store. */
38312 static unsigned int
38313 insn_is_swap_p (rtx insn)
38315 rtx body = PATTERN (insn);
38316 if (GET_CODE (body) != SET)
38317 return 0;
38318 rtx rhs = SET_SRC (body);
38319 if (GET_CODE (rhs) != VEC_SELECT)
38320 return 0;
38321 rtx parallel = XEXP (rhs, 1);
38322 if (GET_CODE (parallel) != PARALLEL)
38323 return 0;
38324 unsigned int len = XVECLEN (parallel, 0);
38325 if (len != 2 && len != 4 && len != 8 && len != 16)
38326 return 0;
38327 for (unsigned int i = 0; i < len / 2; ++i)
38329 rtx op = XVECEXP (parallel, 0, i);
38330 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
38331 return 0;
38333 for (unsigned int i = len / 2; i < len; ++i)
38335 rtx op = XVECEXP (parallel, 0, i);
38336 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
38337 return 0;
38339 return 1;
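/* Example of a pattern accepted above (illustrative), for V2DI:

     (set (reg:V2DI d)
          (vec_select:V2DI (reg:V2DI s)
                           (parallel [(const_int 1) (const_int 0)])))

   The first half of the selectors must be [len/2 .. len-1] and the second
   half [0 .. len/2-1], which describes a doubleword swap for each of the
   supported element counts (2, 4, 8, 16). */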
38342 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
38343 static bool
38344 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
38346 unsigned uid = INSN_UID (insn);
38347 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
38348 return false;
38350 /* Find the unique use in the swap and locate its def. If the def
38351 isn't unique, punt. */
38352 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38353 df_ref use;
38354 FOR_EACH_INSN_INFO_USE (use, insn_info)
38356 struct df_link *def_link = DF_REF_CHAIN (use);
38357 if (!def_link || def_link->next)
38358 return false;
38360 rtx def_insn = DF_REF_INSN (def_link->ref);
38361 unsigned uid2 = INSN_UID (def_insn);
38362 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
38363 return false;
38365 rtx body = PATTERN (def_insn);
38366 if (GET_CODE (body) != SET
38367 || GET_CODE (SET_SRC (body)) != VEC_SELECT
38368 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
38369 return false;
38371 rtx mem = XEXP (SET_SRC (body), 0);
38372 rtx base_reg = XEXP (mem, 0);
38374 df_ref base_use;
38375 insn_info = DF_INSN_INFO_GET (def_insn);
38376 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
38378 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
38379 continue;
38381 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
38382 if (!base_def_link || base_def_link->next)
38383 return false;
38385 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
38386 rtx tocrel_body = PATTERN (tocrel_insn);
38387 rtx base, offset;
38388 if (GET_CODE (tocrel_body) != SET)
38389 return false;
38390 /* There is an extra level of indirection for small/large
38391 code models. */
38392 rtx tocrel_expr = SET_SRC (tocrel_body);
38393 if (GET_CODE (tocrel_expr) == MEM)
38394 tocrel_expr = XEXP (tocrel_expr, 0);
38395 if (!toc_relative_expr_p (tocrel_expr, false))
38396 return false;
38397 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
38398 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
38399 return false;
38402 return true;
38405 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
38406 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
38407 static bool
38408 v2df_reduction_p (rtx op)
38410 if (GET_MODE (op) != V2DFmode)
38411 return false;
38413 enum rtx_code code = GET_CODE (op);
38414 if (code != PLUS && code != SMIN && code != SMAX)
38415 return false;
38417 rtx concat = XEXP (op, 0);
38418 if (GET_CODE (concat) != VEC_CONCAT)
38419 return false;
38421 rtx select0 = XEXP (concat, 0);
38422 rtx select1 = XEXP (concat, 1);
38423 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
38424 return false;
38426 rtx reg0 = XEXP (select0, 0);
38427 rtx reg1 = XEXP (select1, 0);
38428 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
38429 return false;
38431 rtx parallel0 = XEXP (select0, 1);
38432 rtx parallel1 = XEXP (select1, 1);
38433 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
38434 return false;
38436 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
38437 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
38438 return false;
38440 return true;
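/* Sketch of the shape the checks above accept (the second operand of the
   outer code is elided, since it is not examined here):

     (plus:V2DF (vec_concat:V2DF
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
                  (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
                ...)

   with SMIN/SMAX allowed in place of PLUS; both selects must read the same
   register, with the lane order reversed by the concat. */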
38443 /* Return 1 iff OP is an operand that will not be affected by having
38444 vector doublewords swapped in memory. */
38445 static unsigned int
38446 rtx_is_swappable_p (rtx op, unsigned int *special)
38448 enum rtx_code code = GET_CODE (op);
38449 int i, j;
38450 rtx parallel;
38452 switch (code)
38454 case LABEL_REF:
38455 case SYMBOL_REF:
38456 case CLOBBER:
38457 case REG:
38458 return 1;
38460 case VEC_CONCAT:
38461 case ASM_INPUT:
38462 case ASM_OPERANDS:
38463 return 0;
38465 case CONST_VECTOR:
38467 *special = SH_CONST_VECTOR;
38468 return 1;
38471 case VEC_DUPLICATE:
38472 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
38473 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
38474 it represents a vector splat for which we can do special
38475 handling. */
38476 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
38477 return 1;
38478 else if (GET_CODE (XEXP (op, 0)) == REG
38479 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
38480 /* This catches V2DF and V2DI splat, at a minimum. */
38481 return 1;
38482 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
38483 /* If the duplicated item is from a select, defer to the select
38484 processing to see if we can change the lane for the splat. */
38485 return rtx_is_swappable_p (XEXP (op, 0), special);
38486 else
38487 return 0;
38489 case VEC_SELECT:
38490 /* A vec_extract operation is ok if we change the lane. */
38491 if (GET_CODE (XEXP (op, 0)) == REG
38492 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
38493 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
38494 && XVECLEN (parallel, 0) == 1
38495 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
38497 *special = SH_EXTRACT;
38498 return 1;
38500 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
38501 XXPERMDI is a swap operation, it will be identified by
38502 insn_is_swap_p and therefore we won't get here. */
38503 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
38504 && (GET_MODE (XEXP (op, 0)) == V4DFmode
38505 || GET_MODE (XEXP (op, 0)) == V4DImode)
38506 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
38507 && XVECLEN (parallel, 0) == 2
38508 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
38509 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
38511 *special = SH_XXPERMDI;
38512 return 1;
38514 else if (v2df_reduction_p (op))
38515 return 1;
38516 else
38517 return 0;
38519 case UNSPEC:
38521 /* Various operations are unsafe for this optimization, at least
38522 without significant additional work. Permutes are obviously
38523 problematic, as both the permute control vector and the ordering
38524 of the target values are invalidated by doubleword swapping.
38525 Vector pack and unpack modify the number of vector lanes.
38526 Merge-high/low will not operate correctly on swapped operands.
38527 Vector shifts across element boundaries are clearly uncool,
38528 as are vector select and concatenate operations. Vector
38529 sum-across instructions define one operand with a specific
38530 order-dependent element, so additional fixup code would be
38531 needed to make those work. Vector set and non-immediate-form
38532 vector splat are element-order sensitive. A few of these
38533 cases might be workable with special handling if required.
38534 Adding cost modeling would be appropriate in some cases. */
38535 int val = XINT (op, 1);
38536 switch (val)
38538 default:
38539 break;
38540 case UNSPEC_VMRGH_DIRECT:
38541 case UNSPEC_VMRGL_DIRECT:
38542 case UNSPEC_VPACK_SIGN_SIGN_SAT:
38543 case UNSPEC_VPACK_SIGN_UNS_SAT:
38544 case UNSPEC_VPACK_UNS_UNS_MOD:
38545 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
38546 case UNSPEC_VPACK_UNS_UNS_SAT:
38547 case UNSPEC_VPERM:
38548 case UNSPEC_VPERM_UNS:
38549 case UNSPEC_VPERMHI:
38550 case UNSPEC_VPERMSI:
38551 case UNSPEC_VPKPX:
38552 case UNSPEC_VSLDOI:
38553 case UNSPEC_VSLO:
38554 case UNSPEC_VSRO:
38555 case UNSPEC_VSUM2SWS:
38556 case UNSPEC_VSUM4S:
38557 case UNSPEC_VSUM4UBS:
38558 case UNSPEC_VSUMSWS:
38559 case UNSPEC_VSUMSWS_DIRECT:
38560 case UNSPEC_VSX_CONCAT:
38561 case UNSPEC_VSX_SET:
38562 case UNSPEC_VSX_SLDWI:
38563 case UNSPEC_VUNPACK_HI_SIGN:
38564 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
38565 case UNSPEC_VUNPACK_LO_SIGN:
38566 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
38567 case UNSPEC_VUPKHPX:
38568 case UNSPEC_VUPKHS_V4SF:
38569 case UNSPEC_VUPKHU_V4SF:
38570 case UNSPEC_VUPKLPX:
38571 case UNSPEC_VUPKLS_V4SF:
38572 case UNSPEC_VUPKLU_V4SF:
38573 case UNSPEC_VSX_CVDPSPN:
38574 case UNSPEC_VSX_CVSPDP:
38575 case UNSPEC_VSX_CVSPDPN:
38576 return 0;
38577 case UNSPEC_VSPLT_DIRECT:
38578 *special = SH_SPLAT;
38579 return 1;
38580 case UNSPEC_REDUC_PLUS:
38581 case UNSPEC_REDUC:
38582 return 1;
38586 default:
38587 break;
38590 const char *fmt = GET_RTX_FORMAT (code);
38591 int ok = 1;
38593 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
38594 if (fmt[i] == 'e' || fmt[i] == 'u')
38596 unsigned int special_op = SH_NONE;
38597 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
38598 if (special_op == SH_NONE)
38599 continue;
38600 /* Ensure we never have two kinds of special handling
38601 for the same insn. */
38602 if (*special != SH_NONE && *special != special_op)
38603 return 0;
38604 *special = special_op;
38606 else if (fmt[i] == 'E')
38607 for (j = 0; j < XVECLEN (op, i); ++j)
38609 unsigned int special_op = SH_NONE;
38610 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
38611 if (special_op == SH_NONE)
38612 continue;
38613 /* Ensure we never have two kinds of special handling
38614 for the same insn. */
38615 if (*special != SH_NONE && *special != special_op)
38616 return 0;
38617 *special = special_op;
38620 return ok;
38623 /* Return 1 iff INSN is an operand that will not be affected by
38624 having vector doublewords swapped in memory (in which case
38625 *SPECIAL is unchanged), or that can be modified to be correct
38626 if vector doublewords are swapped in memory (in which case
38627 *SPECIAL is changed to a value indicating how). */
38628 static unsigned int
38629 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
38630 unsigned int *special)
38632 /* Calls are always bad. */
38633 if (GET_CODE (insn) == CALL_INSN)
38634 return 0;
38636 /* Loads and stores seen here are not permuting, but we can still
38637 fix them up by converting them to permuting ones. Exceptions:
38638 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
38639 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
38640 for the SET source. Also we must now make an exception for lvx
38641 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
38642 explicit "& -16") since this leads to unrecognizable insns. */
38643 rtx body = PATTERN (insn);
38644 int i = INSN_UID (insn);
38646 if (insn_entry[i].is_load)
38648 if (GET_CODE (body) == SET)
38650 rtx rhs = SET_SRC (body);
38651 gcc_assert (GET_CODE (rhs) == MEM);
38652 if (GET_CODE (XEXP (rhs, 0)) == AND)
38653 return 0;
38655 *special = SH_NOSWAP_LD;
38656 return 1;
38658 else
38659 return 0;
38662 if (insn_entry[i].is_store)
38664 if (GET_CODE (body) == SET
38665 && GET_CODE (SET_SRC (body)) != UNSPEC)
38667 rtx lhs = SET_DEST (body);
38668 gcc_assert (GET_CODE (lhs) == MEM);
38669 if (GET_CODE (XEXP (lhs, 0)) == AND)
38670 return 0;
38672 *special = SH_NOSWAP_ST;
38673 return 1;
38675 else
38676 return 0;
38679 /* A convert to single precision can be left as is provided that
38680 all of its uses are in xxspltw instructions that splat BE element
38681 zero. */
38682 if (GET_CODE (body) == SET
38683 && GET_CODE (SET_SRC (body)) == UNSPEC
38684 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
38686 df_ref def;
38687 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38689 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38691 struct df_link *link = DF_REF_CHAIN (def);
38692 if (!link)
38693 return 0;
38695 for (; link; link = link->next) {
38696 rtx use_insn = DF_REF_INSN (link->ref);
38697 rtx use_body = PATTERN (use_insn);
38698 if (GET_CODE (use_body) != SET
38699 || GET_CODE (SET_SRC (use_body)) != UNSPEC
38700 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
38701 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
38702 return 0;
38706 return 1;
38709 /* A concatenation of two doublewords is ok if we reverse the
38710 order of the inputs. */
38711 if (GET_CODE (body) == SET
38712 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
38713 && (GET_MODE (SET_SRC (body)) == V2DFmode
38714 || GET_MODE (SET_SRC (body)) == V2DImode))
38716 *special = SH_CONCAT;
38717 return 1;
38720 /* V2DF reductions are always swappable. */
38721 if (GET_CODE (body) == PARALLEL)
38723 rtx expr = XVECEXP (body, 0, 0);
38724 if (GET_CODE (expr) == SET
38725 && v2df_reduction_p (SET_SRC (expr)))
38726 return 1;
38729 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
38730 constant pool. */
38731 if (GET_CODE (body) == SET
38732 && GET_CODE (SET_SRC (body)) == UNSPEC
38733 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
38734 && XVECLEN (SET_SRC (body), 0) == 3
38735 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
38737 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
38738 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38739 df_ref use;
38740 FOR_EACH_INSN_INFO_USE (use, insn_info)
38741 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
38743 struct df_link *def_link = DF_REF_CHAIN (use);
38744 /* Punt if multiple definitions for this reg. */
38745 if (def_link && !def_link->next
38746 && const_load_sequence_p (insn_entry,
38747 DF_REF_INSN (def_link->ref)))
38749 *special = SH_VPERM;
38750 return 1;
38755 /* Otherwise check the operands for vector lane violations. */
38756 return rtx_is_swappable_p (body, special);
38759 enum chain_purpose { FOR_LOADS, FOR_STORES };
38761 /* Return true if the UD or DU chain headed by LINK is non-empty,
38762 and every entry on the chain references an insn that is a
38763 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
38764 register swap must have only permuting loads as reaching defs.
38765 If PURPOSE is FOR_STORES, each such register swap must have only
38766 register swaps or permuting stores as reached uses. */
38767 static bool
38768 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
38769 enum chain_purpose purpose)
38771 if (!link)
38772 return false;
38774 for (; link; link = link->next)
38776 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
38777 continue;
38779 if (DF_REF_IS_ARTIFICIAL (link->ref))
38780 return false;
38782 rtx reached_insn = DF_REF_INSN (link->ref);
38783 unsigned uid = INSN_UID (reached_insn);
38784 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
38786 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
38787 || insn_entry[uid].is_store)
38788 return false;
38790 if (purpose == FOR_LOADS)
38792 df_ref use;
38793 FOR_EACH_INSN_INFO_USE (use, insn_info)
38795 struct df_link *swap_link = DF_REF_CHAIN (use);
38797 while (swap_link)
38799 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
38800 return false;
38802 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
38803 unsigned uid2 = INSN_UID (swap_def_insn);
38805 /* Only permuting loads are allowed. */
38806 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
38807 return false;
38809 swap_link = swap_link->next;
38813 else if (purpose == FOR_STORES)
38815 df_ref def;
38816 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38818 struct df_link *swap_link = DF_REF_CHAIN (def);
38820 while (swap_link)
38822 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
38823 return false;
38825 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
38826 unsigned uid2 = INSN_UID (swap_use_insn);
38828 /* Permuting stores or register swaps are allowed. */
38829 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
38830 return false;
38832 swap_link = swap_link->next;
38838 return true;
38841 /* Mark the xxswapdi instructions associated with permuting loads and
38842 stores for removal. Note that we only flag them for deletion here,
38843 as there is a possibility of a swap being reached from multiple
38844 loads, etc. */
38845 static void
38846 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
38848 rtx insn = insn_entry[i].insn;
38849 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38851 if (insn_entry[i].is_load)
38853 df_ref def;
38854 FOR_EACH_INSN_INFO_DEF (def, insn_info)
38856 struct df_link *link = DF_REF_CHAIN (def);
38858 /* We know by now that these are swaps, so we can delete
38859 them confidently. */
38860 while (link)
38862 rtx use_insn = DF_REF_INSN (link->ref);
38863 insn_entry[INSN_UID (use_insn)].will_delete = 1;
38864 link = link->next;
38868 else if (insn_entry[i].is_store)
38870 df_ref use;
38871 FOR_EACH_INSN_INFO_USE (use, insn_info)
38873 /* Ignore uses for addressability. */
38874 machine_mode mode = GET_MODE (DF_REF_REG (use));
38875 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38876 continue;
38878 struct df_link *link = DF_REF_CHAIN (use);
38880 /* We know by now that these are swaps, so we can delete
38881 them confidently. */
38882 while (link)
38884 rtx def_insn = DF_REF_INSN (link->ref);
38885 insn_entry[INSN_UID (def_insn)].will_delete = 1;
38886 link = link->next;
38892 /* OP is either a CONST_VECTOR or an expression containing one.
38893 Swap the first half of the vector with the second in the first
38894 case. Recurse to find it in the second. */
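/* For example, with a V4SI constant (half_units == 2) the vector
   (const_vector [1 2 3 4]) is rewritten in place as
   (const_vector [3 4 1 2]), exchanging the two doublewords. */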
38895 static void
38896 swap_const_vector_halves (rtx op)
38898 int i;
38899 enum rtx_code code = GET_CODE (op);
38900 if (GET_CODE (op) == CONST_VECTOR)
38902 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
38903 for (i = 0; i < half_units; ++i)
38905 rtx temp = CONST_VECTOR_ELT (op, i);
38906 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
38907 CONST_VECTOR_ELT (op, i + half_units) = temp;
38910 else
38912 int j;
38913 const char *fmt = GET_RTX_FORMAT (code);
38914 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
38915 if (fmt[i] == 'e' || fmt[i] == 'u')
38916 swap_const_vector_halves (XEXP (op, i));
38917 else if (fmt[i] == 'E')
38918 for (j = 0; j < XVECLEN (op, i); ++j)
38919 swap_const_vector_halves (XVECEXP (op, i, j));
38923 /* Find all subregs of a vector expression that perform a narrowing,
38924 and adjust the subreg index to account for doubleword swapping. */
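/* For example, a narrowing (subreg:DF (reg:V2DF) 0) selects the
   first doubleword, so after the swap it must become
   (subreg:DF (reg:V2DF) 8), and vice versa; same-size subregs
   are not adjusted. */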
38925 static void
38926 adjust_subreg_index (rtx op)
38928 enum rtx_code code = GET_CODE (op);
38929 if (code == SUBREG
38930 && (GET_MODE_SIZE (GET_MODE (op))
38931 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
38933 unsigned int index = SUBREG_BYTE (op);
38934 if (index < 8)
38935 index += 8;
38936 else
38937 index -= 8;
38938 SUBREG_BYTE (op) = index;
38941 const char *fmt = GET_RTX_FORMAT (code);
38942 int i,j;
38943 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
38944 if (fmt[i] == 'e' || fmt[i] == 'u')
38945 adjust_subreg_index (XEXP (op, i));
38946 else if (fmt[i] == 'E')
38947 for (j = 0; j < XVECLEN (op, i); ++j)
38948 adjust_subreg_index (XVECEXP (op, i, j));
38951 /* Convert the non-permuting load INSN to a permuting one. */
38952 static void
38953 permute_load (rtx_insn *insn)
38955 rtx body = PATTERN (insn);
38956 rtx mem_op = SET_SRC (body);
38957 rtx tgt_reg = SET_DEST (body);
38958 machine_mode mode = GET_MODE (tgt_reg);
38959 int n_elts = GET_MODE_NUNITS (mode);
38960 int half_elts = n_elts / 2;
38961 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38962 int i, j;
38963 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
38964 XVECEXP (par, 0, i) = GEN_INT (j);
38965 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
38966 XVECEXP (par, 0, i) = GEN_INT (j);
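/* For V4SI (n_elts == 4, half_elts == 2) the two loops above build
   the selector (parallel [2 3 0 1]), so the vec_select created below
   exchanges the doublewords as part of the load itself. */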
38967 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
38968 SET_SRC (body) = sel;
38969 INSN_CODE (insn) = -1; /* Force re-recognition. */
38970 df_insn_rescan (insn);
38972 if (dump_file)
38973 fprintf (dump_file, "Replacing load %d with permuted load\n",
38974 INSN_UID (insn));
38977 /* Convert the non-permuting store INSN to a permuting one. */
38978 static void
38979 permute_store (rtx_insn *insn)
38981 rtx body = PATTERN (insn);
38982 rtx src_reg = SET_SRC (body);
38983 machine_mode mode = GET_MODE (src_reg);
38984 int n_elts = GET_MODE_NUNITS (mode);
38985 int half_elts = n_elts / 2;
38986 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38987 int i, j;
38988 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
38989 XVECEXP (par, 0, i) = GEN_INT (j);
38990 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
38991 XVECEXP (par, 0, i) = GEN_INT (j);
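/* As for loads: with V4SI this builds the selector (parallel [2 3 0 1]),
   here applied to the source register before its value is stored. */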
38992 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
38993 SET_SRC (body) = sel;
38994 INSN_CODE (insn) = -1; /* Force re-recognition. */
38995 df_insn_rescan (insn);
38997 if (dump_file)
38998 fprintf (dump_file, "Replacing store %d with permuted store\n",
38999 INSN_UID (insn));
39002 /* Given INSN that contains a vector extract operation, adjust the index
39003 of the extracted lane to account for the doubleword swap. */
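/* For example, with V4SI (half_elts == 2), an extract of lane 3
   becomes an extract of lane 1 once the doublewords are swapped,
   and an extract of lane 0 becomes an extract of lane 2. */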
39004 static void
39005 adjust_extract (rtx_insn *insn)
39007 rtx pattern = PATTERN (insn);
39008 if (GET_CODE (pattern) == PARALLEL)
39009 pattern = XVECEXP (pattern, 0, 0);
39010 rtx src = SET_SRC (pattern);
39011 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
39012 account for that. */
39013 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
39014 rtx par = XEXP (sel, 1);
39015 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
39016 int lane = INTVAL (XVECEXP (par, 0, 0));
39017 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
39018 XVECEXP (par, 0, 0) = GEN_INT (lane);
39019 INSN_CODE (insn) = -1; /* Force re-recognition. */
39020 df_insn_rescan (insn);
39022 if (dump_file)
39023 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
39026 /* Given INSN that contains a vector direct-splat operation, adjust the index
39027 of the source lane to account for the doubleword swap. */
39028 static void
39029 adjust_splat (rtx_insn *insn)
39031 rtx body = PATTERN (insn);
39032 rtx unspec = XEXP (body, 1);
39033 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
39034 int lane = INTVAL (XVECEXP (unspec, 0, 1));
39035 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
39036 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
39037 INSN_CODE (insn) = -1; /* Force re-recognition. */
39038 df_insn_rescan (insn);
39040 if (dump_file)
39041 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
39044 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
39045 swap), reverse the order of the source operands and adjust the indices
39046 of the source lanes to account for doubleword reversal. */
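/* For example, a select of lanes [0 2] from (vec_concat A B) becomes
   a select of lanes [1 3] from (vec_concat B A): the code below
   computes new_lane0 = 3 - lane1 and new_lane1 = 3 - lane0. */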
39047 static void
39048 adjust_xxpermdi (rtx_insn *insn)
39050 rtx set = PATTERN (insn);
39051 rtx select = XEXP (set, 1);
39052 rtx concat = XEXP (select, 0);
39053 rtx src0 = XEXP (concat, 0);
39054 XEXP (concat, 0) = XEXP (concat, 1);
39055 XEXP (concat, 1) = src0;
39056 rtx parallel = XEXP (select, 1);
39057 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
39058 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
39059 int new_lane0 = 3 - lane1;
39060 int new_lane1 = 3 - lane0;
39061 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
39062 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
39063 INSN_CODE (insn) = -1; /* Force re-recognition. */
39064 df_insn_rescan (insn);
39066 if (dump_file)
39067 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
39070 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
39071 reverse the order of those inputs. */
39072 static void
39073 adjust_concat (rtx_insn *insn)
39075 rtx set = PATTERN (insn);
39076 rtx concat = XEXP (set, 1);
39077 rtx src0 = XEXP (concat, 0);
39078 XEXP (concat, 0) = XEXP (concat, 1);
39079 XEXP (concat, 1) = src0;
39080 INSN_CODE (insn) = -1; /* Force re-recognition. */
39081 df_insn_rescan (insn);
39083 if (dump_file)
39084 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
39087 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
39088 constant pool to reflect swapped doublewords. */
39089 static void
39090 adjust_vperm (rtx_insn *insn)
39092 /* We previously determined that the UNSPEC_VPERM was fed by a
39093 swap of a swapping load of a TOC-relative constant pool symbol.
39094 Find the MEM in the swapping load and replace it with a MEM for
39095 the adjusted mask constant. */
39096 rtx set = PATTERN (insn);
39097 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
39099 /* Find the swap. */
39100 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39101 df_ref use;
39102 rtx_insn *swap_insn = 0;
39103 FOR_EACH_INSN_INFO_USE (use, insn_info)
39104 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
39106 struct df_link *def_link = DF_REF_CHAIN (use);
39107 gcc_assert (def_link && !def_link->next);
39108 swap_insn = DF_REF_INSN (def_link->ref);
39109 break;
39111 gcc_assert (swap_insn);
39113 /* Find the load. */
39114 insn_info = DF_INSN_INFO_GET (swap_insn);
39115 rtx_insn *load_insn = 0;
39116 FOR_EACH_INSN_INFO_USE (use, insn_info)
39118 struct df_link *def_link = DF_REF_CHAIN (use);
39119 gcc_assert (def_link && !def_link->next);
39120 load_insn = DF_REF_INSN (def_link->ref);
39121 break;
39123 gcc_assert (load_insn);
39125 /* Find the TOC-relative symbol access. */
39126 insn_info = DF_INSN_INFO_GET (load_insn);
39127 rtx_insn *tocrel_insn = 0;
39128 FOR_EACH_INSN_INFO_USE (use, insn_info)
39130 struct df_link *def_link = DF_REF_CHAIN (use);
39131 gcc_assert (def_link && !def_link->next);
39132 tocrel_insn = DF_REF_INSN (def_link->ref);
39133 break;
39135 gcc_assert (tocrel_insn);
39137 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
39138 to set tocrel_base; otherwise it would be unnecessary as we've
39139 already established it will return true. */
39140 rtx base, offset;
39141 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
39142 /* There is an extra level of indirection for small/large code models. */
39143 if (GET_CODE (tocrel_expr) == MEM)
39144 tocrel_expr = XEXP (tocrel_expr, 0);
39145 if (!toc_relative_expr_p (tocrel_expr, false))
39146 gcc_unreachable ();
39147 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
39148 rtx const_vector = get_pool_constant (base);
39149 /* With the extra indirection, get_pool_constant will produce the
39150 real constant from the reg_equal expression, so get the real
39151 constant. */
39152 if (GET_CODE (const_vector) == SYMBOL_REF)
39153 const_vector = get_pool_constant (const_vector);
39154 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
39156 /* Create an adjusted mask from the initial mask. */
39157 unsigned int new_mask[16], i, val;
39158 for (i = 0; i < 16; ++i) {
39159 val = INTVAL (XVECEXP (const_vector, 0, i));
39160 if (val < 16)
39161 new_mask[i] = (val + 8) % 16;
39162 else
39163 new_mask[i] = ((val + 8) % 16) + 16;
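/* For example, selector byte 0 becomes 8, byte 10 becomes 2, and a
   byte such as 16 that selects from the second input becomes 24:
   each index is rotated by 8 within its own 16-byte input. */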
39166 /* Create a new CONST_VECTOR and a MEM that references it. */
39167 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
39168 for (i = 0; i < 16; ++i)
39169 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
39170 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
39171 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
39172 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
39173 can't recognize. Force the SYMBOL_REF into a register. */
39174 if (!REG_P (XEXP (new_mem, 0))) {
39175 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
39176 XEXP (new_mem, 0) = base_reg;
39177 /* Move the newly created insn ahead of the load insn. */
39178 rtx_insn *force_insn = get_last_insn ();
39179 remove_insn (force_insn);
39180 rtx_insn *before_load_insn = PREV_INSN (load_insn);
39181 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
39182 df_insn_rescan (before_load_insn);
39183 df_insn_rescan (force_insn);
39186 /* Replace the MEM in the load instruction and rescan it. */
39187 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
39188 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
39189 df_insn_rescan (load_insn);
39191 if (dump_file)
39192 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
39195 /* The insn described by INSN_ENTRY[I] can be swapped, but only
39196 with special handling. Take care of that here. */
39197 static void
39198 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
39200 rtx_insn *insn = insn_entry[i].insn;
39201 rtx body = PATTERN (insn);
39203 switch (insn_entry[i].special_handling)
39205 default:
39206 gcc_unreachable ();
39207 case SH_CONST_VECTOR:
39209 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
39210 gcc_assert (GET_CODE (body) == SET);
39211 rtx rhs = SET_SRC (body);
39212 swap_const_vector_halves (rhs);
39213 if (dump_file)
39214 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
39215 break;
39217 case SH_SUBREG:
39218 /* A subreg of the same size is already safe. For subregs that
39219 select a smaller portion of a reg, adjust the index for
39220 swapped doublewords. */
39221 adjust_subreg_index (body);
39222 if (dump_file)
39223 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
39224 break;
39225 case SH_NOSWAP_LD:
39226 /* Convert a non-permuting load to a permuting one. */
39227 permute_load (insn);
39228 break;
39229 case SH_NOSWAP_ST:
39230 /* Convert a non-permuting store to a permuting one. */
39231 permute_store (insn);
39232 break;
39233 case SH_EXTRACT:
39234 /* Change the lane on an extract operation. */
39235 adjust_extract (insn);
39236 break;
39237 case SH_SPLAT:
39238 /* Change the lane on a direct-splat operation. */
39239 adjust_splat (insn);
39240 break;
39241 case SH_XXPERMDI:
39242 /* Change the lanes on an XXPERMDI operation. */
39243 adjust_xxpermdi (insn);
39244 break;
39245 case SH_CONCAT:
39246 /* Reverse the order of a concatenation operation. */
39247 adjust_concat (insn);
39248 break;
39249 case SH_VPERM:
39250 /* Change the mask loaded from the constant pool for a VPERM. */
39251 adjust_vperm (insn);
39252 break;
39256 /* Find the insn from the Ith table entry, which is known to be a
39257 register swap Y = SWAP(X). Replace it with a copy Y = X. */
39258 static void
39259 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
39261 rtx_insn *insn = insn_entry[i].insn;
39262 rtx body = PATTERN (insn);
39263 rtx src_reg = XEXP (SET_SRC (body), 0);
39264 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
39265 rtx_insn *new_insn = emit_insn_before (copy, insn);
39266 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
39267 df_insn_rescan (new_insn);
39269 if (dump_file)
39271 unsigned int new_uid = INSN_UID (new_insn);
39272 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
39275 df_insn_delete (insn);
39276 remove_insn (insn);
39277 insn->set_deleted ();
39280 /* Dump the swap table to DUMP_FILE. */
39281 static void
39282 dump_swap_insn_table (swap_web_entry *insn_entry)
39284 int e = get_max_uid ();
39285 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
39287 for (int i = 0; i < e; ++i)
39288 if (insn_entry[i].is_relevant)
39290 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
39291 fprintf (dump_file, "%6d %6d ", i,
39292 pred_entry && pred_entry->insn
39293 ? INSN_UID (pred_entry->insn) : 0);
39294 if (insn_entry[i].is_load)
39295 fputs ("load ", dump_file);
39296 if (insn_entry[i].is_store)
39297 fputs ("store ", dump_file);
39298 if (insn_entry[i].is_swap)
39299 fputs ("swap ", dump_file);
39300 if (insn_entry[i].is_live_in)
39301 fputs ("live-in ", dump_file);
39302 if (insn_entry[i].is_live_out)
39303 fputs ("live-out ", dump_file);
39304 if (insn_entry[i].contains_subreg)
39305 fputs ("subreg ", dump_file);
39306 if (insn_entry[i].is_128_int)
39307 fputs ("int128 ", dump_file);
39308 if (insn_entry[i].is_call)
39309 fputs ("call ", dump_file);
39310 if (insn_entry[i].is_swappable)
39312 fputs ("swappable ", dump_file);
39313 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
39314 fputs ("special:constvec ", dump_file);
39315 else if (insn_entry[i].special_handling == SH_SUBREG)
39316 fputs ("special:subreg ", dump_file);
39317 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
39318 fputs ("special:load ", dump_file);
39319 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
39320 fputs ("special:store ", dump_file);
39321 else if (insn_entry[i].special_handling == SH_EXTRACT)
39322 fputs ("special:extract ", dump_file);
39323 else if (insn_entry[i].special_handling == SH_SPLAT)
39324 fputs ("special:splat ", dump_file);
39325 else if (insn_entry[i].special_handling == SH_XXPERMDI)
39326 fputs ("special:xxpermdi ", dump_file);
39327 else if (insn_entry[i].special_handling == SH_CONCAT)
39328 fputs ("special:concat ", dump_file);
39329 else if (insn_entry[i].special_handling == SH_VPERM)
39330 fputs ("special:vperm ", dump_file);
39332 if (insn_entry[i].web_not_optimizable)
39333 fputs ("unoptimizable ", dump_file);
39334 if (insn_entry[i].will_delete)
39335 fputs ("delete ", dump_file);
39336 fputs ("\n", dump_file);
39338 fputs ("\n", dump_file);
39341 /* Return ALIGN with its address canonicalized to (reg) or
39342 (plus reg reg). Here ALIGN is an (and addr (const_int -16))
39343 expression. Always return a new copy to avoid problems with combine. */
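/* For example, (and (plus r3 r4) (const_int -16)) is returned as a
   fresh rtx of the same shape, while for (and (plus r3 (const_int 16))
   (const_int -16)) the constant term is first forced into a register. */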
39344 static rtx
39345 alignment_with_canonical_addr (rtx align)
39347 rtx canon;
39348 rtx addr = XEXP (align, 0);
39350 if (REG_P (addr))
39351 canon = addr;
39353 else if (GET_CODE (addr) == PLUS)
39355 rtx addrop0 = XEXP (addr, 0);
39356 rtx addrop1 = XEXP (addr, 1);
39358 if (!REG_P (addrop0))
39359 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
39361 if (!REG_P (addrop1))
39362 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
39364 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
39367 else
39368 canon = force_reg (GET_MODE (addr), addr);
39370 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
39373 /* Check whether INSN computes an alignment mask (an AND with -16), and
39374 if so, return a fully-expanded rtx for the masking operation. */
39375 static rtx
39376 alignment_mask (rtx_insn *insn)
39378 rtx body = PATTERN (insn);
39380 if (GET_CODE (body) != SET
39381 || GET_CODE (SET_SRC (body)) != AND
39382 || !REG_P (XEXP (SET_SRC (body), 0)))
39383 return 0;
39385 rtx mask = XEXP (SET_SRC (body), 1);
39387 if (GET_CODE (mask) == CONST_INT)
39389 if (INTVAL (mask) == -16)
39390 return alignment_with_canonical_addr (SET_SRC (body));
39391 else
39392 return 0;
39395 if (!REG_P (mask))
39396 return 0;
39398 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39399 df_ref use;
39400 rtx real_mask = 0;
39402 FOR_EACH_INSN_INFO_USE (use, insn_info)
39404 if (!rtx_equal_p (DF_REF_REG (use), mask))
39405 continue;
39407 struct df_link *def_link = DF_REF_CHAIN (use);
39408 if (!def_link || def_link->next)
39409 return 0;
39411 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
39412 rtx const_body = PATTERN (const_insn);
39413 if (GET_CODE (const_body) != SET)
39414 return 0;
39416 real_mask = SET_SRC (const_body);
39418 if (GET_CODE (real_mask) != CONST_INT
39419 || INTVAL (real_mask) != -16)
39420 return 0;
39423 if (real_mask == 0)
39424 return 0;
39426 return alignment_with_canonical_addr (SET_SRC (body));
39429 /* Given INSN that's a load or store based at BASE_REG, look for a
39430 feeding computation that aligns its address on a 16-byte boundary. */
39431 static rtx
39432 find_alignment_op (rtx_insn *insn, rtx base_reg)
39434 df_ref base_use;
39435 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39436 rtx and_operation = 0;
39438 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
39440 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
39441 continue;
39443 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
39444 if (!base_def_link || base_def_link->next)
39445 break;
39447 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
39448 and_operation = alignment_mask (and_insn);
39449 if (and_operation != 0)
39450 break;
39453 return and_operation;
39456 struct del_info { bool replace; rtx_insn *replace_insn; };
39458 /* If INSN is the load for an lvx pattern, put it in canonical form. */
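/* The canonical form produced here keeps the alignment mask explicit
   in the address, e.g. (set (reg:V4SI vD) (mem:V4SI (and addr
   (const_int -16)))); the register swap that consumed the old load is
   flagged in TO_DELETE so it can later be replaced by a copy. */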
39459 static void
39460 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
39462 rtx body = PATTERN (insn);
39463 gcc_assert (GET_CODE (body) == SET
39464 && GET_CODE (SET_SRC (body)) == VEC_SELECT
39465 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
39467 rtx mem = XEXP (SET_SRC (body), 0);
39468 rtx base_reg = XEXP (mem, 0);
39470 rtx and_operation = find_alignment_op (insn, base_reg);
39472 if (and_operation != 0)
39474 df_ref def;
39475 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39476 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39478 struct df_link *link = DF_REF_CHAIN (def);
39479 if (!link || link->next)
39480 break;
39482 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
39483 if (!insn_is_swap_p (swap_insn)
39484 || insn_is_load_p (swap_insn)
39485 || insn_is_store_p (swap_insn))
39486 break;
39488 /* Expected lvx pattern found. Change the swap to
39489 a copy, and propagate the AND operation into the
39490 load. */
39491 to_delete[INSN_UID (swap_insn)].replace = true;
39492 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
39494 XEXP (mem, 0) = and_operation;
39495 SET_SRC (body) = mem;
39496 INSN_CODE (insn) = -1; /* Force re-recognition. */
39497 df_insn_rescan (insn);
39499 if (dump_file)
39500 fprintf (dump_file, "lvx opportunity found at %d\n",
39501 INSN_UID (insn));
39506 /* If INSN is the store for an stvx pattern, put it in canonical form. */
39507 static void
39508 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
39510 rtx body = PATTERN (insn);
39511 gcc_assert (GET_CODE (body) == SET
39512 && GET_CODE (SET_DEST (body)) == MEM
39513 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
39514 rtx mem = SET_DEST (body);
39515 rtx base_reg = XEXP (mem, 0);
39517 rtx and_operation = find_alignment_op (insn, base_reg);
39519 if (and_operation != 0)
39521 rtx src_reg = XEXP (SET_SRC (body), 0);
39522 df_ref src_use;
39523 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39524 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
39526 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
39527 continue;
39529 struct df_link *link = DF_REF_CHAIN (src_use);
39530 if (!link || link->next)
39531 break;
39533 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
39534 if (!insn_is_swap_p (swap_insn)
39535 || insn_is_load_p (swap_insn)
39536 || insn_is_store_p (swap_insn))
39537 break;
39539 /* Expected stvx pattern found. Change the swap to
39540 a copy, and propagate the AND operation into the
39541 store. */
39542 to_delete[INSN_UID (swap_insn)].replace = true;
39543 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
39545 XEXP (mem, 0) = and_operation;
39546 SET_SRC (body) = src_reg;
39547 INSN_CODE (insn) = -1; /* Force re-recognition. */
39548 df_insn_rescan (insn);
39550 if (dump_file)
39551 fprintf (dump_file, "stvx opportunity found at %d\n",
39552 INSN_UID (insn));
39557 /* Look for patterns created from builtin lvx and stvx calls, and
39558 canonicalize them to be properly recognized as such. */
39559 static void
39560 recombine_lvx_stvx_patterns (function *fun)
39562 int i;
39563 basic_block bb;
39564 rtx_insn *insn;
39566 int num_insns = get_max_uid ();
39567 del_info *to_delete = XCNEWVEC (del_info, num_insns);
39569 FOR_ALL_BB_FN (bb, fun)
39570 FOR_BB_INSNS (bb, insn)
39572 if (!NONDEBUG_INSN_P (insn))
39573 continue;
39575 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
39576 recombine_lvx_pattern (insn, to_delete);
39577 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
39578 recombine_stvx_pattern (insn, to_delete);
39581 /* Turning swaps into copies is delayed until now, to avoid problems
39582 with deleting instructions during the insn walk. */
39583 for (i = 0; i < num_insns; i++)
39584 if (to_delete[i].replace)
39586 rtx swap_body = PATTERN (to_delete[i].replace_insn);
39587 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
39588 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
39589 rtx_insn *new_insn = emit_insn_before (copy,
39590 to_delete[i].replace_insn);
39591 set_block_for_insn (new_insn,
39592 BLOCK_FOR_INSN (to_delete[i].replace_insn));
39593 df_insn_rescan (new_insn);
39594 df_insn_delete (to_delete[i].replace_insn);
39595 remove_insn (to_delete[i].replace_insn);
39596 to_delete[i].replace_insn->set_deleted ();
39599 free (to_delete);
39602 /* Main entry point for this pass. */
39603 unsigned int
39604 rs6000_analyze_swaps (function *fun)
39606 swap_web_entry *insn_entry;
39607 basic_block bb;
39608 rtx_insn *insn, *curr_insn = 0;
39610 /* Dataflow analysis for use-def chains. */
39611 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
39612 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
39613 df_analyze ();
39614 df_set_flags (DF_DEFER_INSN_RESCAN);
39616 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
39617 recombine_lvx_stvx_patterns (fun);
39619 /* Allocate structure to represent webs of insns. */
39620 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
39622 /* Walk the insns to gather basic data. */
39623 FOR_ALL_BB_FN (bb, fun)
39624 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
39626 unsigned int uid = INSN_UID (insn);
39627 if (NONDEBUG_INSN_P (insn))
39629 insn_entry[uid].insn = insn;
39631 if (GET_CODE (insn) == CALL_INSN)
39632 insn_entry[uid].is_call = 1;
39634 /* Walk the uses and defs to see if we mention vector regs.
39635 Record any constraints on optimization of such mentions. */
39636 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39637 df_ref mention;
39638 FOR_EACH_INSN_INFO_USE (mention, insn_info)
39640 /* We use DF_REF_REAL_REG here to get inside any subregs. */
39641 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
39643 /* If a use gets its value from a call insn, it will be
39644 a hard register and will look like (reg:V4SI 3 3).
39645 The df analysis creates two mentions for GPR3 and GPR4,
39646 both DImode. We must recognize this and treat it as a
39647 vector mention to ensure the call is unioned with this
39648 use. */
39649 if (mode == DImode && DF_REF_INSN_INFO (mention))
39651 rtx feeder = DF_REF_INSN (mention);
39652 /* FIXME: It is pretty hard to get from the df mention
39653 to the mode of the use in the insn. We arbitrarily
39654 pick a vector mode here, even though the use might
39655 be a real DImode. We can be too conservative
39656 (create a web larger than necessary) because of
39657 this, so consider eventually fixing this. */
39658 if (GET_CODE (feeder) == CALL_INSN)
39659 mode = V4SImode;
39662 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
39664 insn_entry[uid].is_relevant = 1;
39665 if (mode == TImode || mode == V1TImode
39666 || FLOAT128_VECTOR_P (mode))
39667 insn_entry[uid].is_128_int = 1;
39668 if (DF_REF_INSN_INFO (mention))
39669 insn_entry[uid].contains_subreg
39670 = !rtx_equal_p (DF_REF_REG (mention),
39671 DF_REF_REAL_REG (mention));
39672 union_defs (insn_entry, insn, mention);
39675 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
39677 /* We use DF_REF_REAL_REG here to get inside any subregs. */
39678 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
39680 /* If we're loading up a hard vector register for a call,
39681 it looks like (set (reg:V4SI 9 9) (...)). The df
39682 analysis creates two mentions for GPR9 and GPR10, both
39683 DImode. So relying on the mode from the mentions
39684 isn't sufficient to ensure we union the call into the
39685 web with the parameter setup code. */
39686 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
39687 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
39688 mode = GET_MODE (SET_DEST (PATTERN (insn)));
39690 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
39692 insn_entry[uid].is_relevant = 1;
39693 if (mode == TImode || mode == V1TImode
39694 || FLOAT128_VECTOR_P (mode))
39695 insn_entry[uid].is_128_int = 1;
39696 if (DF_REF_INSN_INFO (mention))
39697 insn_entry[uid].contains_subreg
39698 = !rtx_equal_p (DF_REF_REG (mention),
39699 DF_REF_REAL_REG (mention));
39700 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
39701 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
39702 insn_entry[uid].is_live_out = 1;
39703 union_uses (insn_entry, insn, mention);
39707 if (insn_entry[uid].is_relevant)
39709 /* Determine if this is a load or store. */
39710 insn_entry[uid].is_load = insn_is_load_p (insn);
39711 insn_entry[uid].is_store = insn_is_store_p (insn);
39713 /* Determine if this is a doubleword swap. If not,
39714 determine whether it can legally be swapped. */
39715 if (insn_is_swap_p (insn))
39716 insn_entry[uid].is_swap = 1;
39717 else
39719 unsigned int special = SH_NONE;
39720 insn_entry[uid].is_swappable
39721 = insn_is_swappable_p (insn_entry, insn, &special);
39722 if (special != SH_NONE && insn_entry[uid].contains_subreg)
39723 insn_entry[uid].is_swappable = 0;
39724 else if (special != SH_NONE)
39725 insn_entry[uid].special_handling = special;
39726 else if (insn_entry[uid].contains_subreg)
39727 insn_entry[uid].special_handling = SH_SUBREG;
39733 if (dump_file)
39735 fprintf (dump_file, "\nSwap insn entry table when first built\n");
39736 dump_swap_insn_table (insn_entry);
39739 /* Record unoptimizable webs. */
39740 unsigned e = get_max_uid (), i;
39741 for (i = 0; i < e; ++i)
39743 if (!insn_entry[i].is_relevant)
39744 continue;
39746 swap_web_entry *root
39747 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
39749 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
39750 || (insn_entry[i].contains_subreg
39751 && insn_entry[i].special_handling != SH_SUBREG)
39752 || insn_entry[i].is_128_int || insn_entry[i].is_call
39753 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
39754 root->web_not_optimizable = 1;
39756 /* If we have loads or stores that aren't permuting then the
39757 optimization isn't appropriate. */
39758 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
39759 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
39760 root->web_not_optimizable = 1;
39762 /* If we have permuting loads or stores that are not accompanied
39763 by a register swap, the optimization isn't appropriate. */
39764 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
39766 rtx insn = insn_entry[i].insn;
39767 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39768 df_ref def;
39770 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39772 struct df_link *link = DF_REF_CHAIN (def);
39774 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
39776 root->web_not_optimizable = 1;
39777 break;
39781 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
39783 rtx insn = insn_entry[i].insn;
39784 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39785 df_ref use;
39787 FOR_EACH_INSN_INFO_USE (use, insn_info)
39789 struct df_link *link = DF_REF_CHAIN (use);
39791 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
39793 root->web_not_optimizable = 1;
39794 break;
39800 if (dump_file)
39802 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
39803 dump_swap_insn_table (insn_entry);
39806 /* For each load and store in an optimizable web (which implies
39807 the loads and stores are permuting), find the associated
39808 register swaps and mark them for removal. Due to various
39809 optimizations we may mark the same swap more than once. Also
39810 perform special handling for swappable insns that require it. */
39811 for (i = 0; i < e; ++i)
39812 if ((insn_entry[i].is_load || insn_entry[i].is_store)
39813 && insn_entry[i].is_swap)
39815 swap_web_entry* root_entry
39816 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
39817 if (!root_entry->web_not_optimizable)
39818 mark_swaps_for_removal (insn_entry, i);
39820 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
39822 swap_web_entry* root_entry
39823 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
39824 if (!root_entry->web_not_optimizable)
39825 handle_special_swappables (insn_entry, i);
39828 /* Now delete the swaps marked for removal. */
39829 for (i = 0; i < e; ++i)
39830 if (insn_entry[i].will_delete)
39831 replace_swap_with_copy (insn_entry, i);
39833 /* Clean up. */
39834 free (insn_entry);
39835 return 0;
39838 const pass_data pass_data_analyze_swaps =
39840 RTL_PASS, /* type */
39841 "swaps", /* name */
39842 OPTGROUP_NONE, /* optinfo_flags */
39843 TV_NONE, /* tv_id */
39844 0, /* properties_required */
39845 0, /* properties_provided */
39846 0, /* properties_destroyed */
39847 0, /* todo_flags_start */
39848 TODO_df_finish, /* todo_flags_finish */
39851 class pass_analyze_swaps : public rtl_opt_pass
39853 public:
39854 pass_analyze_swaps(gcc::context *ctxt)
39855 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
39858 /* opt_pass methods: */
39859 virtual bool gate (function *)
39861 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
39862 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
39865 virtual unsigned int execute (function *fun)
39867 return rs6000_analyze_swaps (fun);
39870 }; // class pass_analyze_swaps
39872 rtl_opt_pass *
39873 make_pass_analyze_swaps (gcc::context *ctxt)
39875 return new pass_analyze_swaps (ctxt);
39878 #ifdef RS6000_GLIBC_ATOMIC_FENV
39879 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
39880 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
39881 #endif
39883 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
39885 static void
39886 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
39888 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
39890 #ifdef RS6000_GLIBC_ATOMIC_FENV
39891 if (atomic_hold_decl == NULL_TREE)
39893 atomic_hold_decl
39894 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39895 get_identifier ("__atomic_feholdexcept"),
39896 build_function_type_list (void_type_node,
39897 double_ptr_type_node,
39898 NULL_TREE));
39899 TREE_PUBLIC (atomic_hold_decl) = 1;
39900 DECL_EXTERNAL (atomic_hold_decl) = 1;
39903 if (atomic_clear_decl == NULL_TREE)
39905 atomic_clear_decl
39906 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39907 get_identifier ("__atomic_feclearexcept"),
39908 build_function_type_list (void_type_node,
39909 NULL_TREE));
39910 TREE_PUBLIC (atomic_clear_decl) = 1;
39911 DECL_EXTERNAL (atomic_clear_decl) = 1;
39914 tree const_double = build_qualified_type (double_type_node,
39915 TYPE_QUAL_CONST);
39916 tree const_double_ptr = build_pointer_type (const_double);
39917 if (atomic_update_decl == NULL_TREE)
39919 atomic_update_decl
39920 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39921 get_identifier ("__atomic_feupdateenv"),
39922 build_function_type_list (void_type_node,
39923 const_double_ptr,
39924 NULL_TREE));
39925 TREE_PUBLIC (atomic_update_decl) = 1;
39926 DECL_EXTERNAL (atomic_update_decl) = 1;
39929 tree fenv_var = create_tmp_var_raw (double_type_node);
39930 TREE_ADDRESSABLE (fenv_var) = 1;
39931 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
39933 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
39934 *clear = build_call_expr (atomic_clear_decl, 0);
39935 *update = build_call_expr (atomic_update_decl, 1,
39936 fold_convert (const_double_ptr, fenv_addr));
39937 #endif
39938 return;
39941 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
39942 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
39943 tree call_mffs = build_call_expr (mffs, 0);
39945 /* Generates the equivalent of feholdexcept (&fenv_var)
39947 fenv_var = __builtin_mffs ();
39948 double fenv_hold;
39949 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
39950 __builtin_mtfsf (0xff, fenv_hold); */
39952 /* Mask to clear everything except for the rounding modes and non-IEEE
39953 arithmetic flag. */
39954 const unsigned HOST_WIDE_INT hold_exception_mask =
39955 HOST_WIDE_INT_C (0xffffffff00000007);
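/* The mffs result carries the FPSCR image in its low-order 32 bits,
   so 0x7 preserves the NI (non-IEEE mode) bit and the two RN
   (rounding mode) bits while everything else is cleared. */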
39957 tree fenv_var = create_tmp_var_raw (double_type_node);
39959 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
39961 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
39962 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39963 build_int_cst (uint64_type_node,
39964 hold_exception_mask));
39966 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39967 fenv_llu_and);
39969 tree hold_mtfsf = build_call_expr (mtfsf, 2,
39970 build_int_cst (unsigned_type_node, 0xff),
39971 fenv_hold_mtfsf);
39973 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
39975 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
39977 double fenv_clear = __builtin_mffs ();
39978 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
39979 __builtin_mtfsf (0xff, fenv_clear); */
39981 /* Mask to clear the entire FPSCR image in the low-order word,
39982 including the exception flags, enable bits, and rounding modes. */
39983 const unsigned HOST_WIDE_INT clear_exception_mask =
39984 HOST_WIDE_INT_C (0xffffffff00000000);
39986 tree fenv_clear = create_tmp_var_raw (double_type_node);
39988 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
39990 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
39991 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
39992 fenv_clean_llu,
39993 build_int_cst (uint64_type_node,
39994 clear_exception_mask));
39996 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39997 fenv_clear_llu_and);
39999 tree clear_mtfsf = build_call_expr (mtfsf, 2,
40000 build_int_cst (unsigned_type_node, 0xff),
40001 fenv_clear_mtfsf);
40003 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
40005 /* Generates the equivalent of feupdateenv (&fenv_var)
40007 double old_fenv = __builtin_mffs ();
40008 double fenv_update;
40009 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
40010 (*(uint64_t*)&fenv_var & 0x1ff80fff);
40011 __builtin_mtfsf (0xff, fenv_update); */
40013 const unsigned HOST_WIDE_INT update_exception_mask =
40014 HOST_WIDE_INT_C (0xffffffff1fffff00);
40015 const unsigned HOST_WIDE_INT new_exception_mask =
40016 HOST_WIDE_INT_C (0x1ff80fff);
40018 tree old_fenv = create_tmp_var_raw (double_type_node);
40019 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
40021 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
40022 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
40023 build_int_cst (uint64_type_node,
40024 update_exception_mask));
40026 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
40027 build_int_cst (uint64_type_node,
40028 new_exception_mask));
40030 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
40031 old_llu_and, new_llu_and);
40033 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
40034 new_llu_mask);
40036 tree update_mtfsf = build_call_expr (mtfsf, 2,
40037 build_int_cst (unsigned_type_node, 0xff),
40038 fenv_update_mtfsf);
40040 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
40043 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
40045 static bool
40046 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
40047 optimization_type opt_type)
40049 switch (op)
40051 case rsqrt_optab:
40052 return (opt_type == OPTIMIZE_FOR_SPEED
40053 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
40055 default:
40056 return true;
40060 struct gcc_target targetm = TARGET_INITIALIZER;
40062 #include "gt-rs6000.h"