[RS6000] Delete duplicate code
gcc/config/rs6000/rs6000.c (official-gcc.git)
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we call it so we
   can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use the variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL  = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;   /* option name */
  unsigned int mask;    /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",      PPC_PLATFORM_POWER9 },
  { "power8",      PPC_PLATFORM_POWER8 },
  { "power7",      PPC_PLATFORM_POWER7 },
  { "power6x",     PPC_PLATFORM_POWER6X },
  { "power6",      PPC_PLATFORM_POWER6 },
  { "power5+",     PPC_PLATFORM_POWER5_PLUS },
  { "power5",      PPC_PLATFORM_POWER5 },
  { "ppc970",      PPC_PLATFORM_PPC970 },
  { "power4",      PPC_PLATFORM_POWER4 },
  { "ppca2",       PPC_PLATFORM_PPCA2 },
  { "ppc476",      PPC_PLATFORM_PPC476 },
  { "ppc464",      PPC_PLATFORM_PPC464 },
  { "ppc440",      PPC_PLATFORM_PPC440 },
  { "ppc405",      PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",    PPC_FEATURE_HAS_4xxMAC,     0 },
  { "altivec",   PPC_FEATURE_HAS_ALTIVEC,    0 },
  { "arch_2_05", PPC_FEATURE_ARCH_2_05,      0 },
  { "arch_2_06", PPC_FEATURE_ARCH_2_06,      0 },
  { "archpmu",   PPC_FEATURE_PERFMON_COMPAT, 0 },
  { "booke",     PPC_FEATURE_BOOKE,          0 },
  { "cellbe",    PPC_FEATURE_CELL_BE,        0 },
  { "dfp",       PPC_FEATURE_HAS_DFP,        0 },
  { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
  { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
  { "fpu",       PPC_FEATURE_HAS_FPU,        0 },
  { "ic_snoop",  PPC_FEATURE_ICACHE_SNOOP,   0 },
  { "mmu",       PPC_FEATURE_HAS_MMU,        0 },
  { "notb",      PPC_FEATURE_NO_TB,          0 },
  { "pa6t",      PPC_FEATURE_PA6T,           0 },
  { "power4",    PPC_FEATURE_POWER4,         0 },
  { "power5",    PPC_FEATURE_POWER5,         0 },
  { "power5+",   PPC_FEATURE_POWER5_PLUS,    0 },
  { "power6x",   PPC_FEATURE_POWER6_EXT,     0 },
  { "ppc32",     PPC_FEATURE_32,             0 },
  { "ppc601",    PPC_FEATURE_601_INSTR,      0 },
  { "ppc64",     PPC_FEATURE_64,             0 },
  { "ppcle",     PPC_FEATURE_PPC_LE,         0 },
  { "smt",       PPC_FEATURE_SMT,            0 },
  { "spe",       PPC_FEATURE_HAS_SPE,        0 },
  { "true_le",   PPC_FEATURE_TRUE_LE,        0 },
  { "ucache",    PPC_FEATURE_UNIFIED_CACHE,  0 },
  { "vsx",       PPC_FEATURE_HAS_VSX,        0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07", PPC_FEATURE2_ARCH_2_07,      1 },
  { "dscr",      PPC_FEATURE2_HAS_DSCR,       1 },
  { "ebb",       PPC_FEATURE2_HAS_EBB,        1 },
  { "htm",       PPC_FEATURE2_HAS_HTM,        1 },
  { "htm-nosc",  PPC_FEATURE2_HTM_NOSC,       1 },
  { "isel",      PPC_FEATURE2_HAS_ISEL,       1 },
  { "tar",       PPC_FEATURE2_HAS_TAR,        1 },
  { "vcrypto",   PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00", PPC_FEATURE2_ARCH_3_00,      1 },
  { "ieee128",   PPC_FEATURE2_HAS_IEEE128,    1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about for secondary reload and for deciding
   whether an address is legitimate.  We only need to worry about GPR, FPR,
   and Altivec registers here, along with an ANY field that is the OR of the
   three register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,   /* General purpose registers.  */
  RELOAD_REG_FPR,   /* Traditional floating point regs.  */
  RELOAD_REG_VMX,   /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,   /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the three register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;   /* Register class name.  */
  int reg;            /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },      /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },      /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX.  */
  { "Any", -1 },                   /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID       0x01  /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE    0x02  /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED     0x04  /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET      0x08  /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC  0x10  /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY  0x20  /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16     0x40  /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET 0x80  /* quad offset is limited.  */
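
/* As a worked example, a mode that is valid in GPRs with both reg+reg and
   reg+offset addressing would carry
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d
   in addr_mask[RELOAD_REG_GPR]; the mode_supports_* helpers below simply
   AND one of these bits against the mask for the register class of
   interest.  */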
/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads.  */
  /* INSNs for fusing addi with loads or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
  bool fused_toc;                 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */
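
/* COSTS_N_INSNS (N) expresses a cost of N instructions in the units used
   by the RTL cost hooks, with COSTS_N_INSNS (1) being roughly one integer
   add.  So in rs64a_cost below, for instance, mulsi = COSTS_N_INSNS (20)
   models an SImode multiply as costing about twenty adds.  */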
/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
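
/* So each RS6000_BUILTIN_n (ENUM, NAME, MASK, ATTR, ICODE) line in
   rs6000-builtin.def expands, via the temporary macro definitions above,
   into an initializer { NAME, ICODE, MASK, ATTR } in rs6000_builtin_info.
   Since the builtin enum is generated from the same .def file in the same
   order, the ENUM value should also serve as the index into this table
   (a sketch of the scheme, not a new guarantee).  */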
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);
rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];    /* return value + 3 arguments.  */
  unsigned char uns_p[4];  /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",    1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",   0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct",  0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,         0, 0, false, false, false, NULL, false }
};
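
/* Rough usage sketch: the "altivec" attribute mostly backs the AltiVec
   keywords, which rs6000-c.c rewrites into this attribute, approximately

     typedef int v4si __attribute__ ((altivec (vector__)));

   while "longcall"/"shortcall" let users override the call model, e.g.

     void far_func (void) __attribute__ ((longcall));

   (Illustrative declarations, not taken from this file.)  */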
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
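
/* E.g. ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (the %v0 bit)
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) is 0x20000000, matching
   the big-endian bit numbering of the VRSAVE register.  */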
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1602 #undef TARGET_INIT_BUILTINS
1603 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1604 #undef TARGET_BUILTIN_DECL
1605 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1607 #undef TARGET_FOLD_BUILTIN
1608 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1610 #undef TARGET_EXPAND_BUILTIN
1611 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1613 #undef TARGET_MANGLE_TYPE
1614 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1616 #undef TARGET_INIT_LIBFUNCS
1617 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1619 #if TARGET_MACHO
1620 #undef TARGET_BINDS_LOCAL_P
1621 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1622 #endif
1624 #undef TARGET_MS_BITFIELD_LAYOUT_P
1625 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1627 #undef TARGET_ASM_OUTPUT_MI_THUNK
1628 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1630 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1631 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1633 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1634 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1636 #undef TARGET_REGISTER_MOVE_COST
1637 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1638 #undef TARGET_MEMORY_MOVE_COST
1639 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1640 #undef TARGET_CANNOT_COPY_INSN_P
1641 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1642 #undef TARGET_RTX_COSTS
1643 #define TARGET_RTX_COSTS rs6000_rtx_costs
1644 #undef TARGET_ADDRESS_COST
1645 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1647 #undef TARGET_DWARF_REGISTER_SPAN
1648 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1650 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1651 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1653 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1654 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1656 #undef TARGET_PROMOTE_FUNCTION_MODE
1657 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1659 #undef TARGET_RETURN_IN_MEMORY
1660 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1662 #undef TARGET_RETURN_IN_MSB
1663 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1665 #undef TARGET_SETUP_INCOMING_VARARGS
1666 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1668 /* Always strict argument naming on rs6000. */
1669 #undef TARGET_STRICT_ARGUMENT_NAMING
1670 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1671 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1672 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1673 #undef TARGET_SPLIT_COMPLEX_ARG
1674 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1675 #undef TARGET_MUST_PASS_IN_STACK
1676 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1677 #undef TARGET_PASS_BY_REFERENCE
1678 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1679 #undef TARGET_ARG_PARTIAL_BYTES
1680 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1681 #undef TARGET_FUNCTION_ARG_ADVANCE
1682 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1683 #undef TARGET_FUNCTION_ARG
1684 #define TARGET_FUNCTION_ARG rs6000_function_arg
1685 #undef TARGET_FUNCTION_ARG_BOUNDARY
1686 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1688 #undef TARGET_BUILD_BUILTIN_VA_LIST
1689 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1691 #undef TARGET_EXPAND_BUILTIN_VA_START
1692 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1694 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1695 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1697 #undef TARGET_EH_RETURN_FILTER_MODE
1698 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1700 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1701 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1703 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1704 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1706 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1707 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1709 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1710 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1712 #undef TARGET_MD_ASM_ADJUST
1713 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1715 #undef TARGET_OPTION_OVERRIDE
1716 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1718 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1719 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1720 rs6000_builtin_vectorized_function
1722 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1723 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1724 rs6000_builtin_md_vectorized_function
1726 #if !TARGET_MACHO
1727 #undef TARGET_STACK_PROTECT_FAIL
1728 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1729 #endif
1731 #ifdef HAVE_AS_TLS
1732 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1733 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1734 #endif
1736 /* Use a 32-bit anchor range. This leads to sequences like:
1738 addis tmp,anchor,high
1739 add dest,tmp,low
1741 where tmp itself acts as an anchor, and can be shared between
1742 accesses to the same 64k page. */
1743 #undef TARGET_MIN_ANCHOR_OFFSET
1744 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1745 #undef TARGET_MAX_ANCHOR_OFFSET
1746 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
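/* Illustrative arithmetic for the +/-2GB anchor range above (not from the
   original source): an access at anchor+0x12345678 splits into a high part
   of 0x1234 (materialized by addis, which shifts it left 16 bits) and a low
   part of 0x5678 folded into the dependent add or memory access, so any
   other access within the same 64k page can reuse the addis result.  */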
1747 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1748 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1749 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1750 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1752 #undef TARGET_BUILTIN_RECIPROCAL
1753 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1755 #undef TARGET_EXPAND_TO_RTL_HOOK
1756 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1758 #undef TARGET_INSTANTIATE_DECLS
1759 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1761 #undef TARGET_SECONDARY_RELOAD
1762 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1764 #undef TARGET_LEGITIMATE_ADDRESS_P
1765 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1767 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1768 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1770 #undef TARGET_LRA_P
1771 #define TARGET_LRA_P rs6000_lra_p
1773 #undef TARGET_CAN_ELIMINATE
1774 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1776 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1777 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1779 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1780 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1782 #undef TARGET_TRAMPOLINE_INIT
1783 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1785 #undef TARGET_FUNCTION_VALUE
1786 #define TARGET_FUNCTION_VALUE rs6000_function_value
1788 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1789 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1791 #undef TARGET_OPTION_SAVE
1792 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1794 #undef TARGET_OPTION_RESTORE
1795 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1797 #undef TARGET_OPTION_PRINT
1798 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1800 #undef TARGET_CAN_INLINE_P
1801 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1803 #undef TARGET_SET_CURRENT_FUNCTION
1804 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1806 #undef TARGET_LEGITIMATE_CONSTANT_P
1807 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1809 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1810 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1812 #undef TARGET_CAN_USE_DOLOOP_P
1813 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1815 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1816 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1818 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1819 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1820 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1821 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1822 #undef TARGET_UNWIND_WORD_MODE
1823 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1825 #undef TARGET_OFFLOAD_OPTIONS
1826 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1828 #undef TARGET_C_MODE_FOR_SUFFIX
1829 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1831 #undef TARGET_INVALID_BINARY_OP
1832 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1834 #undef TARGET_OPTAB_SUPPORTED_P
1835 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1838 /* Processor table. */
1839 struct rs6000_ptt
1841 const char *const name; /* Canonical processor name. */
1842 const enum processor_type processor; /* Processor type enum value. */
1843 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1846 static struct rs6000_ptt const processor_target_table[] =
1848 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1849 #include "rs6000-cpus.def"
1850 #undef RS6000_CPU
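/* A sketch of how the X-macro include above expands, assuming a hypothetical
   rs6000-cpus.def entry of the form
       RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64)
   which would become the initializer row
       { "power8", PROCESSOR_POWER8, MASK_POWERPC64 },
   keeping this table in lockstep with the option-handling code that includes
   the same .def file.  */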
1853 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1854 name is invalid. */
1856 static int
1857 rs6000_cpu_name_lookup (const char *name)
1859 size_t i;
1861 if (name != NULL)
1863 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1864 if (! strcmp (name, processor_target_table[i].name))
1865 return (int)i;
1868 return -1;
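/* Usage note (illustrative): rs6000_cpu_name_lookup ("power7") returns the
   index of the matching processor_target_table row, suitable for indexing
   the table directly; any unrecognized -mcpu=/-mtune= name yields -1.  */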
1872 /* Return number of consecutive hard regs needed starting at reg REGNO
1873 to hold something of mode MODE.
1874 This is ordinarily the length in words of a value of mode MODE
1875 but can be less for certain modes in special long registers.
1877 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1878 scalar instructions. The upper 32 bits are only available to the
1879 SIMD instructions.
1881 POWER and PowerPC GPRs hold 32 bits worth;
1882 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1884 static int
1885 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1887 unsigned HOST_WIDE_INT reg_size;
1889 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1890 128-bit floating point that can go in vector registers, which has VSX
1891 memory addressing. */
1892 if (FP_REGNO_P (regno))
1893 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1894 ? UNITS_PER_VSX_WORD
1895 : UNITS_PER_FP_WORD);
1897 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1898 reg_size = UNITS_PER_SPE_WORD;
1900 else if (ALTIVEC_REGNO_P (regno))
1901 reg_size = UNITS_PER_ALTIVEC_WORD;
1903 /* The value returned for SCmode in the E500 double case is 2 for
1904 ABI compatibility; storing an SCmode value in a single register
1905 would require function_arg and rs6000_spe_function_arg to handle
1906 SCmode so as to pass the value correctly in a pair of
1907 registers. */
1908 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1909 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1910 reg_size = UNITS_PER_FP_WORD;
1912 else
1913 reg_size = UNITS_PER_WORD;
1915 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
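/* Worked example of the rounding division above: an 8-byte DFmode value in
   32-bit GPRs (reg_size == 4) needs (8 + 4 - 1) / 4 == 2 registers, while
   the same value in an FPR (reg_size == UNITS_PER_FP_WORD == 8) needs just
   (8 + 8 - 1) / 8 == 1.  */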
1918 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1919 MODE. */
1920 static int
1921 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1923 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1925 if (COMPLEX_MODE_P (mode))
1926 mode = GET_MODE_INNER (mode);
1928 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1929 register combinations, and PTImode is what we use for them. Don't allow
1930 quad words in the argument or frame pointer registers, just registers
1931 0..31. */
1932 if (mode == PTImode)
1933 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1934 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1935 && ((regno & 1) == 0));
1937 /* VSX registers that overlap the FPR registers are larger than in non-VSX
1938 implementations. Don't allow an item to be split between an FP register
1939 and an Altivec register. Allow TImode in all VSX registers if the user
1940 asked for it. */
1941 if (TARGET_VSX && VSX_REGNO_P (regno)
1942 && (VECTOR_MEM_VSX_P (mode)
1943 || FLOAT128_VECTOR_P (mode)
1944 || reg_addr[mode].scalar_in_vmx_p
1945 || (TARGET_VSX_TIMODE && mode == TImode)
1946 || (TARGET_VADDUQM && mode == V1TImode)
1947 || (TARGET_UPPER_REGS_DI && mode == DImode)))
1949 if (FP_REGNO_P (regno))
1950 return FP_REGNO_P (last_regno);
1952 if (ALTIVEC_REGNO_P (regno))
1954 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1955 return 0;
1957 return ALTIVEC_REGNO_P (last_regno);
1961 /* The GPRs can hold any mode, but values bigger than one register
1962 cannot go past R31. */
1963 if (INT_REGNO_P (regno))
1964 return INT_REGNO_P (last_regno);
1966 /* The float registers (except for VSX vector modes) can only hold floating
1967 modes and DImode. */
1968 if (FP_REGNO_P (regno))
1970 if (FLOAT128_VECTOR_P (mode))
1971 return false;
1973 if (SCALAR_FLOAT_MODE_P (mode)
1974 && (mode != TDmode || (regno % 2) == 0)
1975 && FP_REGNO_P (last_regno))
1976 return 1;
1978 if (GET_MODE_CLASS (mode) == MODE_INT
1979 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1980 return 1;
1982 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1983 && PAIRED_VECTOR_MODE (mode))
1984 return 1;
1986 return 0;
1989 /* The CR register can only hold CC modes. */
1990 if (CR_REGNO_P (regno))
1991 return GET_MODE_CLASS (mode) == MODE_CC;
1993 if (CA_REGNO_P (regno))
1994 return mode == Pmode || mode == SImode;
1996 /* The AltiVec registers can hold only AltiVec/VSX vector modes. */
1997 if (ALTIVEC_REGNO_P (regno))
1998 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1999 || mode == V1TImode);
2001 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2002 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2003 return 1;
2005 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2006 registers, and the value must fit within the register set. */
2008 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
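/* Two illustrative consequences of the checks above: TDmode is rejected if
   it would start in an odd-numbered FPR (the regno % 2 test), and the CR
   registers accept only MODE_CC values, while the FPR and AltiVec registers
   reject CC modes outright.  */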
2011 /* Print interesting facts about registers. */
2012 static void
2013 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2015 int r, m;
2017 for (r = first_regno; r <= last_regno; ++r)
2019 const char *comma = "";
2020 int len;
2022 if (first_regno == last_regno)
2023 fprintf (stderr, "%s:\t", reg_name);
2024 else
2025 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2027 len = 8;
2028 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2029 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2031 if (len > 70)
2033 fprintf (stderr, ",\n\t");
2034 len = 8;
2035 comma = "";
2038 if (rs6000_hard_regno_nregs[m][r] > 1)
2039 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2040 rs6000_hard_regno_nregs[m][r]);
2041 else
2042 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2044 comma = ", ";
2047 if (call_used_regs[r])
2049 if (len > 70)
2051 fprintf (stderr, ",\n\t");
2052 len = 8;
2053 comma = "";
2056 len += fprintf (stderr, "%s%s", comma, "call-used");
2057 comma = ", ";
2060 if (fixed_regs[r])
2062 if (len > 70)
2064 fprintf (stderr, ",\n\t");
2065 len = 8;
2066 comma = "";
2069 len += fprintf (stderr, "%s%s", comma, "fixed");
2070 comma = ", ";
2073 if (len > 70)
2075 fprintf (stderr, ",\n\t");
2076 comma = "";
2079 len += fprintf (stderr, "%sreg-class = %s", comma,
2080 reg_class_names[(int)rs6000_regno_regclass[r]]);
2081 comma = ", ";
2083 if (len > 70)
2085 fprintf (stderr, ",\n\t");
2086 comma = "";
2089 fprintf (stderr, "%sregno = %d\n", comma, r);
2093 static const char *
2094 rs6000_debug_vector_unit (enum rs6000_vector v)
2096 const char *ret;
2098 switch (v)
2100 case VECTOR_NONE: ret = "none"; break;
2101 case VECTOR_ALTIVEC: ret = "altivec"; break;
2102 case VECTOR_VSX: ret = "vsx"; break;
2103 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2104 case VECTOR_PAIRED: ret = "paired"; break;
2105 case VECTOR_SPE: ret = "spe"; break;
2106 case VECTOR_OTHER: ret = "other"; break;
2107 default: ret = "unknown"; break;
2110 return ret;
2113 /* Inner function printing just the address mask for a particular reload
2114 register class. */
2115 DEBUG_FUNCTION char *
2116 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2118 static char ret[8];
2119 char *p = ret;
2121 if ((mask & RELOAD_REG_VALID) != 0)
2122 *p++ = 'v';
2123 else if (keep_spaces)
2124 *p++ = ' ';
2126 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2127 *p++ = 'm';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2131 if ((mask & RELOAD_REG_INDEXED) != 0)
2132 *p++ = 'i';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2136 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2137 *p++ = 'O';
2138 else if ((mask & RELOAD_REG_OFFSET) != 0)
2139 *p++ = 'o';
2140 else if (keep_spaces)
2141 *p++ = ' ';
2143 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2144 *p++ = '+';
2145 else if (keep_spaces)
2146 *p++ = ' ';
2148 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2149 *p++ = '+';
2150 else if (keep_spaces)
2151 *p++ = ' ';
2153 if ((mask & RELOAD_REG_AND_M16) != 0)
2154 *p++ = '&';
2155 else if (keep_spaces)
2156 *p++ = ' ';
2158 *p = '\0';
2160 return ret;
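/* Example output (assuming a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED
   and RELOAD_REG_OFFSET set): "vio" when keep_spaces is false, or the same
   letters padded with blanks in the unused slots when keep_spaces is true,
   so the columns line up across modes in the -mdebug=reg dump.  */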
2163 /* Print the address masks in a human readable fashion. */
2164 DEBUG_FUNCTION void
2165 rs6000_debug_print_mode (ssize_t m)
2167 ssize_t rc;
2168 int spaces = 0;
2169 bool fuse_extra_p;
2171 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2172 for (rc = 0; rc < N_RELOAD_REG; rc++)
2173 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2174 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2176 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2177 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2178 fprintf (stderr, " Reload=%c%c",
2179 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2180 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2181 else
2182 spaces += sizeof (" Reload=sl") - 1;
2184 if (reg_addr[m].scalar_in_vmx_p)
2186 fprintf (stderr, "%*s Upper=y", spaces, "");
2187 spaces = 0;
2189 else
2190 spaces += sizeof (" Upper=y") - 1;
2192 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2193 || reg_addr[m].fused_toc);
2194 if (!fuse_extra_p)
2196 for (rc = 0; rc < N_RELOAD_REG; rc++)
2198 if (rc != RELOAD_REG_ANY)
2200 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2202 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2203 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2204 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2206 fuse_extra_p = true;
2207 break;
2213 if (fuse_extra_p)
2215 fprintf (stderr, "%*s Fuse:", spaces, "");
2216 spaces = 0;
2218 for (rc = 0; rc < N_RELOAD_REG; rc++)
2220 if (rc != RELOAD_REG_ANY)
2222 char load, store;
2224 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2225 load = 'l';
2226 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2227 load = 'L';
2228 else
2229 load = '-';
2231 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2232 store = 's';
2233 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2234 store = 'S';
2235 else
2236 store = '-';
2238 if (load == '-' && store == '-')
2239 spaces += 5;
2240 else
2242 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2243 reload_reg_map[rc].name[0], load, store);
2244 spaces = 0;
2249 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2251 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2252 spaces = 0;
2254 else
2255 spaces += sizeof (" P8gpr") - 1;
2257 if (reg_addr[m].fused_toc)
2259 fprintf (stderr, "%*sToc", (spaces + 1), "");
2260 spaces = 0;
2262 else
2263 spaces += sizeof (" Toc") - 1;
2265 else
2266 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2268 if (rs6000_vector_unit[m] != VECTOR_NONE
2269 || rs6000_vector_mem[m] != VECTOR_NONE)
2271 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2272 spaces, "",
2273 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2274 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2277 fputs ("\n", stderr);
2280 #define DEBUG_FMT_ID "%-32s= "
2281 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2282 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2283 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
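/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "aix") prints the key
   left-justified in a 32-character field followed by "= aix", which keeps
   the -mdebug=reg report aligned in two columns.  */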
2285 /* Print various interesting information with -mdebug=reg. */
2286 static void
2287 rs6000_debug_reg_global (void)
2289 static const char *const tf[2] = { "false", "true" };
2290 const char *nl = (const char *)0;
2291 int m;
2292 size_t m1, m2, v;
2293 char costly_num[20];
2294 char nop_num[20];
2295 char flags_buffer[40];
2296 const char *costly_str;
2297 const char *nop_str;
2298 const char *trace_str;
2299 const char *abi_str;
2300 const char *cmodel_str;
2301 struct cl_target_option cl_opts;
2303 /* Modes we want tieable information on. */
2304 static const machine_mode print_tieable_modes[] = {
2305 QImode,
2306 HImode,
2307 SImode,
2308 DImode,
2309 TImode,
2310 PTImode,
2311 SFmode,
2312 DFmode,
2313 TFmode,
2314 IFmode,
2315 KFmode,
2316 SDmode,
2317 DDmode,
2318 TDmode,
2319 V8QImode,
2320 V4HImode,
2321 V2SImode,
2322 V16QImode,
2323 V8HImode,
2324 V4SImode,
2325 V2DImode,
2326 V1TImode,
2327 V32QImode,
2328 V16HImode,
2329 V8SImode,
2330 V4DImode,
2331 V2TImode,
2332 V2SFmode,
2333 V4SFmode,
2334 V2DFmode,
2335 V8SFmode,
2336 V4DFmode,
2337 CCmode,
2338 CCUNSmode,
2339 CCEQmode,
2342 /* Virtual regs we are interested in. */
2343 const static struct {
2344 int regno; /* register number. */
2345 const char *name; /* register name. */
2346 } virtual_regs[] = {
2347 { STACK_POINTER_REGNUM, "stack pointer:" },
2348 { TOC_REGNUM, "toc: " },
2349 { STATIC_CHAIN_REGNUM, "static chain: " },
2350 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2351 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2352 { ARG_POINTER_REGNUM, "arg pointer: " },
2353 { FRAME_POINTER_REGNUM, "frame pointer:" },
2354 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2355 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2356 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2357 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2358 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2359 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2360 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2361 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2362 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2365 fputs ("\nHard register information:\n", stderr);
2366 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2367 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2368 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2369 LAST_ALTIVEC_REGNO,
2370 "vs");
2371 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2372 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2373 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2374 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2375 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2376 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2377 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2378 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2380 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2381 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2382 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2384 fprintf (stderr,
2385 "\n"
2386 "d reg_class = %s\n"
2387 "f reg_class = %s\n"
2388 "v reg_class = %s\n"
2389 "wa reg_class = %s\n"
2390 "wb reg_class = %s\n"
2391 "wd reg_class = %s\n"
2392 "we reg_class = %s\n"
2393 "wf reg_class = %s\n"
2394 "wg reg_class = %s\n"
2395 "wh reg_class = %s\n"
2396 "wi reg_class = %s\n"
2397 "wj reg_class = %s\n"
2398 "wk reg_class = %s\n"
2399 "wl reg_class = %s\n"
2400 "wm reg_class = %s\n"
2401 "wo reg_class = %s\n"
2402 "wp reg_class = %s\n"
2403 "wq reg_class = %s\n"
2404 "wr reg_class = %s\n"
2405 "ws reg_class = %s\n"
2406 "wt reg_class = %s\n"
2407 "wu reg_class = %s\n"
2408 "wv reg_class = %s\n"
2409 "ww reg_class = %s\n"
2410 "wx reg_class = %s\n"
2411 "wy reg_class = %s\n"
2412 "wz reg_class = %s\n"
2413 "\n",
2414 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2415 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2416 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2417 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2418 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2419 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2420 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2421 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2422 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2423 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2424 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2425 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2426 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2427 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2428 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2429 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2430 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2431 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2432 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2433 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2434 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2435 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2436 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2437 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2438 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2439 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2442 nl = "\n";
2443 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2444 rs6000_debug_print_mode (m);
2446 fputs ("\n", stderr);
2448 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2450 machine_mode mode1 = print_tieable_modes[m1];
2451 bool first_time = true;
2453 nl = (const char *)0;
2454 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2456 machine_mode mode2 = print_tieable_modes[m2];
2457 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2459 if (first_time)
2461 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2462 nl = "\n";
2463 first_time = false;
2466 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2470 if (!first_time)
2471 fputs ("\n", stderr);
2474 if (nl)
2475 fputs (nl, stderr);
2477 if (rs6000_recip_control)
2479 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2481 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2482 if (rs6000_recip_bits[m])
2484 fprintf (stderr,
2485 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2486 GET_MODE_NAME (m),
2487 (RS6000_RECIP_AUTO_RE_P (m)
2488 ? "auto"
2489 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2490 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2491 ? "auto"
2492 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2495 fputs ("\n", stderr);
2498 if (rs6000_cpu_index >= 0)
2500 const char *name = processor_target_table[rs6000_cpu_index].name;
2501 HOST_WIDE_INT flags
2502 = processor_target_table[rs6000_cpu_index].target_enable;
2504 sprintf (flags_buffer, "-mcpu=%s flags", name);
2505 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2507 else
2508 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2510 if (rs6000_tune_index >= 0)
2512 const char *name = processor_target_table[rs6000_tune_index].name;
2513 HOST_WIDE_INT flags
2514 = processor_target_table[rs6000_tune_index].target_enable;
2516 sprintf (flags_buffer, "-mtune=%s flags", name);
2517 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2519 else
2520 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2522 cl_target_option_save (&cl_opts, &global_options);
2523 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2524 rs6000_isa_flags);
2526 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2527 rs6000_isa_flags_explicit);
2529 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2530 rs6000_builtin_mask);
2532 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2534 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2535 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2537 switch (rs6000_sched_costly_dep)
2539 case max_dep_latency:
2540 costly_str = "max_dep_latency";
2541 break;
2543 case no_dep_costly:
2544 costly_str = "no_dep_costly";
2545 break;
2547 case all_deps_costly:
2548 costly_str = "all_deps_costly";
2549 break;
2551 case true_store_to_load_dep_costly:
2552 costly_str = "true_store_to_load_dep_costly";
2553 break;
2555 case store_to_load_dep_costly:
2556 costly_str = "store_to_load_dep_costly";
2557 break;
2559 default:
2560 costly_str = costly_num;
2561 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2562 break;
2565 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2567 switch (rs6000_sched_insert_nops)
2569 case sched_finish_regroup_exact:
2570 nop_str = "sched_finish_regroup_exact";
2571 break;
2573 case sched_finish_pad_groups:
2574 nop_str = "sched_finish_pad_groups";
2575 break;
2577 case sched_finish_none:
2578 nop_str = "sched_finish_none";
2579 break;
2581 default:
2582 nop_str = nop_num;
2583 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2584 break;
2587 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2589 switch (rs6000_sdata)
2591 default:
2592 case SDATA_NONE:
2593 break;
2595 case SDATA_DATA:
2596 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2597 break;
2599 case SDATA_SYSV:
2600 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2601 break;
2603 case SDATA_EABI:
2604 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2605 break;
2609 switch (rs6000_traceback)
2611 case traceback_default: trace_str = "default"; break;
2612 case traceback_none: trace_str = "none"; break;
2613 case traceback_part: trace_str = "part"; break;
2614 case traceback_full: trace_str = "full"; break;
2615 default: trace_str = "unknown"; break;
2618 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2620 switch (rs6000_current_cmodel)
2622 case CMODEL_SMALL: cmodel_str = "small"; break;
2623 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2624 case CMODEL_LARGE: cmodel_str = "large"; break;
2625 default: cmodel_str = "unknown"; break;
2628 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2630 switch (rs6000_current_abi)
2632 case ABI_NONE: abi_str = "none"; break;
2633 case ABI_AIX: abi_str = "aix"; break;
2634 case ABI_ELFv2: abi_str = "ELFv2"; break;
2635 case ABI_V4: abi_str = "V4"; break;
2636 case ABI_DARWIN: abi_str = "darwin"; break;
2637 default: abi_str = "unknown"; break;
2640 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2642 if (rs6000_altivec_abi)
2643 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2645 if (rs6000_spe_abi)
2646 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2648 if (rs6000_darwin64_abi)
2649 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2651 if (rs6000_float_gprs)
2652 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2654 fprintf (stderr, DEBUG_FMT_S, "fprs",
2655 (TARGET_FPRS ? "true" : "false"));
2657 fprintf (stderr, DEBUG_FMT_S, "single_float",
2658 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2660 fprintf (stderr, DEBUG_FMT_S, "double_float",
2661 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2663 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2664 (TARGET_SOFT_FLOAT ? "true" : "false"));
2666 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2667 (TARGET_E500_SINGLE ? "true" : "false"));
2669 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2670 (TARGET_E500_DOUBLE ? "true" : "false"));
2672 if (TARGET_LINK_STACK)
2673 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2675 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2677 if (TARGET_P8_FUSION)
2679 char options[80];
2681 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2682 if (TARGET_TOC_FUSION)
2683 strcat (options, ", toc");
2685 if (TARGET_P8_FUSION_SIGN)
2686 strcat (options, ", sign");
2688 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2691 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2692 TARGET_SECURE_PLT ? "secure" : "bss");
2693 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2694 aix_struct_return ? "aix" : "sysv");
2695 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2696 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2697 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2698 tf[!!rs6000_align_branch_targets]);
2699 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2700 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2701 rs6000_long_double_type_size);
2702 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2703 (int)rs6000_sched_restricted_insns_priority);
2704 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2705 (int)END_BUILTINS);
2706 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2707 (int)RS6000_BUILTIN_COUNT);
2709 if (TARGET_VSX)
2710 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2711 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2713 if (TARGET_DIRECT_MOVE_128)
2714 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2715 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2719 /* Update the addr mask bits in reg_addr to help secondary reload and the
2720 legitimate address (GO_IF_LEGITIMATE_ADDRESS) support figure out the
2721 appropriate addressing to use. */
2723 static void
2724 rs6000_setup_reg_addr_masks (void)
2726 ssize_t rc, reg, m, nregs;
2727 addr_mask_type any_addr_mask, addr_mask;
2729 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2731 machine_mode m2 = (machine_mode) m;
2732 bool complex_p = false;
2733 size_t msize;
2735 if (COMPLEX_MODE_P (m2))
2737 complex_p = true;
2738 m2 = GET_MODE_INNER (m2);
2741 msize = GET_MODE_SIZE (m2);
2743 /* SDmode is special in that we want to access it only via REG+REG
2744 addressing on power7 and above, since we want to use the LFIWZX and
2745 STFIWZX instructions to load it. */
2746 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2748 any_addr_mask = 0;
2749 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2751 addr_mask = 0;
2752 reg = reload_reg_map[rc].reg;
2754 /* Can mode values go in the GPR/FPR/Altivec registers? */
2755 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2757 nregs = rs6000_hard_regno_nregs[m][reg];
2758 addr_mask |= RELOAD_REG_VALID;
2760 /* Indicate if the mode takes more than 1 physical register. If
2761 it takes a single register, indicate it can do REG+REG
2762 addressing. */
2763 if (nregs > 1 || m == BLKmode || complex_p)
2764 addr_mask |= RELOAD_REG_MULTIPLE;
2765 else
2766 addr_mask |= RELOAD_REG_INDEXED;
2768 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2769 addressing. Restrict addressing on SPE for 64-bit types
2770 because of the SUBREG hackery used to address 64-bit floats in
2771 '32-bit' GPRs. If we allow scalars into Altivec registers,
2772 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2774 if (TARGET_UPDATE
2775 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2776 && msize <= 8
2777 && !VECTOR_MODE_P (m2)
2778 && !FLOAT128_VECTOR_P (m2)
2779 && !complex_p
2780 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2781 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2782 && !(TARGET_E500_DOUBLE && msize == 8))
2784 addr_mask |= RELOAD_REG_PRE_INCDEC;
2786 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2787 we don't allow PRE_MODIFY for some multi-register
2788 operations. */
2789 switch (m)
2791 default:
2792 addr_mask |= RELOAD_REG_PRE_MODIFY;
2793 break;
2795 case DImode:
2796 if (TARGET_POWERPC64)
2797 addr_mask |= RELOAD_REG_PRE_MODIFY;
2798 break;
2800 case DFmode:
2801 case DDmode:
2802 if (TARGET_DF_INSN)
2803 addr_mask |= RELOAD_REG_PRE_MODIFY;
2804 break;
2809 /* GPR and FPR registers can do REG+OFFSET addressing, except
2810 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2811 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2812 if ((addr_mask != 0) && !indexed_only_p
2813 && msize <= 8
2814 && (rc == RELOAD_REG_GPR
2815 || ((msize == 8 || m2 == SFmode)
2816 && (rc == RELOAD_REG_FPR
2817 || (rc == RELOAD_REG_VMX
2818 && TARGET_P9_DFORM_SCALAR)))))
2819 addr_mask |= RELOAD_REG_OFFSET;
2821 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2822 instructions are enabled. The offset for 128-bit VSX registers is
2823 only 12 bits. While GPRs can handle the full offset range, VSX
2824 registers can only handle the restricted range. */
2825 else if ((addr_mask != 0) && !indexed_only_p
2826 && msize == 16 && TARGET_P9_DFORM_VECTOR
2827 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2828 || (m2 == TImode && TARGET_VSX_TIMODE)))
2830 addr_mask |= RELOAD_REG_OFFSET;
2831 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2832 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2835 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2836 addressing on 128-bit types. */
2837 if (rc == RELOAD_REG_VMX && msize == 16
2838 && (addr_mask & RELOAD_REG_VALID) != 0)
2839 addr_mask |= RELOAD_REG_AND_M16;
2841 reg_addr[m].addr_mask[rc] = addr_mask;
2842 any_addr_mask |= addr_mask;
2845 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
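/* A hedged reading of the table just built: for a mode such as SImode that
   fits in one GPR, the GPR entry gets RELOAD_REG_VALID | RELOAD_REG_INDEXED,
   plus RELOAD_REG_OFFSET and (when TARGET_UPDATE) the PRE_INCDEC/PRE_MODIFY
   bits; RELOAD_REG_ANY is simply the union over all reload register
   classes.  */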
2850 /* Initialize the various global tables that are based on register size. */
2851 static void
2852 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2854 ssize_t r, m, c;
2855 int align64;
2856 int align32;
2858 /* Precalculate REGNO_REG_CLASS. */
2859 rs6000_regno_regclass[0] = GENERAL_REGS;
2860 for (r = 1; r < 32; ++r)
2861 rs6000_regno_regclass[r] = BASE_REGS;
2863 for (r = 32; r < 64; ++r)
2864 rs6000_regno_regclass[r] = FLOAT_REGS;
2866 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2867 rs6000_regno_regclass[r] = NO_REGS;
2869 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2870 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2872 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2873 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2874 rs6000_regno_regclass[r] = CR_REGS;
2876 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2877 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2878 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2879 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2880 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2881 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2882 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2883 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2884 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2885 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2886 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2887 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2889 /* Precalculate register class to simpler reload register class. We don't
2890 need all of the register classes that are combinations of different
2891 classes, just the simple ones that have constraint letters. */
2892 for (c = 0; c < N_REG_CLASSES; c++)
2893 reg_class_to_reg_type[c] = NO_REG_TYPE;
2895 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2897 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2898 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2899 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2900 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2901 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2903 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2904 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2905 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2906 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2908 if (TARGET_VSX)
2910 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2911 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2913 else
2915 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2916 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2919 /* Precalculate the valid memory formats as well as the vector information;
2920 this must be set up before the rs6000_hard_regno_nregs_internal calls
2921 below. */
2922 gcc_assert ((int)VECTOR_NONE == 0);
2923 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2924 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2926 gcc_assert ((int)CODE_FOR_nothing == 0);
2927 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2929 gcc_assert ((int)NO_REGS == 0);
2930 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2932 /* The VSX hardware allows native alignment for vectors, but we control whether
2933 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2934 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2936 align64 = 64;
2937 align32 = 32;
2939 else
2941 align64 = 128;
2942 align32 = 128;
2945 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2946 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2947 if (TARGET_FLOAT128)
2949 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2950 rs6000_vector_align[KFmode] = 128;
2952 if (FLOAT128_IEEE_P (TFmode))
2954 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2955 rs6000_vector_align[TFmode] = 128;
2959 /* V2DF mode, VSX only. */
2960 if (TARGET_VSX)
2962 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2963 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2964 rs6000_vector_align[V2DFmode] = align64;
2967 /* V4SF mode, either VSX or Altivec. */
2968 if (TARGET_VSX)
2970 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2971 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2972 rs6000_vector_align[V4SFmode] = align32;
2974 else if (TARGET_ALTIVEC)
2976 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2977 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2978 rs6000_vector_align[V4SFmode] = align32;
2981 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2982 and stores. */
2983 if (TARGET_ALTIVEC)
2985 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2986 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2987 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2988 rs6000_vector_align[V4SImode] = align32;
2989 rs6000_vector_align[V8HImode] = align32;
2990 rs6000_vector_align[V16QImode] = align32;
2992 if (TARGET_VSX)
2994 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2995 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2996 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2998 else
3000 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3001 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3002 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3006 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3007 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3008 if (TARGET_VSX)
3010 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3011 rs6000_vector_unit[V2DImode]
3012 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3013 rs6000_vector_align[V2DImode] = align64;
3015 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3016 rs6000_vector_unit[V1TImode]
3017 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3018 rs6000_vector_align[V1TImode] = 128;
3021 /* DFmode, see if we want to use the VSX unit. Memory is handled
3022 differently, so don't set rs6000_vector_mem. */
3023 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3025 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3026 rs6000_vector_align[DFmode] = 64;
3029 /* SFmode, see if we want to use the VSX unit. */
3030 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3032 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3033 rs6000_vector_align[SFmode] = 32;
3036 /* Allow TImode in VSX register and set the VSX memory macros. */
3037 if (TARGET_VSX && TARGET_VSX_TIMODE)
3039 rs6000_vector_mem[TImode] = VECTOR_VSX;
3040 rs6000_vector_align[TImode] = align64;
3043 /* TODO add SPE and paired floating point vector support. */
3045 /* Register class constraints for the constraints that depend on compile
3046 switches. When the VSX code was added, different constraints were added
3047 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3048 of the VSX registers are used. The register classes for scalar floating
3049 point types are set based on whether we allow that type into the upper
3050 (Altivec) registers. GCC has register classes to target the Altivec
3051 registers for load/store operations, to select using a VSX memory
3052 operation instead of the traditional floating point operation. The
3053 constraints are:
3055 d - Register class to use with traditional DFmode instructions.
3056 f - Register class to use with traditional SFmode instructions.
3057 v - Altivec register.
3058 wa - Any VSX register.
3059 wc - Reserved to represent individual CR bits (used in LLVM).
3060 wd - Preferred register class for V2DFmode.
3061 wf - Preferred register class for V4SFmode.
3062 wg - Float register for power6x move insns.
3063 wh - FP register for direct move instructions.
3064 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3065 wj - FP or VSX register to hold 64-bit integers for direct moves.
3066 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3067 wl - Float register if we can do 32-bit signed int loads.
3068 wm - VSX register for ISA 2.07 direct move operations.
3069 wn - always NO_REGS.
3070 wr - GPR if 64-bit mode is permitted.
3071 ws - Register class to do ISA 2.06 DF operations.
3072 wt - VSX register for TImode in VSX registers.
3073 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3074 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3075 ww - Register class to do SF conversions in with VSX operations.
3076 wx - Float register if we can do 32-bit int stores.
3077 wy - Register class to do ISA 2.07 SF operations.
3078 wz - Float register if we can do 32-bit unsigned int loads. */
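/* In the machine description these letters appear as operand constraints;
   for instance, an operand written as "=wa" matches any VSX register once
   RS6000_CONSTRAINT_wa is set to VSX_REGS below, while "wn" never matches
   anything because its entry stays NO_REGS.  */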
3080 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3081 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3083 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3084 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3086 if (TARGET_VSX)
3088 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3089 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3090 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3092 if (TARGET_VSX_TIMODE)
3093 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3095 if (TARGET_UPPER_REGS_DF) /* DFmode */
3097 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3098 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3100 else
3101 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3103 if (TARGET_UPPER_REGS_DF) /* DImode */
3104 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3105 else
3106 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3109 /* Add conditional constraints based on various options, to allow us to
3110 collapse multiple insn patterns. */
3111 if (TARGET_ALTIVEC)
3112 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3114 if (TARGET_MFPGPR) /* DFmode */
3115 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3117 if (TARGET_LFIWAX)
3118 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3120 if (TARGET_DIRECT_MOVE)
3122 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3123 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3124 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3125 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3126 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3127 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3130 if (TARGET_POWERPC64)
3131 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3133 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3135 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3136 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3137 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3139 else if (TARGET_P8_VECTOR)
3141 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3142 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3144 else if (TARGET_VSX)
3145 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3147 if (TARGET_STFIWX)
3148 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3150 if (TARGET_LFIWZX)
3151 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3153 if (TARGET_FLOAT128)
3155 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3156 if (FLOAT128_IEEE_P (TFmode))
3157 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3160 /* Support for new D-form instructions. */
3161 if (TARGET_P9_DFORM_SCALAR)
3162 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3164 /* Support for ISA 3.0 (power9) vectors. */
3165 if (TARGET_P9_VECTOR)
3166 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3168 /* Support for new direct moves (ISA 3.0 + 64bit). */
3169 if (TARGET_DIRECT_MOVE_128)
3170 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3172 /* Set up the reload helper and direct move functions. */
3173 if (TARGET_VSX || TARGET_ALTIVEC)
3175 if (TARGET_64BIT)
3177 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3178 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3179 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3180 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3181 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3182 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3183 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3184 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3185 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3186 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3187 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3188 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3189 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3190 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3191 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3192 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3193 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3194 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3195 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3196 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3198 if (FLOAT128_VECTOR_P (KFmode))
3200 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3201 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3204 if (FLOAT128_VECTOR_P (TFmode))
3206 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3207 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3210 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3211 available. */
3212 if (TARGET_NO_SDMODE_STACK)
3214 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3215 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3218 if (TARGET_VSX_TIMODE)
3220 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3221 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3224 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3226 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3227 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3228 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3229 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3230 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3231 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3232 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3233 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3234 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3236 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3237 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3238 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3239 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3240 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3241 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3242 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3243 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3244 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3246 if (FLOAT128_VECTOR_P (KFmode))
3248 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3249 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3252 if (FLOAT128_VECTOR_P (TFmode))
3254 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3255 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3259 else
3261 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3262 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3263 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3264 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3265 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3266 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3267 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3268 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3269 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3270 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3271 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3272 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3273 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3274 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3275 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3276 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3277 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3278 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3279 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3280 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3282 if (FLOAT128_VECTOR_P (KFmode))
3284 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3285 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3288 if (FLOAT128_IEEE_P (TFmode))
3290 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3291 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3294 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3295 available. */
3296 if (TARGET_NO_SDMODE_STACK)
3298 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3299 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3302 if (TARGET_VSX_TIMODE)
3304 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3305 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3308 if (TARGET_DIRECT_MOVE)
3310 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3311 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3312 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3316 if (TARGET_UPPER_REGS_DF)
3317 reg_addr[DFmode].scalar_in_vmx_p = true;
3319 if (TARGET_UPPER_REGS_DI)
3320 reg_addr[DImode].scalar_in_vmx_p = true;
3322 if (TARGET_UPPER_REGS_SF)
3323 reg_addr[SFmode].scalar_in_vmx_p = true;
3326 /* Set up the fusion operations. */
3327 if (TARGET_P8_FUSION)
3329 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3330 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3331 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3332 if (TARGET_64BIT)
3333 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3336 if (TARGET_P9_FUSION)
3338 struct fuse_insns {
3339 enum machine_mode mode; /* mode of the fused type. */
3340 enum machine_mode pmode; /* pointer mode. */
3341 enum rs6000_reload_reg_type rtype; /* register type. */
3342 enum insn_code load; /* load insn. */
3343 enum insn_code store; /* store insn. */
3346 static const struct fuse_insns addis_insns[] = {
3347 { SFmode, DImode, RELOAD_REG_FPR,
3348 CODE_FOR_fusion_fpr_di_sf_load,
3349 CODE_FOR_fusion_fpr_di_sf_store },
3351 { SFmode, SImode, RELOAD_REG_FPR,
3352 CODE_FOR_fusion_fpr_si_sf_load,
3353 CODE_FOR_fusion_fpr_si_sf_store },
3355 { DFmode, DImode, RELOAD_REG_FPR,
3356 CODE_FOR_fusion_fpr_di_df_load,
3357 CODE_FOR_fusion_fpr_di_df_store },
3359 { DFmode, SImode, RELOAD_REG_FPR,
3360 CODE_FOR_fusion_fpr_si_df_load,
3361 CODE_FOR_fusion_fpr_si_df_store },
3363 { DImode, DImode, RELOAD_REG_FPR,
3364 CODE_FOR_fusion_fpr_di_di_load,
3365 CODE_FOR_fusion_fpr_di_di_store },
3367 { DImode, SImode, RELOAD_REG_FPR,
3368 CODE_FOR_fusion_fpr_si_di_load,
3369 CODE_FOR_fusion_fpr_si_di_store },
3371 { QImode, DImode, RELOAD_REG_GPR,
3372 CODE_FOR_fusion_gpr_di_qi_load,
3373 CODE_FOR_fusion_gpr_di_qi_store },
3375 { QImode, SImode, RELOAD_REG_GPR,
3376 CODE_FOR_fusion_gpr_si_qi_load,
3377 CODE_FOR_fusion_gpr_si_qi_store },
3379 { HImode, DImode, RELOAD_REG_GPR,
3380 CODE_FOR_fusion_gpr_di_hi_load,
3381 CODE_FOR_fusion_gpr_di_hi_store },
3383 { HImode, SImode, RELOAD_REG_GPR,
3384 CODE_FOR_fusion_gpr_si_hi_load,
3385 CODE_FOR_fusion_gpr_si_hi_store },
3387 { SImode, DImode, RELOAD_REG_GPR,
3388 CODE_FOR_fusion_gpr_di_si_load,
3389 CODE_FOR_fusion_gpr_di_si_store },
3391 { SImode, SImode, RELOAD_REG_GPR,
3392 CODE_FOR_fusion_gpr_si_si_load,
3393 CODE_FOR_fusion_gpr_si_si_store },
3395 { SFmode, DImode, RELOAD_REG_GPR,
3396 CODE_FOR_fusion_gpr_di_sf_load,
3397 CODE_FOR_fusion_gpr_di_sf_store },
3399 { SFmode, SImode, RELOAD_REG_GPR,
3400 CODE_FOR_fusion_gpr_si_sf_load,
3401 CODE_FOR_fusion_gpr_si_sf_store },
3403 { DImode, DImode, RELOAD_REG_GPR,
3404 CODE_FOR_fusion_gpr_di_di_load,
3405 CODE_FOR_fusion_gpr_di_di_store },
3407 { DFmode, DImode, RELOAD_REG_GPR,
3408 CODE_FOR_fusion_gpr_di_df_load,
3409 CODE_FOR_fusion_gpr_di_df_store },
3412 enum machine_mode cur_pmode = Pmode;
3413 size_t i;
3415 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3417 enum machine_mode xmode = addis_insns[i].mode;
3418 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3420 if (addis_insns[i].pmode != cur_pmode)
3421 continue;
3423 if (rtype == RELOAD_REG_FPR
3424 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3425 continue;
3427 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3428 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
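/* For example (illustrative note only): on a 64-bit target, where Pmode is
   DImode, the loop above skipped the SImode-pointer rows of addis_insns and
   installed CODE_FOR_fusion_fpr_di_sf_load / CODE_FOR_fusion_fpr_di_sf_store
   as the SFmode FPR addis-fusion handlers.  */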
3432 /* Note which modes support fusing a TOC setup plus memory insn. We only
3433 generate fused TOCs for medium/large code models. */
3434 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3435 && (TARGET_CMODEL != CMODEL_SMALL))
3437 reg_addr[QImode].fused_toc = true;
3438 reg_addr[HImode].fused_toc = true;
3439 reg_addr[SImode].fused_toc = true;
3440 reg_addr[DImode].fused_toc = true;
3441 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3443 if (TARGET_SINGLE_FLOAT)
3444 reg_addr[SFmode].fused_toc = true;
3445 if (TARGET_DOUBLE_FLOAT)
3446 reg_addr[DFmode].fused_toc = true;
3450 /* Precalculate HARD_REGNO_NREGS. */
3451 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3452 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3453 rs6000_hard_regno_nregs[m][r]
3454 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3456 /* Precalculate HARD_REGNO_MODE_OK. */
3457 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3458 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3459 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3460 rs6000_hard_regno_mode_ok_p[m][r] = true;
3462 /* Precalculate CLASS_MAX_NREGS sizes. */
3463 for (c = 0; c < LIM_REG_CLASSES; ++c)
3465 int reg_size;
3467 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3468 reg_size = UNITS_PER_VSX_WORD;
3470 else if (c == ALTIVEC_REGS)
3471 reg_size = UNITS_PER_ALTIVEC_WORD;
3473 else if (c == FLOAT_REGS)
3474 reg_size = UNITS_PER_FP_WORD;
3476 else
3477 reg_size = UNITS_PER_WORD;
3479 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3481 machine_mode m2 = (machine_mode)m;
3482 int reg_size2 = reg_size;
3484 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3485 in VSX. */
3486 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3487 reg_size2 = UNITS_PER_FP_WORD;
3489 rs6000_class_max_nregs[m][c]
3490 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3494 if (TARGET_E500_DOUBLE)
3495 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
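/* Worked example (illustrative note only): a 16-byte V4SImode value needs
   (16 + 8 - 1) / 8 = 2 registers in FLOAT_REGS (8-byte FP words) but
   (16 + 16 - 1) / 16 = 1 register in a VSX class (16-byte words), while a
   FLOAT128_2REG_P mode such as TDmode is forced back to 8-byte words above
   and so takes 2 registers even in VSX.  */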
3497 /* Calculate which modes to automatically generate code to use the
3498 reciprocal divide and square root instructions. In the future, possibly
3499 automatically generate the instructions even if the user did not specify
3500 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3501 not accurate enough. */
3502 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3503 if (TARGET_FRES)
3504 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3505 if (TARGET_FRE)
3506 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3507 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3508 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3509 if (VECTOR_UNIT_VSX_P (V2DFmode))
3510 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3512 if (TARGET_FRSQRTES)
3513 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3514 if (TARGET_FRSQRTE)
3515 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3516 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3517 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3518 if (VECTOR_UNIT_VSX_P (V2DFmode))
3519 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3521 if (rs6000_recip_control)
3523 if (!flag_finite_math_only)
3524 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3525 if (flag_trapping_math)
3526 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3527 if (!flag_reciprocal_math)
3528 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3529 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3531 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3532 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3533 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3535 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3536 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3537 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3539 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3540 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3541 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3543 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3544 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3545 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3547 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3548 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3549 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3551 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3552 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3553 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3555 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3556 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3557 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3559 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3560 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3561 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
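/* Worked example (illustrative note only): with -mrecip -ffast-math on a cpu
   that has fres/frsqrtes, rs6000_recip_bits[SFmode] holds
   RS6000_RECIP_MASK_HAVE_RE | RS6000_RECIP_MASK_HAVE_RSQRTE from the checks
   further up, and the block above then adds the AUTO_RE/AUTO_RSQRTE bits,
   since -ffast-math satisfies all three flag_* tests and RECIP_SF_DIV /
   RECIP_SF_RSQRT are in rs6000_recip_control.  */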
3565 /* Update the addr mask bits in reg_addr to help secondary reload and the
3566 legitimate address support figure out the appropriate addressing to
3567 use. */
3568 rs6000_setup_reg_addr_masks ();
3570 if (global_init_p || TARGET_DEBUG_TARGET)
3572 if (TARGET_DEBUG_REG)
3573 rs6000_debug_reg_global ();
3575 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3576 fprintf (stderr,
3577 "SImode variable mult cost = %d\n"
3578 "SImode constant mult cost = %d\n"
3579 "SImode short constant mult cost = %d\n"
3580 "DImode multiplication cost = %d\n"
3581 "SImode division cost = %d\n"
3582 "DImode division cost = %d\n"
3583 "Simple fp operation cost = %d\n"
3584 "DFmode multiplication cost = %d\n"
3585 "SFmode division cost = %d\n"
3586 "DFmode division cost = %d\n"
3587 "cache line size = %d\n"
3588 "l1 cache size = %d\n"
3589 "l2 cache size = %d\n"
3590 "simultaneous prefetches = %d\n"
3591 "\n",
3592 rs6000_cost->mulsi,
3593 rs6000_cost->mulsi_const,
3594 rs6000_cost->mulsi_const9,
3595 rs6000_cost->muldi,
3596 rs6000_cost->divsi,
3597 rs6000_cost->divdi,
3598 rs6000_cost->fp,
3599 rs6000_cost->dmul,
3600 rs6000_cost->sdiv,
3601 rs6000_cost->ddiv,
3602 rs6000_cost->cache_line_size,
3603 rs6000_cost->l1_cache_size,
3604 rs6000_cost->l2_cache_size,
3605 rs6000_cost->simultaneous_prefetches);
3609 #if TARGET_MACHO
3610 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3612 static void
3613 darwin_rs6000_override_options (void)
3615 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3616 off. */
3617 rs6000_altivec_abi = 1;
3618 TARGET_ALTIVEC_VRSAVE = 1;
3619 rs6000_current_abi = ABI_DARWIN;
3621 if (DEFAULT_ABI == ABI_DARWIN
3622 && TARGET_64BIT)
3623 darwin_one_byte_bool = 1;
3625 if (TARGET_64BIT && ! TARGET_POWERPC64)
3627 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3628 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3630 if (flag_mkernel)
3632 rs6000_default_long_calls = 1;
3633 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3636 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3637 Altivec. */
3638 if (!flag_mkernel && !flag_apple_kext
3639 && TARGET_64BIT
3640 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3641 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3643 /* Unless the user (not the configurer) has explicitly overridden
3644 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3645 G4 unless targeting the kernel. */
3646 if (!flag_mkernel
3647 && !flag_apple_kext
3648 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3649 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3650 && ! global_options_set.x_rs6000_cpu_index)
3652 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3655 #endif
3657 /* If not otherwise specified by a target, make 'long double' equivalent to
3658 'double'. */
3660 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3661 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3662 #endif
3664 /* Return the builtin mask of the various options used that could affect which
3665 builtins were used. In the past we used target_flags, but we've run out of
3666 bits, and some options like SPE and PAIRED are no longer in
3667 target_flags. */
3669 HOST_WIDE_INT
3670 rs6000_builtin_mask_calculate (void)
3672 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3673 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3674 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3675 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3676 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3677 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3678 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3679 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3680 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3681 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3682 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3683 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3684 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3685 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3686 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3687 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3688 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3689 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3690 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3691 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3692 | ((TARGET_FLOAT128) ? RS6000_BTM_FLOAT128 : 0));
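/* Illustrative sketch (not part of the build): a hypothetical caller could
   test individual capabilities in the returned mask, e.g.

     HOST_WIDE_INT bmask = rs6000_builtin_mask_calculate ();
     bool have_htm = (bmask & RS6000_BTM_HTM) != 0;

   where have_htm is a made-up local used only for this example.  */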
3695 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3696 to clobber the XER[CA] bit because clobbering that bit without telling
3697 the compiler worked just fine with versions of GCC before GCC 5, and
3698 breaking a lot of older code in ways that are hard to track down is
3699 not such a great idea. */
3701 static rtx_insn *
3702 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3703 vec<const char *> &/*constraints*/,
3704 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3706 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3707 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3708 return NULL;
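/* Illustrative example (an assumption about user code, not from this file):
   an asm statement such as

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

   modifies XER[CA] without declaring it, and the unconditional clobber added
   above is what keeps such pre-GCC-5 code correct.  */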
3711 /* Override command line options. Mostly we process the processor type and
3712 sometimes adjust other TARGET_ options. */
3714 static bool
3715 rs6000_option_override_internal (bool global_init_p)
3717 bool ret = true;
3718 bool have_cpu = false;
3720 /* The default cpu requested at configure time, if any. */
3721 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3723 HOST_WIDE_INT set_masks;
3724 int cpu_index;
3725 int tune_index;
3726 struct cl_target_option *main_target_opt
3727 = ((global_init_p || target_option_default_node == NULL)
3728 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3730 /* Print defaults. */
3731 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3732 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3734 /* Remember the explicit arguments. */
3735 if (global_init_p)
3736 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3738 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3739 library functions, so warn about it. The flag may be useful for
3740 performance studies from time to time though, so don't disable it
3741 entirely. */
3742 if (global_options_set.x_rs6000_alignment_flags
3743 && rs6000_alignment_flags == MASK_ALIGN_POWER
3744 && DEFAULT_ABI == ABI_DARWIN
3745 && TARGET_64BIT)
3746 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3747 " it is incompatible with the installed C and C++ libraries");
3749 /* Numerous experiments show that IRA-based loop pressure
3750 calculation works better for RTL loop invariant motion on targets
3751 with enough (>= 32) registers. It is an expensive optimization,
3752 so it is enabled only for peak performance. */
3753 if (optimize >= 3 && global_init_p
3754 && !global_options_set.x_flag_ira_loop_pressure)
3755 flag_ira_loop_pressure = 1;
3757 /* Set the pointer size. */
3758 if (TARGET_64BIT)
3760 rs6000_pmode = (int)DImode;
3761 rs6000_pointer_size = 64;
3763 else
3765 rs6000_pmode = (int)SImode;
3766 rs6000_pointer_size = 32;
3769 /* Some OSs don't support saving the high part of 64-bit registers on context
3770 switch. Other OSs don't support saving Altivec registers. On those OSs,
3771 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3772 if the user wants either, the user must explicitly specify them and we
3773 won't interfere with the user's specification. */
3775 set_masks = POWERPC_MASKS;
3776 #ifdef OS_MISSING_POWERPC64
3777 if (OS_MISSING_POWERPC64)
3778 set_masks &= ~OPTION_MASK_POWERPC64;
3779 #endif
3780 #ifdef OS_MISSING_ALTIVEC
3781 if (OS_MISSING_ALTIVEC)
3782 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3783 #endif
3785 /* Don't override flags that were given explicitly with the processor default. */
3786 set_masks &= ~rs6000_isa_flags_explicit;
3788 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3789 the cpu in a target attribute or pragma, but did not specify a tuning
3790 option, use the cpu for the tuning option rather than the option specified
3791 with -mtune on the command line. Process a '--with-cpu' configuration
3792 request as an implicit --cpu. */
3793 if (rs6000_cpu_index >= 0)
3795 cpu_index = rs6000_cpu_index;
3796 have_cpu = true;
3798 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3800 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3801 have_cpu = true;
3803 else if (implicit_cpu)
3805 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3806 have_cpu = true;
3808 else
3810 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3811 const char *default_cpu = ((!TARGET_POWERPC64)
3812 ? "powerpc"
3813 : ((BYTES_BIG_ENDIAN)
3814 ? "powerpc64"
3815 : "powerpc64le"));
3817 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3818 have_cpu = false;
3821 gcc_assert (cpu_index >= 0);
3823 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3824 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3825 with those from the cpu, except for options that were explicitly set. If
3826 we don't have a cpu, do not override the target bits set in
3827 TARGET_DEFAULT. */
3828 if (have_cpu)
3830 rs6000_isa_flags &= ~set_masks;
3831 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3832 & set_masks);
3834 else
3836 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3837 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3838 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3839 to using rs6000_isa_flags, we need to do the initialization here.
3841 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3842 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3843 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3844 : processor_target_table[cpu_index].target_enable);
3845 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
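/* Worked example (illustrative note only): given "-mcpu=power8 -mno-vsx",
   OPTION_MASK_VSX is in rs6000_isa_flags_explicit and was removed from
   set_masks earlier, so the power8 entry's VSX enable bit is filtered out in
   the have_cpu case above and the user's -mno-vsx wins.  */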
3848 if (rs6000_tune_index >= 0)
3849 tune_index = rs6000_tune_index;
3850 else if (have_cpu)
3851 rs6000_tune_index = tune_index = cpu_index;
3852 else
3854 size_t i;
3855 enum processor_type tune_proc
3856 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3858 tune_index = -1;
3859 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3860 if (processor_target_table[i].processor == tune_proc)
3862 rs6000_tune_index = tune_index = i;
3863 break;
3867 gcc_assert (tune_index >= 0);
3868 rs6000_cpu = processor_target_table[tune_index].processor;
3870 /* Pick defaults for SPE related control flags. Do this early to make sure
3871 that the TARGET_ macros are representative ASAP. */
3873 int spe_capable_cpu =
3874 (rs6000_cpu == PROCESSOR_PPC8540
3875 || rs6000_cpu == PROCESSOR_PPC8548);
3877 if (!global_options_set.x_rs6000_spe_abi)
3878 rs6000_spe_abi = spe_capable_cpu;
3880 if (!global_options_set.x_rs6000_spe)
3881 rs6000_spe = spe_capable_cpu;
3883 if (!global_options_set.x_rs6000_float_gprs)
3884 rs6000_float_gprs =
3885 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3886 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3887 : 0);
3890 if (global_options_set.x_rs6000_spe_abi
3891 && rs6000_spe_abi
3892 && !TARGET_SPE_ABI)
3893 error ("not configured for SPE ABI");
3895 if (global_options_set.x_rs6000_spe
3896 && rs6000_spe
3897 && !TARGET_SPE)
3898 error ("not configured for SPE instruction set");
3900 if (main_target_opt != NULL
3901 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3902 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3903 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3904 error ("target attribute or pragma changes SPE ABI");
3906 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3907 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3908 || rs6000_cpu == PROCESSOR_PPCE5500)
3910 if (TARGET_ALTIVEC)
3911 error ("AltiVec not supported in this target");
3912 if (TARGET_SPE)
3913 error ("SPE not supported in this target");
3915 if (rs6000_cpu == PROCESSOR_PPCE6500)
3917 if (TARGET_SPE)
3918 error ("SPE not supported in this target");
3921 /* Disable Cell microcode if we are optimizing for the Cell
3922 and not optimizing for size. */
3923 if (rs6000_gen_cell_microcode == -1)
3924 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3925 && !optimize_size);
3927 /* If we are optimizing big endian systems for space and it's OK to
3928 use instructions that would be microcoded on the Cell, use the
3929 load/store multiple and string instructions. */
3930 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3931 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3932 | OPTION_MASK_STRING);
3934 /* Don't allow -mmultiple or -mstring on little endian systems
3935 unless the cpu is a 750, because the hardware doesn't support the
3936 instructions used in little endian mode, and they cause an alignment
3937 trap. The 750 does not cause an alignment trap (except when the
3938 target is unaligned). */
3940 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3942 if (TARGET_MULTIPLE)
3944 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3945 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3946 warning (0, "-mmultiple is not supported on little endian systems");
3949 if (TARGET_STRING)
3951 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3952 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3953 warning (0, "-mstring is not supported on little endian systems");
3957 /* If little-endian, default to -mstrict-align on older processors.
3958 Testing for htm matches power8 and later. */
3959 if (!BYTES_BIG_ENDIAN
3960 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3961 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3963 /* -maltivec={le,be} implies -maltivec. */
3964 if (rs6000_altivec_element_order != 0)
3965 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3967 /* Disallow -maltivec=le in big endian mode for now. This is not
3968 known to be useful for anyone. */
3969 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3971 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3972 rs6000_altivec_element_order = 0;
3975 /* Add some warnings for VSX. */
3976 if (TARGET_VSX)
3978 const char *msg = NULL;
3979 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3980 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3982 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3983 msg = N_("-mvsx requires hardware floating point");
3984 else
3986 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3987 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3990 else if (TARGET_PAIRED_FLOAT)
3991 msg = N_("-mvsx and -mpaired are incompatible");
3992 else if (TARGET_AVOID_XFORM > 0)
3993 msg = N_("-mvsx needs indexed addressing");
3994 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3995 & OPTION_MASK_ALTIVEC))
3997 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3998 msg = N_("-mvsx and -mno-altivec are incompatible");
3999 else
4000 msg = N_("-mno-altivec disables vsx");
4003 if (msg)
4005 warning (0, msg);
4006 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4007 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4011 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4012 the -mcpu setting to enable options that conflict. */
4013 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4014 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4015 | OPTION_MASK_ALTIVEC
4016 | OPTION_MASK_VSX)) != 0)
4017 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4018 | OPTION_MASK_DIRECT_MOVE)
4019 & ~rs6000_isa_flags_explicit);
4021 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4022 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4024 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4025 unless the user explicitly used the -mno-<option> to disable the code. */
4026 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4027 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4028 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4029 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4030 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4031 else if (TARGET_VSX)
4032 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4033 else if (TARGET_POPCNTD)
4034 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4035 else if (TARGET_DFP)
4036 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4037 else if (TARGET_CMPB)
4038 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4039 else if (TARGET_FPRND)
4040 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4041 else if (TARGET_POPCNTB)
4042 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4043 else if (TARGET_ALTIVEC)
4044 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4046 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4048 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4049 error ("-mcrypto requires -maltivec");
4050 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4053 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4055 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4056 error ("-mdirect-move requires -mvsx");
4057 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4060 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4062 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4063 error ("-mpower8-vector requires -maltivec");
4064 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4067 if (TARGET_P8_VECTOR && !TARGET_VSX)
4069 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4070 error ("-mpower8-vector requires -mvsx");
4071 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4074 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4076 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4077 error ("-mvsx-timode requires -mvsx");
4078 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4081 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4083 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4084 error ("-mhard-dfp requires -mhard-float");
4085 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4088 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4089 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4090 set the individual option. */
4091 if (TARGET_UPPER_REGS > 0)
4093 if (TARGET_VSX
4094 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4096 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4097 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4099 if (TARGET_VSX
4100 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4102 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4103 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4105 if (TARGET_P8_VECTOR
4106 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4108 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4109 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4112 else if (TARGET_UPPER_REGS == 0)
4114 if (TARGET_VSX
4115 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4117 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4118 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4120 if (TARGET_VSX
4121 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4123 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4124 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4126 if (TARGET_P8_VECTOR
4127 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4129 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4130 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4134 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4136 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4137 error ("-mupper-regs-df requires -mvsx");
4138 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4141 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4143 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4144 error ("-mupper-regs-di requires -mvsx");
4145 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4148 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4150 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4151 error ("-mupper-regs-sf requires -mpower8-vector");
4152 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4155 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4156 silently turn off quad memory mode. */
4157 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4159 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4160 warning (0, N_("-mquad-memory requires 64-bit mode"));
4162 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4163 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4165 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4166 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4169 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4170 the words are reversed, but atomic operations can still be done by
4171 swapping the words. */
4172 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4174 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4175 warning (0, N_("-mquad-memory is not available in little endian mode"));
4177 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4180 /* Assume that if the user asked for normal quad memory instructions, they
4181 want the atomic versions as well, unless they explicitly told us not to
4182 use quad word atomic instructions. */
4183 if (TARGET_QUAD_MEMORY
4184 && !TARGET_QUAD_MEMORY_ATOMIC
4185 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4186 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4188 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4189 generating power8 instructions. */
4190 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4191 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4192 & OPTION_MASK_P8_FUSION);
4194 /* Setting additional fusion flags turns on base fusion. */
4195 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4197 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4199 if (TARGET_P8_FUSION_SIGN)
4200 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4202 if (TARGET_TOC_FUSION)
4203 error ("-mtoc-fusion requires -mpower8-fusion");
4205 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4207 else
4208 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4211 /* Power9 fusion is a superset of power8 fusion. */
4212 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4214 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4216 /* We prefer not to mention undocumented options in
4217 error messages. However, if users have managed to select
4218 power9-fusion without selecting power8-fusion, they
4219 already know about undocumented flags. */
4220 error ("-mpower9-fusion requires -mpower8-fusion");
4221 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4223 else
4224 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4227 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4228 generating power9 instructions. */
4229 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4230 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4231 & OPTION_MASK_P9_FUSION);
4233 /* Power8 does not fuse sign-extended loads with the addis. If we are
4234 optimizing at high levels for speed, convert a sign-extended load into a
4235 zero-extending load and an explicit sign extension. */
4236 if (TARGET_P8_FUSION
4237 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4238 && optimize_function_for_speed_p (cfun)
4239 && optimize >= 3)
4240 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
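/* Schematic example (illustrative note only): rather than emit a
   non-fusible sign-extending load, generate

	addis  rX,r2,sym@toc@ha
	lhz    rY,sym@toc@l(rX)
	extsh  rY,rY

   so the addis/lhz pair can still fuse, with the sign extension done
   explicitly by extsh.  */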
4242 /* TOC fusion requires 64-bit and medium/large code model. */
4243 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4245 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4246 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4247 warning (0, N_("-mtoc-fusion requires 64-bit"));
4250 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4252 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4253 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4254 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4257 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4258 model. */
4259 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4260 && (TARGET_CMODEL != CMODEL_SMALL)
4261 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4262 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4264 /* ISA 3.0 vector instructions include ISA 2.07. */
4265 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4267 /* We prefer not to mention undocumented options in
4268 error messages. However, if users have managed to select
4269 power9-vector without selecting power8-vector, they
4270 already know about undocumented flags. */
4271 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4272 error ("-mpower9-vector requires -mpower8-vector");
4273 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4276 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4277 -mpower9-dform-vector. */
4278 if (TARGET_P9_DFORM_BOTH > 0)
4280 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4281 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4283 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4284 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4286 else if (TARGET_P9_DFORM_BOTH == 0)
4288 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4289 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4291 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4292 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4295 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4296 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4298 /* We prefer not to mention undocumented options in
4299 error messages. However, if users have managed to select
4300 power9-dform without selecting power9-vector, they
4301 already know about undocumented flags. */
4302 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4303 error ("-mpower9-dform requires -mpower9-vector");
4304 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4305 | OPTION_MASK_P9_DFORM_VECTOR);
4308 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4310 /* We prefer not to mention undocumented options in
4311 error messages. However, if users have managed to select
4312 power9-dform without selecting upper-regs-df, they
4313 already know about undocumented flags. */
4314 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4315 error ("-mpower9-dform requires -mupper-regs-df");
4316 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4319 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4321 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4322 error ("-mpower9-dform requires -mupper-regs-sf");
4323 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4326 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4327 but do show up with -mno-lra. Given -mlra will become the default once
4328 PR 69847 is fixed, turn off the options with problems by default if
4329 -mno-lra was used, and warn if the user explicitly asked for the option.
4331 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4332 Enable -mvsx-timode by default if LRA and VSX. */
4333 if (!TARGET_LRA)
4335 if (TARGET_VSX_TIMODE)
4337 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4338 warning (0, "-mvsx-timode might need -mlra");
4340 else
4341 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4345 else
4347 if (TARGET_VSX && !TARGET_VSX_TIMODE
4348 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4349 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4352 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4353 support. If we only have ISA 2.06 support, and the user did not specify
4354 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4355 but we don't enable the full vectorization support. */
4356 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4357 TARGET_ALLOW_MOVMISALIGN = 1;
4359 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4361 if (TARGET_ALLOW_MOVMISALIGN > 0
4362 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4363 error ("-mallow-movmisalign requires -mvsx");
4365 TARGET_ALLOW_MOVMISALIGN = 0;
4368 /* Determine when unaligned vector accesses are permitted, and when
4369 they are preferred over masked Altivec loads. Note that if
4370 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4371 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4372 not true. */
4373 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4375 if (!TARGET_VSX)
4377 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4378 error ("-mefficient-unaligned-vsx requires -mvsx");
4380 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4383 else if (!TARGET_ALLOW_MOVMISALIGN)
4385 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4386 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4388 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4392 /* __float128 requires VSX support. */
4393 if (TARGET_FLOAT128 && !TARGET_VSX)
4395 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4396 error ("-mfloat128 requires VSX support");
4398 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4401 /* If we have -mfloat128 and full ISA 3.0 support, enable -mfloat128-hardware
4402 by default. */
4403 if (TARGET_FLOAT128 && !TARGET_FLOAT128_HW
4404 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4405 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4407 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4408 if ((rs6000_isa_flags & OPTION_MASK_FLOAT128) != 0)
4409 rs6000_isa_flags_explicit |= OPTION_MASK_FLOAT128_HW;
4412 /* IEEE 128-bit floating point hardware instructions imply enabling
4413 __float128. */
4414 if (TARGET_FLOAT128_HW
4415 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4417 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4418 error ("-mfloat128-hardware requires full ISA 3.0 support");
4420 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4423 if (TARGET_FLOAT128_HW
4424 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4425 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4427 /* Print the options after updating the defaults. */
4428 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4429 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4431 /* E500mc does "better" if we inline more aggressively. Respect the
4432 user's opinion, though. */
4433 if (rs6000_block_move_inline_limit == 0
4434 && (rs6000_cpu == PROCESSOR_PPCE500MC
4435 || rs6000_cpu == PROCESSOR_PPCE500MC64
4436 || rs6000_cpu == PROCESSOR_PPCE5500
4437 || rs6000_cpu == PROCESSOR_PPCE6500))
4438 rs6000_block_move_inline_limit = 128;
4440 /* store_one_arg depends on expand_block_move to handle at least the
4441 size of reg_parm_stack_space. */
4442 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4443 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4445 if (global_init_p)
4447 /* If the appropriate debug option is enabled, replace the target hooks
4448 with debug versions that call the real version and then print
4449 debugging information. */
4450 if (TARGET_DEBUG_COST)
4452 targetm.rtx_costs = rs6000_debug_rtx_costs;
4453 targetm.address_cost = rs6000_debug_address_cost;
4454 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4457 if (TARGET_DEBUG_ADDR)
4459 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4460 targetm.legitimize_address = rs6000_debug_legitimize_address;
4461 rs6000_secondary_reload_class_ptr
4462 = rs6000_debug_secondary_reload_class;
4463 rs6000_secondary_memory_needed_ptr
4464 = rs6000_debug_secondary_memory_needed;
4465 rs6000_cannot_change_mode_class_ptr
4466 = rs6000_debug_cannot_change_mode_class;
4467 rs6000_preferred_reload_class_ptr
4468 = rs6000_debug_preferred_reload_class;
4469 rs6000_legitimize_reload_address_ptr
4470 = rs6000_debug_legitimize_reload_address;
4471 rs6000_mode_dependent_address_ptr
4472 = rs6000_debug_mode_dependent_address;
4475 if (rs6000_veclibabi_name)
4477 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4478 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4479 else
4481 error ("unknown vectorization library ABI type (%s) for "
4482 "-mveclibabi= switch", rs6000_veclibabi_name);
4483 ret = false;
4488 if (!global_options_set.x_rs6000_long_double_type_size)
4490 if (main_target_opt != NULL
4491 && (main_target_opt->x_rs6000_long_double_type_size
4492 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4493 error ("target attribute or pragma changes long double size");
4494 else
4495 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4498 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4499 if (!global_options_set.x_rs6000_ieeequad)
4500 rs6000_ieeequad = 1;
4501 #endif
4503 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4504 target attribute or pragma which automatically enables both options,
4505 unless the altivec ABI was set. This is set by default for 64-bit, but
4506 not for 32-bit. */
4507 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4508 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4509 | OPTION_MASK_FLOAT128)
4510 & ~rs6000_isa_flags_explicit);
4512 /* Enable Altivec ABI for AIX -maltivec. */
4513 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4515 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4516 error ("target attribute or pragma changes AltiVec ABI");
4517 else
4518 rs6000_altivec_abi = 1;
4521 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4522 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4523 be explicitly overridden in either case. */
4524 if (TARGET_ELF)
4526 if (!global_options_set.x_rs6000_altivec_abi
4527 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4529 if (main_target_opt != NULL
4530 && !main_target_opt->x_rs6000_altivec_abi)
4531 error ("target attribute or pragma changes AltiVec ABI");
4532 else
4533 rs6000_altivec_abi = 1;
4537 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4538 So far, the only darwin64 targets are also MACH-O. */
4539 if (TARGET_MACHO
4540 && DEFAULT_ABI == ABI_DARWIN
4541 && TARGET_64BIT)
4543 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4544 error ("target attribute or pragma changes darwin64 ABI");
4545 else
4547 rs6000_darwin64_abi = 1;
4548 /* Default to natural alignment, for better performance. */
4549 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4553 /* Place FP constants in the constant pool instead of the TOC
4554 if section anchors are enabled. */
4555 if (flag_section_anchors
4556 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4557 TARGET_NO_FP_IN_TOC = 1;
4559 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4560 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4562 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4563 SUBTARGET_OVERRIDE_OPTIONS;
4564 #endif
4565 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4566 SUBSUBTARGET_OVERRIDE_OPTIONS;
4567 #endif
4568 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4569 SUB3TARGET_OVERRIDE_OPTIONS;
4570 #endif
4572 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4573 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4575 /* For the E500 family of cores, reset the single/double FP flags to let us
4576 check that they remain constant across attributes or pragmas. Also,
4577 clear a possible request for string instructions, which are not supported
4578 and which we might have silently enabled above for -Os.
4580 For other families, clear ISEL in case it was set implicitly.
4583 switch (rs6000_cpu)
4585 case PROCESSOR_PPC8540:
4586 case PROCESSOR_PPC8548:
4587 case PROCESSOR_PPCE500MC:
4588 case PROCESSOR_PPCE500MC64:
4589 case PROCESSOR_PPCE5500:
4590 case PROCESSOR_PPCE6500:
4592 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4593 rs6000_double_float = TARGET_E500_DOUBLE;
4595 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4597 break;
4599 default:
4601 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4602 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4604 break;
4607 if (main_target_opt)
4609 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4610 error ("target attribute or pragma changes single precision floating "
4611 "point");
4612 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4613 error ("target attribute or pragma changes double precision floating "
4614 "point");
4617 /* Detect invalid option combinations with E500. */
4618 CHECK_E500_OPTIONS;
4620 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4621 && rs6000_cpu != PROCESSOR_POWER5
4622 && rs6000_cpu != PROCESSOR_POWER6
4623 && rs6000_cpu != PROCESSOR_POWER7
4624 && rs6000_cpu != PROCESSOR_POWER8
4625 && rs6000_cpu != PROCESSOR_POWER9
4626 && rs6000_cpu != PROCESSOR_PPCA2
4627 && rs6000_cpu != PROCESSOR_CELL
4628 && rs6000_cpu != PROCESSOR_PPC476);
4629 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4630 || rs6000_cpu == PROCESSOR_POWER5
4631 || rs6000_cpu == PROCESSOR_POWER7
4632 || rs6000_cpu == PROCESSOR_POWER8);
4633 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4634 || rs6000_cpu == PROCESSOR_POWER5
4635 || rs6000_cpu == PROCESSOR_POWER6
4636 || rs6000_cpu == PROCESSOR_POWER7
4637 || rs6000_cpu == PROCESSOR_POWER8
4638 || rs6000_cpu == PROCESSOR_POWER9
4639 || rs6000_cpu == PROCESSOR_PPCE500MC
4640 || rs6000_cpu == PROCESSOR_PPCE500MC64
4641 || rs6000_cpu == PROCESSOR_PPCE5500
4642 || rs6000_cpu == PROCESSOR_PPCE6500);
4644 /* Allow debug switches to override the above settings. These are set to -1
4645 in rs6000.opt to indicate the user hasn't directly set the switch. */
4646 if (TARGET_ALWAYS_HINT >= 0)
4647 rs6000_always_hint = TARGET_ALWAYS_HINT;
4649 if (TARGET_SCHED_GROUPS >= 0)
4650 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4652 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4653 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4655 rs6000_sched_restricted_insns_priority
4656 = (rs6000_sched_groups ? 1 : 0);
4658 /* Handle -msched-costly-dep option. */
4659 rs6000_sched_costly_dep
4660 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4662 if (rs6000_sched_costly_dep_str)
4664 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4665 rs6000_sched_costly_dep = no_dep_costly;
4666 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4667 rs6000_sched_costly_dep = all_deps_costly;
4668 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4669 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4670 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4671 rs6000_sched_costly_dep = store_to_load_dep_costly;
4672 else
4673 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4674 atoi (rs6000_sched_costly_dep_str));
4677 /* Handle -minsert-sched-nops option. */
4678 rs6000_sched_insert_nops
4679 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4681 if (rs6000_sched_insert_nops_str)
4683 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4684 rs6000_sched_insert_nops = sched_finish_none;
4685 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4686 rs6000_sched_insert_nops = sched_finish_pad_groups;
4687 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4688 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4689 else
4690 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4691 atoi (rs6000_sched_insert_nops_str));
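/* For example, "-msched-costly-dep=true_store_to_load" and
   "-minsert-sched-nops=regroup_exact" select named policies above, while a
   bare integer falls through to atoi; for -msched-costly-dep the option
   documentation describes such a number as a latency threshold at or beyond
   which a dependence is considered costly.  */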
4694 if (global_init_p)
4696 #ifdef TARGET_REGNAMES
4697 /* If the user desires alternate register names, copy in the
4698 alternate names now. */
4699 if (TARGET_REGNAMES)
4700 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4701 #endif
4703 /* Set aix_struct_return last, after the ABI is determined.
4704 If -maix-struct-return or -msvr4-struct-return was explicitly
4705 used, don't override with the ABI default. */
4706 if (!global_options_set.x_aix_struct_return)
4707 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4709 #if 0
4710 /* IBM XL compiler defaults to unsigned bitfields. */
4711 if (TARGET_XL_COMPAT)
4712 flag_signed_bitfields = 0;
4713 #endif
4715 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4716 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4718 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4720 /* We can only guarantee the availability of DI pseudo-ops when
4721 assembling for 64-bit targets. */
4722 if (!TARGET_64BIT)
4724 targetm.asm_out.aligned_op.di = NULL;
4725 targetm.asm_out.unaligned_op.di = NULL;
4729 /* Set branch target alignment, if not optimizing for size. */
4730 if (!optimize_size)
4732 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4733 8-byte aligned to avoid misprediction by the branch predictor. */
4734 if (rs6000_cpu == PROCESSOR_TITAN
4735 || rs6000_cpu == PROCESSOR_CELL)
4737 if (align_functions <= 0)
4738 align_functions = 8;
4739 if (align_jumps <= 0)
4740 align_jumps = 8;
4741 if (align_loops <= 0)
4742 align_loops = 8;
4744 if (rs6000_align_branch_targets)
4746 if (align_functions <= 0)
4747 align_functions = 16;
4748 if (align_jumps <= 0)
4749 align_jumps = 16;
4750 if (align_loops <= 0)
4752 can_override_loop_align = 1;
4753 align_loops = 16;
4756 if (align_jumps_max_skip <= 0)
4757 align_jumps_max_skip = 15;
4758 if (align_loops_max_skip <= 0)
4759 align_loops_max_skip = 15;
4762 /* Arrange to save and restore machine status around nested functions. */
4763 init_machine_status = rs6000_init_machine_status;
4765 /* We should always be splitting complex arguments, but we can't break
4766 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4767 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4768 targetm.calls.split_complex_arg = NULL;
4771 /* Initialize rs6000_cost with the appropriate target costs. */
4772 if (optimize_size)
4773 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4774 else
4775 switch (rs6000_cpu)
4777 case PROCESSOR_RS64A:
4778 rs6000_cost = &rs64a_cost;
4779 break;
4781 case PROCESSOR_MPCCORE:
4782 rs6000_cost = &mpccore_cost;
4783 break;
4785 case PROCESSOR_PPC403:
4786 rs6000_cost = &ppc403_cost;
4787 break;
4789 case PROCESSOR_PPC405:
4790 rs6000_cost = &ppc405_cost;
4791 break;
4793 case PROCESSOR_PPC440:
4794 rs6000_cost = &ppc440_cost;
4795 break;
4797 case PROCESSOR_PPC476:
4798 rs6000_cost = &ppc476_cost;
4799 break;
4801 case PROCESSOR_PPC601:
4802 rs6000_cost = &ppc601_cost;
4803 break;
4805 case PROCESSOR_PPC603:
4806 rs6000_cost = &ppc603_cost;
4807 break;
4809 case PROCESSOR_PPC604:
4810 rs6000_cost = &ppc604_cost;
4811 break;
4813 case PROCESSOR_PPC604e:
4814 rs6000_cost = &ppc604e_cost;
4815 break;
4817 case PROCESSOR_PPC620:
4818 rs6000_cost = &ppc620_cost;
4819 break;
4821 case PROCESSOR_PPC630:
4822 rs6000_cost = &ppc630_cost;
4823 break;
4825 case PROCESSOR_CELL:
4826 rs6000_cost = &ppccell_cost;
4827 break;
4829 case PROCESSOR_PPC750:
4830 case PROCESSOR_PPC7400:
4831 rs6000_cost = &ppc750_cost;
4832 break;
4834 case PROCESSOR_PPC7450:
4835 rs6000_cost = &ppc7450_cost;
4836 break;
4838 case PROCESSOR_PPC8540:
4839 case PROCESSOR_PPC8548:
4840 rs6000_cost = &ppc8540_cost;
4841 break;
4843 case PROCESSOR_PPCE300C2:
4844 case PROCESSOR_PPCE300C3:
4845 rs6000_cost = &ppce300c2c3_cost;
4846 break;
4848 case PROCESSOR_PPCE500MC:
4849 rs6000_cost = &ppce500mc_cost;
4850 break;
4852 case PROCESSOR_PPCE500MC64:
4853 rs6000_cost = &ppce500mc64_cost;
4854 break;
4856 case PROCESSOR_PPCE5500:
4857 rs6000_cost = &ppce5500_cost;
4858 break;
4860 case PROCESSOR_PPCE6500:
4861 rs6000_cost = &ppce6500_cost;
4862 break;
4864 case PROCESSOR_TITAN:
4865 rs6000_cost = &titan_cost;
4866 break;
4868 case PROCESSOR_POWER4:
4869 case PROCESSOR_POWER5:
4870 rs6000_cost = &power4_cost;
4871 break;
4873 case PROCESSOR_POWER6:
4874 rs6000_cost = &power6_cost;
4875 break;
4877 case PROCESSOR_POWER7:
4878 rs6000_cost = &power7_cost;
4879 break;
4881 case PROCESSOR_POWER8:
4882 rs6000_cost = &power8_cost;
4883 break;
4885 case PROCESSOR_POWER9:
4886 rs6000_cost = &power9_cost;
4887 break;
4889 case PROCESSOR_PPCA2:
4890 rs6000_cost = &ppca2_cost;
4891 break;
4893 default:
4894 gcc_unreachable ();
4897 if (global_init_p)
4899 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4900 rs6000_cost->simultaneous_prefetches,
4901 global_options.x_param_values,
4902 global_options_set.x_param_values);
4903 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4904 global_options.x_param_values,
4905 global_options_set.x_param_values);
4906 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4907 rs6000_cost->cache_line_size,
4908 global_options.x_param_values,
4909 global_options_set.x_param_values);
4910 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4911 global_options.x_param_values,
4912 global_options_set.x_param_values);
4914 /* Increase loop peeling limits based on performance analysis. */
4915 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4916 global_options.x_param_values,
4917 global_options_set.x_param_values);
4918 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4919 global_options.x_param_values,
4920 global_options_set.x_param_values);
4922 /* If using typedef char *va_list, signal that
4923 __builtin_va_start (&ap, 0) can be optimized to
4924 ap = __builtin_next_arg (0). */
4925 if (DEFAULT_ABI != ABI_V4)
4926 targetm.expand_builtin_va_start = NULL;
4929 /* Set up single/double float flags.
4930 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4931 then set both flags. */
4932 if (TARGET_HARD_FLOAT && TARGET_FPRS
4933 && rs6000_single_float == 0 && rs6000_double_float == 0)
4934 rs6000_single_float = rs6000_double_float = 1;
4936 /* If not explicitly specified via option, decide whether to generate indexed
4937 load/store instructions. */
4938 if (TARGET_AVOID_XFORM == -1)
4939 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4940 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4941 need indexed accesses and the type used is the scalar type of the element
4942 being loaded or stored. */
4943 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4944 && !TARGET_ALTIVEC);
4946 /* Set the -mrecip options. */
4947 if (rs6000_recip_name)
4949 char *p = ASTRDUP (rs6000_recip_name);
4950 char *q;
4951 unsigned int mask, i;
4952 bool invert;
4954 while ((q = strtok (p, ",")) != NULL)
4956 p = NULL;
4957 if (*q == '!')
4959 invert = true;
4960 q++;
4962 else
4963 invert = false;
4965 if (!strcmp (q, "default"))
4966 mask = ((TARGET_RECIP_PRECISION)
4967 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4968 else
4970 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4971 if (!strcmp (q, recip_options[i].string))
4973 mask = recip_options[i].mask;
4974 break;
4977 if (i == ARRAY_SIZE (recip_options))
4979 error ("unknown option for -mrecip=%s", q);
4980 invert = false;
4981 mask = 0;
4982 ret = false;
4986 if (invert)
4987 rs6000_recip_control &= ~mask;
4988 else
4989 rs6000_recip_control |= mask;
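/* For example, "-mrecip=all,!rsqrtd" first sets every bit named by the
   recip_options entry for "all" and then, via the '!' prefix handled above,
   clears the bits for the "rsqrtd" entry again.  (Entry names follow the
   documented -mrecip= option values.)  */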
4993 /* Set the builtin mask of the various options used that could affect which
4994 builtins were used. In the past we used target_flags, but we've run out
4995 of bits, and some options like SPE and PAIRED are no longer in
4996 target_flags. */
4997 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4998 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4999 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5000 rs6000_builtin_mask);
5002 /* Initialize all of the registers. */
5003 rs6000_init_hard_regno_mode_ok (global_init_p);
5005 /* Save the initial options in case the user does function-specific options. */
5006 if (global_init_p)
5007 target_option_default_node = target_option_current_node
5008 = build_target_option_node (&global_options);
5010 /* If not explicitly specified via option, decide whether to generate the
5011 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5012 if (TARGET_LINK_STACK == -1)
5013 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5015 return ret;
5018 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5019 define the target cpu type. */
5021 static void
5022 rs6000_option_override (void)
5024 (void) rs6000_option_override_internal (true);
5026 /* Register machine-specific passes. This needs to be done at start-up.
5027 It's convenient to do it here (like i386 does). */
5028 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5030 struct register_pass_info analyze_swaps_info
5031 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5033 register_pass (&analyze_swaps_info);
5037 /* Implement targetm.vectorize.builtin_mask_for_load. */
5038 static tree
5039 rs6000_builtin_mask_for_load (void)
5041 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5042 if ((TARGET_ALTIVEC && !TARGET_VSX)
5043 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5044 return altivec_builtin_mask_for_load;
5045 else
5046 return 0;
5049 /* Implement LOOP_ALIGN. */
5050 int
5051 rs6000_loop_align (rtx label)
5053 basic_block bb;
5054 int ninsns;
5056 /* Don't override loop alignment if -falign-loops was specified. */
5057 if (!can_override_loop_align)
5058 return align_loops_log;
5060 bb = BLOCK_FOR_INSN (label);
5061 ninsns = num_loop_insns(bb->loop_father);
5063 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5064 if (ninsns > 4 && ninsns <= 8
5065 && (rs6000_cpu == PROCESSOR_POWER4
5066 || rs6000_cpu == PROCESSOR_POWER5
5067 || rs6000_cpu == PROCESSOR_POWER6
5068 || rs6000_cpu == PROCESSOR_POWER7
5069 || rs6000_cpu == PROCESSOR_POWER8
5070 || rs6000_cpu == PROCESSOR_POWER9))
5071 return 5;
5072 else
5073 return align_loops_log;
5076 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5077 static int
5078 rs6000_loop_align_max_skip (rtx_insn *label)
5080 return (1 << rs6000_loop_align (label)) - 1;
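/* Worked example (illustrative): a return value of 5 from
rs6000_loop_align requests 1 << 5 == 32-byte alignment, so the maximum
padding the assembler may insert before such a loop is
(1 << 5) - 1 == 31 bytes. */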
5083 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5084 after applying N iterations. This routine does not determine how many
5085 iterations are required to reach the desired alignment. */
5087 static bool
5088 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5090 if (is_packed)
5091 return false;
5093 if (TARGET_32BIT)
5095 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5096 return true;
5098 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5099 return true;
5101 return false;
5103 else
5105 if (TARGET_MACHO)
5106 return false;
5108 /* Assume all other types are naturally aligned. CHECKME! */
5109 return true;
5113 /* Return true if the vector misalignment factor is supported by the
5114 target. */
5115 static bool
5116 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5117 const_tree type,
5118 int misalignment,
5119 bool is_packed)
5121 if (TARGET_VSX)
5123 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5124 return true;
5126 /* Return false if the movmisalign pattern is not supported for this mode. */
5127 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5128 return false;
5130 if (misalignment == -1)
5132 /* Misalignment factor is unknown at compile time but we know
5133 it's word aligned. */
5134 if (rs6000_vector_alignment_reachable (type, is_packed))
5136 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5138 if (element_size == 64 || element_size == 32)
5139 return true;
5142 return false;
5145 /* VSX supports word-aligned vector. */
5146 if (misalignment % 4 == 0)
5147 return true;
5149 return false;
5152 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5153 static int
5154 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5155 tree vectype, int misalign)
5157 unsigned elements;
5158 tree elem_type;
5160 switch (type_of_cost)
5162 case scalar_stmt:
5163 case scalar_load:
5164 case scalar_store:
5165 case vector_stmt:
5166 case vector_load:
5167 case vector_store:
5168 case vec_to_scalar:
5169 case scalar_to_vec:
5170 case cond_branch_not_taken:
5171 return 1;
5173 case vec_perm:
5174 if (TARGET_VSX)
5175 return 3;
5176 else
5177 return 1;
5179 case vec_promote_demote:
5180 if (TARGET_VSX)
5181 return 4;
5182 else
5183 return 1;
5185 case cond_branch_taken:
5186 return 3;
5188 case unaligned_load:
5189 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5190 return 1;
5192 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5194 elements = TYPE_VECTOR_SUBPARTS (vectype);
5195 if (elements == 2)
5196 /* Double word aligned. */
5197 return 2;
5199 if (elements == 4)
5201 switch (misalign)
5203 case 8:
5204 /* Double word aligned. */
5205 return 2;
5207 case -1:
5208 /* Unknown misalignment. */
5209 case 4:
5210 case 12:
5211 /* Word aligned. */
5212 return 22;
5214 default:
5215 gcc_unreachable ();
5220 if (TARGET_ALTIVEC)
5221 /* Misaligned loads are not supported. */
5222 gcc_unreachable ();
5224 return 2;
5226 case unaligned_store:
5227 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5228 return 1;
5230 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5232 elements = TYPE_VECTOR_SUBPARTS (vectype);
5233 if (elements == 2)
5234 /* Double word aligned. */
5235 return 2;
5237 if (elements == 4)
5239 switch (misalign)
5241 case 8:
5242 /* Double word aligned. */
5243 return 2;
5245 case -1:
5246 /* Unknown misalignment. */
5247 case 4:
5248 case 12:
5249 /* Word aligned. */
5250 return 23;
5252 default:
5253 gcc_unreachable ();
5258 if (TARGET_ALTIVEC)
5259 /* Misaligned stores are not supported. */
5260 gcc_unreachable ();
5262 return 2;
5264 case vec_construct:
5265 elements = TYPE_VECTOR_SUBPARTS (vectype);
5266 elem_type = TREE_TYPE (vectype);
5267 /* 32-bit vectors loaded into registers are stored as double
5268 precision, so we need n/2 converts in addition to the usual
5269 n/2 merges to construct a vector of short floats from them. */
5270 if (SCALAR_FLOAT_TYPE_P (elem_type)
5271 && TYPE_PRECISION (elem_type) == 32)
5272 return elements + 1;
5273 else
5274 return elements / 2 + 1;
5276 default:
5277 gcc_unreachable ();
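/* Worked example (illustrative): an unaligned_load of a 4-element VSX
vector with misalign == 8 (doubleword aligned) costs 2, while
misalign == 4 or 12 (only word aligned) costs 22, and an unknown
misalignment (-1) is priced like the word-aligned case. */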
5281 /* Implement targetm.vectorize.preferred_simd_mode. */
5283 static machine_mode
5284 rs6000_preferred_simd_mode (machine_mode mode)
5286 if (TARGET_VSX)
5287 switch (mode)
5289 case DFmode:
5290 return V2DFmode;
5291 default:;
5293 if (TARGET_ALTIVEC || TARGET_VSX)
5294 switch (mode)
5296 case SFmode:
5297 return V4SFmode;
5298 case TImode:
5299 return V1TImode;
5300 case DImode:
5301 return V2DImode;
5302 case SImode:
5303 return V4SImode;
5304 case HImode:
5305 return V8HImode;
5306 case QImode:
5307 return V16QImode;
5308 default:;
5310 if (TARGET_SPE)
5311 switch (mode)
5313 case SFmode:
5314 return V2SFmode;
5315 case SImode:
5316 return V2SImode;
5317 default:;
5319 if (TARGET_PAIRED_FLOAT
5320 && mode == SFmode)
5321 return V2SFmode;
5322 return word_mode;
5325 typedef struct _rs6000_cost_data
5327 struct loop *loop_info;
5328 unsigned cost[3];
5329 } rs6000_cost_data;
5331 /* Test for likely overcommitment of vector hardware resources. If a
5332 loop iteration is relatively large, and too large a percentage of
5333 instructions in the loop are vectorized, the cost model may not
5334 adequately reflect delays from unavailable vector resources.
5335 Penalize the loop body cost for this case. */
5337 static void
5338 rs6000_density_test (rs6000_cost_data *data)
5340 const int DENSITY_PCT_THRESHOLD = 85;
5341 const int DENSITY_SIZE_THRESHOLD = 70;
5342 const int DENSITY_PENALTY = 10;
5343 struct loop *loop = data->loop_info;
5344 basic_block *bbs = get_loop_body (loop);
5345 int nbbs = loop->num_nodes;
5346 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5347 int i, density_pct;
5349 for (i = 0; i < nbbs; i++)
5351 basic_block bb = bbs[i];
5352 gimple_stmt_iterator gsi;
5354 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5356 gimple *stmt = gsi_stmt (gsi);
5357 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5359 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5360 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5361 not_vec_cost++;
5365 free (bbs);
5366 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5368 if (density_pct > DENSITY_PCT_THRESHOLD
5369 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5371 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_NOTE, vect_location,
5374 "density %d%%, cost %d exceeds threshold, penalizing "
5375 "loop body cost by %d%%", density_pct,
5376 vec_cost + not_vec_cost, DENSITY_PENALTY);
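/* Worked example (illustrative): with vec_cost == 90 and not_vec_cost
== 10, density_pct == 9000 / 100 == 90 > 85 and the total size
100 > 70, so the body cost is inflated to 90 * (100 + 10) / 100 == 99. */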
5380 /* Implement targetm.vectorize.init_cost. */
5382 static void *
5383 rs6000_init_cost (struct loop *loop_info)
5385 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5386 data->loop_info = loop_info;
5387 data->cost[vect_prologue] = 0;
5388 data->cost[vect_body] = 0;
5389 data->cost[vect_epilogue] = 0;
5390 return data;
5393 /* Implement targetm.vectorize.add_stmt_cost. */
5395 static unsigned
5396 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5397 struct _stmt_vec_info *stmt_info, int misalign,
5398 enum vect_cost_model_location where)
5400 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5401 unsigned retval = 0;
5403 if (flag_vect_cost_model)
5405 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5406 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5407 misalign);
5408 /* Statements in an inner loop relative to the loop being
5409 vectorized are weighted more heavily. The value here is
5410 arbitrary and could potentially be improved with analysis. */
5411 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5412 count *= 50; /* FIXME. */
5414 retval = (unsigned) (count * stmt_cost);
5415 cost_data->cost[where] += retval;
5418 return retval;
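/* Worked example (illustrative): a vector_stmt of cost 1 occurring
COUNT == 2 times in an inner loop relative to the loop being vectorized
is charged 2 * 50 * 1 == 100 units against cost[vect_body]. */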
5421 /* Implement targetm.vectorize.finish_cost. */
5423 static void
5424 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5425 unsigned *body_cost, unsigned *epilogue_cost)
5427 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5429 if (cost_data->loop_info)
5430 rs6000_density_test (cost_data);
5432 *prologue_cost = cost_data->cost[vect_prologue];
5433 *body_cost = cost_data->cost[vect_body];
5434 *epilogue_cost = cost_data->cost[vect_epilogue];
5437 /* Implement targetm.vectorize.destroy_cost_data. */
5439 static void
5440 rs6000_destroy_cost_data (void *data)
5442 free (data);
5445 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5446 library with vectorized intrinsics. */
5448 static tree
5449 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5450 tree type_in)
5452 char name[32];
5453 const char *suffix = NULL;
5454 tree fntype, new_fndecl, bdecl = NULL_TREE;
5455 int n_args = 1;
5456 const char *bname;
5457 machine_mode el_mode, in_mode;
5458 int n, in_n;
5460 /* Libmass is suitable for unsafe math only as it does not correctly support
5461 parts of IEEE with the required precision such as denormals. Only support
5462 it if we have VSX to use the simd d2 or f4 functions.
5463 XXX: Add variable length support. */
5464 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5465 return NULL_TREE;
5467 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5468 n = TYPE_VECTOR_SUBPARTS (type_out);
5469 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5470 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5471 if (el_mode != in_mode
5472 || n != in_n)
5473 return NULL_TREE;
5475 switch (fn)
5477 CASE_CFN_ATAN2:
5478 CASE_CFN_HYPOT:
5479 CASE_CFN_POW:
5480 n_args = 2;
5481 /* fall through */
5483 CASE_CFN_ACOS:
5484 CASE_CFN_ACOSH:
5485 CASE_CFN_ASIN:
5486 CASE_CFN_ASINH:
5487 CASE_CFN_ATAN:
5488 CASE_CFN_ATANH:
5489 CASE_CFN_CBRT:
5490 CASE_CFN_COS:
5491 CASE_CFN_COSH:
5492 CASE_CFN_ERF:
5493 CASE_CFN_ERFC:
5494 CASE_CFN_EXP2:
5495 CASE_CFN_EXP:
5496 CASE_CFN_EXPM1:
5497 CASE_CFN_LGAMMA:
5498 CASE_CFN_LOG10:
5499 CASE_CFN_LOG1P:
5500 CASE_CFN_LOG2:
5501 CASE_CFN_LOG:
5502 CASE_CFN_SIN:
5503 CASE_CFN_SINH:
5504 CASE_CFN_SQRT:
5505 CASE_CFN_TAN:
5506 CASE_CFN_TANH:
5507 if (el_mode == DFmode && n == 2)
5509 bdecl = mathfn_built_in (double_type_node, fn);
5510 suffix = "d2"; /* pow -> powd2 */
5512 else if (el_mode == SFmode && n == 4)
5514 bdecl = mathfn_built_in (float_type_node, fn);
5515 suffix = "4"; /* powf -> powf4 */
5517 else
5518 return NULL_TREE;
5519 if (!bdecl)
5520 return NULL_TREE;
5521 break;
5523 default:
5524 return NULL_TREE;
5527 gcc_assert (suffix != NULL);
5528 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5529 if (!bname)
5530 return NULL_TREE;
5532 strcpy (name, bname + sizeof ("__builtin_") - 1);
5533 strcat (name, suffix);
5535 if (n_args == 1)
5536 fntype = build_function_type_list (type_out, type_in, NULL);
5537 else if (n_args == 2)
5538 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5539 else
5540 gcc_unreachable ();
5542 /* Build a function declaration for the vectorized function. */
5543 new_fndecl = build_decl (BUILTINS_LOCATION,
5544 FUNCTION_DECL, get_identifier (name), fntype);
5545 TREE_PUBLIC (new_fndecl) = 1;
5546 DECL_EXTERNAL (new_fndecl) = 1;
5547 DECL_IS_NOVOPS (new_fndecl) = 1;
5548 TREE_READONLY (new_fndecl) = 1;
5550 return new_fndecl;
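/* Worked example (illustrative): vectorizing pow for V2DFmode selects
bdecl == __builtin_pow and suffix "d2"; stripping the "__builtin_"
prefix and appending the suffix yields the MASS routine name "powd2",
declared above as an external two-argument function from
type_in x type_in to type_out. */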
5553 /* Returns a function decl for a vectorized version of the builtin function
5554 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5555 if it is not available. */
5557 static tree
5558 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5559 tree type_in)
5561 machine_mode in_mode, out_mode;
5562 int in_n, out_n;
5564 if (TARGET_DEBUG_BUILTIN)
5565 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5566 combined_fn_name (combined_fn (fn)),
5567 GET_MODE_NAME (TYPE_MODE (type_out)),
5568 GET_MODE_NAME (TYPE_MODE (type_in)));
5570 if (TREE_CODE (type_out) != VECTOR_TYPE
5571 || TREE_CODE (type_in) != VECTOR_TYPE
5572 || !TARGET_VECTORIZE_BUILTINS)
5573 return NULL_TREE;
5575 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5576 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5577 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5578 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5580 switch (fn)
5582 CASE_CFN_COPYSIGN:
5583 if (VECTOR_UNIT_VSX_P (V2DFmode)
5584 && out_mode == DFmode && out_n == 2
5585 && in_mode == DFmode && in_n == 2)
5586 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5587 if (VECTOR_UNIT_VSX_P (V4SFmode)
5588 && out_mode == SFmode && out_n == 4
5589 && in_mode == SFmode && in_n == 4)
5590 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5591 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5592 && out_mode == SFmode && out_n == 4
5593 && in_mode == SFmode && in_n == 4)
5594 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5595 break;
5596 CASE_CFN_CEIL:
5597 if (VECTOR_UNIT_VSX_P (V2DFmode)
5598 && out_mode == DFmode && out_n == 2
5599 && in_mode == DFmode && in_n == 2)
5600 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5601 if (VECTOR_UNIT_VSX_P (V4SFmode)
5602 && out_mode == SFmode && out_n == 4
5603 && in_mode == SFmode && in_n == 4)
5604 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5605 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5606 && out_mode == SFmode && out_n == 4
5607 && in_mode == SFmode && in_n == 4)
5608 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5609 break;
5610 CASE_CFN_FLOOR:
5611 if (VECTOR_UNIT_VSX_P (V2DFmode)
5612 && out_mode == DFmode && out_n == 2
5613 && in_mode == DFmode && in_n == 2)
5614 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5615 if (VECTOR_UNIT_VSX_P (V4SFmode)
5616 && out_mode == SFmode && out_n == 4
5617 && in_mode == SFmode && in_n == 4)
5618 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5619 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5620 && out_mode == SFmode && out_n == 4
5621 && in_mode == SFmode && in_n == 4)
5622 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5623 break;
5624 CASE_CFN_FMA:
5625 if (VECTOR_UNIT_VSX_P (V2DFmode)
5626 && out_mode == DFmode && out_n == 2
5627 && in_mode == DFmode && in_n == 2)
5628 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5629 if (VECTOR_UNIT_VSX_P (V4SFmode)
5630 && out_mode == SFmode && out_n == 4
5631 && in_mode == SFmode && in_n == 4)
5632 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5633 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5634 && out_mode == SFmode && out_n == 4
5635 && in_mode == SFmode && in_n == 4)
5636 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5637 break;
5638 CASE_CFN_TRUNC:
5639 if (VECTOR_UNIT_VSX_P (V2DFmode)
5640 && out_mode == DFmode && out_n == 2
5641 && in_mode == DFmode && in_n == 2)
5642 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5643 if (VECTOR_UNIT_VSX_P (V4SFmode)
5644 && out_mode == SFmode && out_n == 4
5645 && in_mode == SFmode && in_n == 4)
5646 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5647 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5648 && out_mode == SFmode && out_n == 4
5649 && in_mode == SFmode && in_n == 4)
5650 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5651 break;
5652 CASE_CFN_NEARBYINT:
5653 if (VECTOR_UNIT_VSX_P (V2DFmode)
5654 && flag_unsafe_math_optimizations
5655 && out_mode == DFmode && out_n == 2
5656 && in_mode == DFmode && in_n == 2)
5657 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5658 if (VECTOR_UNIT_VSX_P (V4SFmode)
5659 && flag_unsafe_math_optimizations
5660 && out_mode == SFmode && out_n == 4
5661 && in_mode == SFmode && in_n == 4)
5662 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5663 break;
5664 CASE_CFN_RINT:
5665 if (VECTOR_UNIT_VSX_P (V2DFmode)
5666 && !flag_trapping_math
5667 && out_mode == DFmode && out_n == 2
5668 && in_mode == DFmode && in_n == 2)
5669 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5670 if (VECTOR_UNIT_VSX_P (V4SFmode)
5671 && !flag_trapping_math
5672 && out_mode == SFmode && out_n == 4
5673 && in_mode == SFmode && in_n == 4)
5674 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5675 break;
5676 default:
5677 break;
5680 /* Generate calls to libmass if appropriate. */
5681 if (rs6000_veclib_handler)
5682 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5684 return NULL_TREE;
5687 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5689 static tree
5690 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5691 tree type_in)
5693 machine_mode in_mode, out_mode;
5694 int in_n, out_n;
5696 if (TARGET_DEBUG_BUILTIN)
5697 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5698 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5699 GET_MODE_NAME (TYPE_MODE (type_out)),
5700 GET_MODE_NAME (TYPE_MODE (type_in)));
5702 if (TREE_CODE (type_out) != VECTOR_TYPE
5703 || TREE_CODE (type_in) != VECTOR_TYPE
5704 || !TARGET_VECTORIZE_BUILTINS)
5705 return NULL_TREE;
5707 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5708 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5709 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5710 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5712 enum rs6000_builtins fn
5713 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5714 switch (fn)
5716 case RS6000_BUILTIN_RSQRTF:
5717 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5718 && out_mode == SFmode && out_n == 4
5719 && in_mode == SFmode && in_n == 4)
5720 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5721 break;
5722 case RS6000_BUILTIN_RSQRT:
5723 if (VECTOR_UNIT_VSX_P (V2DFmode)
5724 && out_mode == DFmode && out_n == 2
5725 && in_mode == DFmode && in_n == 2)
5726 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5727 break;
5728 case RS6000_BUILTIN_RECIPF:
5729 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5730 && out_mode == SFmode && out_n == 4
5731 && in_mode == SFmode && in_n == 4)
5732 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5733 break;
5734 case RS6000_BUILTIN_RECIP:
5735 if (VECTOR_UNIT_VSX_P (V2DFmode)
5736 && out_mode == DFmode && out_n == 2
5737 && in_mode == DFmode && in_n == 2)
5738 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5739 break;
5740 default:
5741 break;
5743 return NULL_TREE;
5746 /* Default CPU string for rs6000*_file_start functions. */
5747 static const char *rs6000_default_cpu;
5749 /* Do anything needed at the start of the asm file. */
5751 static void
5752 rs6000_file_start (void)
5754 char buffer[80];
5755 const char *start = buffer;
5756 FILE *file = asm_out_file;
5758 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5760 default_file_start ();
5762 if (flag_verbose_asm)
5764 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5766 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5768 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5769 start = "";
5772 if (global_options_set.x_rs6000_cpu_index)
5774 fprintf (file, "%s -mcpu=%s", start,
5775 processor_target_table[rs6000_cpu_index].name);
5776 start = "";
5779 if (global_options_set.x_rs6000_tune_index)
5781 fprintf (file, "%s -mtune=%s", start,
5782 processor_target_table[rs6000_tune_index].name);
5783 start = "";
5786 if (PPC405_ERRATUM77)
5788 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5789 start = "";
5792 #ifdef USING_ELFOS_H
5793 switch (rs6000_sdata)
5795 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5796 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5797 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5798 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5801 if (rs6000_sdata && g_switch_value)
5803 fprintf (file, "%s -G %d", start,
5804 g_switch_value);
5805 start = "";
5807 #endif
5809 if (*start == '\0')
5810 putc ('\n', file);
5813 #ifdef USING_ELFOS_H
5814 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5815 && !global_options_set.x_rs6000_cpu_index)
5817 fputs ("\t.machine ", asm_out_file);
5818 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5819 fputs ("power9\n", asm_out_file);
5820 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5821 fputs ("power8\n", asm_out_file);
5822 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5823 fputs ("power7\n", asm_out_file);
5824 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5825 fputs ("power6\n", asm_out_file);
5826 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5827 fputs ("power5\n", asm_out_file);
5828 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5829 fputs ("power4\n", asm_out_file);
5830 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5831 fputs ("ppc64\n", asm_out_file);
5832 else
5833 fputs ("ppc\n", asm_out_file);
5835 #endif
5837 if (DEFAULT_ABI == ABI_ELFv2)
5838 fprintf (file, "\t.abiversion 2\n");
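/* Example output (illustrative): with no --with-cpu default and no
-mcpu= option, a compiler whose default ISA flags include
OPTION_MASK_DIRECT_MOVE emits ".machine power8" here, followed by
".abiversion 2" when targeting the ELFv2 ABI. */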
5842 /* Return nonzero if this function is known to have a null epilogue. */
5845 direct_return (void)
5847 if (reload_completed)
5849 rs6000_stack_t *info = rs6000_stack_info ();
5851 if (info->first_gp_reg_save == 32
5852 && info->first_fp_reg_save == 64
5853 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5854 && ! info->lr_save_p
5855 && ! info->cr_save_p
5856 && info->vrsave_size == 0
5857 && ! info->push_p)
5858 return 1;
5861 return 0;
5864 /* Return the number of instructions it takes to form a constant in an
5865 integer register. */
5868 num_insns_constant_wide (HOST_WIDE_INT value)
5870 /* signed constant loadable with addi */
5871 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5872 return 1;
5874 /* constant loadable with addis */
5875 else if ((value & 0xffff) == 0
5876 && (value >> 31 == -1 || value >> 31 == 0))
5877 return 1;
5879 else if (TARGET_POWERPC64)
5881 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5882 HOST_WIDE_INT high = value >> 31;
5884 if (high == 0 || high == -1)
5885 return 2;
5887 high >>= 1;
5889 if (low == 0)
5890 return num_insns_constant_wide (high) + 1;
5891 else if (high == 0)
5892 return num_insns_constant_wide (low) + 1;
5893 else
5894 return (num_insns_constant_wide (high)
5895 + num_insns_constant_wide (low) + 1);
5898 else
5899 return 2;
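/* Worked examples (illustrative): 0x7fff passes the addi test and
costs 1 insn; 0x12345678 reaches the TARGET_POWERPC64 branch with
high == 0 and costs 2 (lis + ori); 0x1234567800000000 has low == 0,
so it costs num_insns_constant_wide (0x12345678) + 1 == 3
(lis + ori + sldi). */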
5903 num_insns_constant (rtx op, machine_mode mode)
5905 HOST_WIDE_INT low, high;
5907 switch (GET_CODE (op))
5909 case CONST_INT:
5910 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5911 && rs6000_is_valid_and_mask (op, mode))
5912 return 2;
5913 else
5914 return num_insns_constant_wide (INTVAL (op));
5916 case CONST_WIDE_INT:
5918 int i;
5919 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5920 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5921 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5922 return ins;
5925 case CONST_DOUBLE:
5926 if (mode == SFmode || mode == SDmode)
5928 long l;
5930 if (DECIMAL_FLOAT_MODE_P (mode))
5931 REAL_VALUE_TO_TARGET_DECIMAL32
5932 (*CONST_DOUBLE_REAL_VALUE (op), l);
5933 else
5934 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5935 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5938 long l[2];
5939 if (DECIMAL_FLOAT_MODE_P (mode))
5940 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5941 else
5942 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5943 high = l[WORDS_BIG_ENDIAN == 0];
5944 low = l[WORDS_BIG_ENDIAN != 0];
5946 if (TARGET_32BIT)
5947 return (num_insns_constant_wide (low)
5948 + num_insns_constant_wide (high));
5949 else
5951 if ((high == 0 && low >= 0)
5952 || (high == -1 && low < 0))
5953 return num_insns_constant_wide (low);
5955 else if (rs6000_is_valid_and_mask (op, mode))
5956 return 2;
5958 else if (low == 0)
5959 return num_insns_constant_wide (high) + 1;
5961 else
5962 return (num_insns_constant_wide (high)
5963 + num_insns_constant_wide (low) + 1);
5966 default:
5967 gcc_unreachable ();
5971 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5972 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5973 corresponding element of the vector, but for V4SFmode and V2SFmode,
5974 the corresponding "float" is interpreted as an SImode integer. */
5976 HOST_WIDE_INT
5977 const_vector_elt_as_int (rtx op, unsigned int elt)
5979 rtx tmp;
5981 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5982 gcc_assert (GET_MODE (op) != V2DImode
5983 && GET_MODE (op) != V2DFmode);
5985 tmp = CONST_VECTOR_ELT (op, elt);
5986 if (GET_MODE (op) == V4SFmode
5987 || GET_MODE (op) == V2SFmode)
5988 tmp = gen_lowpart (SImode, tmp);
5989 return INTVAL (tmp);
5992 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5993 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5994 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5995 all items are set to the same value and contain COPIES replicas of the
5996 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5997 operand and the others are set to the value of the operand's msb. */
5999 static bool
6000 vspltis_constant (rtx op, unsigned step, unsigned copies)
6002 machine_mode mode = GET_MODE (op);
6003 machine_mode inner = GET_MODE_INNER (mode);
6005 unsigned i;
6006 unsigned nunits;
6007 unsigned bitsize;
6008 unsigned mask;
6010 HOST_WIDE_INT val;
6011 HOST_WIDE_INT splat_val;
6012 HOST_WIDE_INT msb_val;
6014 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6015 return false;
6017 nunits = GET_MODE_NUNITS (mode);
6018 bitsize = GET_MODE_BITSIZE (inner);
6019 mask = GET_MODE_MASK (inner);
6021 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6022 splat_val = val;
6023 msb_val = val >= 0 ? 0 : -1;
6025 /* Construct the value to be splatted, if possible. If not, return 0. */
6026 for (i = 2; i <= copies; i *= 2)
6028 HOST_WIDE_INT small_val;
6029 bitsize /= 2;
6030 small_val = splat_val >> bitsize;
6031 mask >>= bitsize;
6032 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6033 return false;
6034 splat_val = small_val;
6037 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6038 if (EASY_VECTOR_15 (splat_val))
6041 /* Also check if we can splat, and then add the result to itself. Do so if
6042 the value is positive, or if the splat instruction is using OP's mode;
6043 for splat_val < 0, the splat and the add should use the same mode. */
6044 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6045 && (splat_val >= 0 || (step == 1 && copies == 1)))
6048 /* Also check if we are loading up the most significant bit, which can be done
6049 by loading up -1 and shifting the value left by -1. */
6050 else if (EASY_VECTOR_MSB (splat_val, inner))
6053 else
6054 return false;
6056 /* Check if VAL is present in every STEP-th element, and the
6057 other elements are filled with its most significant bit. */
6058 for (i = 1; i < nunits; ++i)
6060 HOST_WIDE_INT desired_val;
6061 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6062 if ((i & (step - 1)) == 0)
6063 desired_val = val;
6064 else
6065 desired_val = msb_val;
6067 if (desired_val != const_vector_elt_as_int (op, elt))
6068 return false;
6071 return true;
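/* Worked example (illustrative): the V4SImode constant with all four
elements equal to 0x00050005 fails the step == 1, copies == 1
(vspltisw) check, but with copies == 2 the folding loop above reduces
0x00050005 to 5, which satisfies EASY_VECTOR_15, so the constant can
be generated with a single vspltish 5. */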
6074 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6075 instruction, filling in the bottom elements with 0 or -1.
6077 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6078 for the number of zeroes to shift in, or negative for the number of 0xff
6079 bytes to shift in.
6081 OP is a CONST_VECTOR. */
6084 vspltis_shifted (rtx op)
6086 machine_mode mode = GET_MODE (op);
6087 machine_mode inner = GET_MODE_INNER (mode);
6089 unsigned i, j;
6090 unsigned nunits;
6091 unsigned mask;
6093 HOST_WIDE_INT val;
6095 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6096 return false;
6098 /* We need to create pseudo registers to do the shift, so don't recognize
6099 shift vector constants after reload. */
6100 if (!can_create_pseudo_p ())
6101 return false;
6103 nunits = GET_MODE_NUNITS (mode);
6104 mask = GET_MODE_MASK (inner);
6106 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6108 /* Check if the value can really be the operand of a vspltis[bhw]. */
6109 if (EASY_VECTOR_15 (val))
6112 /* Also check if we are loading up the most significant bit which can be done
6113 by loading up -1 and shifting the value left by -1. */
6114 else if (EASY_VECTOR_MSB (val, inner))
6117 else
6118 return 0;
6120 /* Check if VAL is present in every STEP-th element until we find elements
6121 that are 0 or all 1 bits. */
6122 for (i = 1; i < nunits; ++i)
6124 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6125 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6127 /* If the value isn't the splat value, check for the remaining elements
6128 being 0/-1. */
6129 if (val != elt_val)
6131 if (elt_val == 0)
6133 for (j = i+1; j < nunits; ++j)
6135 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6136 if (const_vector_elt_as_int (op, elt2) != 0)
6137 return 0;
6140 return (nunits - i) * GET_MODE_SIZE (inner);
6143 else if ((elt_val & mask) == mask)
6145 for (j = i+1; j < nunits; ++j)
6147 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6148 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6149 return 0;
6152 return -((nunits - i) * GET_MODE_SIZE (inner));
6155 else
6156 return 0;
6160 /* If all elements are equal, we don't need to do VSLDOI. */
6161 return 0;
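/* Worked example (illustrative): for the big-endian V4SImode constant
{ 5, 0, 0, 0 }, val == 5 is a valid vspltisw operand and the remaining
three elements are zero, so the function returns (4 - 1) * 4 == 12:
splat 5, then VSLDOI shifts in 12 zero bytes. */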
6165 /* Return true if OP is of the given MODE and can be synthesized
6166 with a vspltisb, vspltish or vspltisw. */
6168 bool
6169 easy_altivec_constant (rtx op, machine_mode mode)
6171 unsigned step, copies;
6173 if (mode == VOIDmode)
6174 mode = GET_MODE (op);
6175 else if (mode != GET_MODE (op))
6176 return false;
6178 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6179 constants. */
6180 if (mode == V2DFmode)
6181 return zero_constant (op, mode);
6183 else if (mode == V2DImode)
6185 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6186 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6187 return false;
6189 if (zero_constant (op, mode))
6190 return true;
6192 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6193 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6194 return true;
6196 return false;
6199 /* V1TImode is a special container for TImode. Ignore for now. */
6200 else if (mode == V1TImode)
6201 return false;
6203 /* Start with a vspltisw. */
6204 step = GET_MODE_NUNITS (mode) / 4;
6205 copies = 1;
6207 if (vspltis_constant (op, step, copies))
6208 return true;
6210 /* Then try with a vspltish. */
6211 if (step == 1)
6212 copies <<= 1;
6213 else
6214 step >>= 1;
6216 if (vspltis_constant (op, step, copies))
6217 return true;
6219 /* And finally a vspltisb. */
6220 if (step == 1)
6221 copies <<= 1;
6222 else
6223 step >>= 1;
6225 if (vspltis_constant (op, step, copies))
6226 return true;
6228 if (vspltis_shifted (op) != 0)
6229 return true;
6231 return false;
6234 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6235 result is OP. Abort if it is not possible. */
6238 gen_easy_altivec_constant (rtx op)
6240 machine_mode mode = GET_MODE (op);
6241 int nunits = GET_MODE_NUNITS (mode);
6242 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6243 unsigned step = nunits / 4;
6244 unsigned copies = 1;
6246 /* Start with a vspltisw. */
6247 if (vspltis_constant (op, step, copies))
6248 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6250 /* Then try with a vspltish. */
6251 if (step == 1)
6252 copies <<= 1;
6253 else
6254 step >>= 1;
6256 if (vspltis_constant (op, step, copies))
6257 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6259 /* And finally a vspltisb. */
6260 if (step == 1)
6261 copies <<= 1;
6262 else
6263 step >>= 1;
6265 if (vspltis_constant (op, step, copies))
6266 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6268 gcc_unreachable ();
6271 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6272 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6274 Return the number of instructions needed (1 or 2) through NUM_INSNS_PTR.
6277 Return the constant that is being split via CONSTANT_PTR. */
6279 bool
6280 xxspltib_constant_p (rtx op,
6281 machine_mode mode,
6282 int *num_insns_ptr,
6283 int *constant_ptr)
6285 size_t nunits = GET_MODE_NUNITS (mode);
6286 size_t i;
6287 HOST_WIDE_INT value;
6288 rtx element;
6290 /* Set the returned values to out of bound values. */
6291 *num_insns_ptr = -1;
6292 *constant_ptr = 256;
6294 if (!TARGET_P9_VECTOR)
6295 return false;
6297 if (mode == VOIDmode)
6298 mode = GET_MODE (op);
6300 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6301 return false;
6303 /* Handle (vec_duplicate <constant>). */
6304 if (GET_CODE (op) == VEC_DUPLICATE)
6306 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6307 && mode != V2DImode)
6308 return false;
6310 element = XEXP (op, 0);
6311 if (!CONST_INT_P (element))
6312 return false;
6314 value = INTVAL (element);
6315 if (!IN_RANGE (value, -128, 127))
6316 return false;
6319 /* Handle (const_vector [...]). */
6320 else if (GET_CODE (op) == CONST_VECTOR)
6322 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6323 && mode != V2DImode)
6324 return false;
6326 element = CONST_VECTOR_ELT (op, 0);
6327 if (!CONST_INT_P (element))
6328 return false;
6330 value = INTVAL (element);
6331 if (!IN_RANGE (value, -128, 127))
6332 return false;
6334 for (i = 1; i < nunits; i++)
6336 element = CONST_VECTOR_ELT (op, i);
6337 if (!CONST_INT_P (element))
6338 return false;
6340 if (value != INTVAL (element))
6341 return false;
6345 /* Handle integer constants being loaded into the upper part of the VSX
6346 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6347 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6348 else if (CONST_INT_P (op))
6350 if (!SCALAR_INT_MODE_P (mode))
6351 return false;
6353 value = INTVAL (op);
6354 if (!IN_RANGE (value, -128, 127))
6355 return false;
6357 if (!IN_RANGE (value, -1, 0))
6359 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6360 return false;
6362 if (EASY_VECTOR_15 (value))
6363 return false;
6367 else
6368 return false;
6370 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6371 sign extend. Special case 0/-1 to allow getting any VSX register instead
6372 of an Altivec register. */
6373 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6374 && EASY_VECTOR_15 (value))
6375 return false;
6377 /* Return # of instructions and the constant byte for XXSPLTIB. */
6378 if (mode == V16QImode)
6379 *num_insns_ptr = 1;
6381 else if (IN_RANGE (value, -1, 0))
6382 *num_insns_ptr = 1;
6384 else
6385 *num_insns_ptr = 2;
6387 *constant_ptr = (int) value;
6388 return true;
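/* Worked examples (illustrative): a V16QImode splat of 37 yields
*num_insns_ptr == 1 (a single xxspltib); a V4SImode splat of 37 yields
*num_insns_ptr == 2 (xxspltib plus a vextsb2w sign extend); a V4SImode
splat of 5 is rejected above because EASY_VECTOR_15 prefers a single
vspltisw. */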
6391 const char *
6392 output_vec_const_move (rtx *operands)
6394 int cst, cst2, shift;
6395 machine_mode mode;
6396 rtx dest, vec;
6398 dest = operands[0];
6399 vec = operands[1];
6400 mode = GET_MODE (dest);
6402 if (TARGET_VSX)
6404 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6405 int xxspltib_value = 256;
6406 int num_insns = -1;
6408 if (zero_constant (vec, mode))
6410 if (TARGET_P9_VECTOR)
6411 return "xxspltib %x0,0";
6413 else if (dest_vmx_p)
6414 return "vspltisw %0,0";
6416 else
6417 return "xxlxor %x0,%x0,%x0";
6420 if (all_ones_constant (vec, mode))
6422 if (TARGET_P9_VECTOR)
6423 return "xxspltib %x0,255";
6425 else if (dest_vmx_p)
6426 return "vspltisw %0,-1";
6428 else if (TARGET_P8_VECTOR)
6429 return "xxlorc %x0,%x0,%x0";
6431 else
6432 gcc_unreachable ();
6435 if (TARGET_P9_VECTOR
6436 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6438 if (num_insns == 1)
6440 operands[2] = GEN_INT (xxspltib_value & 0xff);
6441 return "xxspltib %x0,%2";
6444 return "#";
6448 if (TARGET_ALTIVEC)
6450 rtx splat_vec;
6452 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6453 if (zero_constant (vec, mode))
6454 return "vspltisw %0,0";
6456 if (all_ones_constant (vec, mode))
6457 return "vspltisw %0,-1";
6459 /* Do we need to construct a value using VSLDOI? */
6460 shift = vspltis_shifted (vec);
6461 if (shift != 0)
6462 return "#";
6464 splat_vec = gen_easy_altivec_constant (vec);
6465 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6466 operands[1] = XEXP (splat_vec, 0);
6467 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6468 return "#";
6470 switch (GET_MODE (splat_vec))
6472 case V4SImode:
6473 return "vspltisw %0,%1";
6475 case V8HImode:
6476 return "vspltish %0,%1";
6478 case V16QImode:
6479 return "vspltisb %0,%1";
6481 default:
6482 gcc_unreachable ();
6486 gcc_assert (TARGET_SPE);
6488 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6489 pattern of V1DI, V4HI, and V2SF.
6491 FIXME: We should probably return # and add post reload
6492 splitters for these, but this way is so easy ;-). */
6493 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6494 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6495 operands[1] = CONST_VECTOR_ELT (vec, 0);
6496 operands[2] = CONST_VECTOR_ELT (vec, 1);
6497 if (cst == cst2)
6498 return "li %0,%1\n\tevmergelo %0,%0,%0";
6499 else if (WORDS_BIG_ENDIAN)
6500 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6501 else
6502 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6505 /* Initialize TARGET of vector PAIRED to VALS. */
6507 void
6508 paired_expand_vector_init (rtx target, rtx vals)
6510 machine_mode mode = GET_MODE (target);
6511 int n_elts = GET_MODE_NUNITS (mode);
6512 int n_var = 0;
6513 rtx x, new_rtx, tmp, constant_op, op1, op2;
6514 int i;
6516 for (i = 0; i < n_elts; ++i)
6518 x = XVECEXP (vals, 0, i);
6519 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6520 ++n_var;
6522 if (n_var == 0)
6524 /* Load from constant pool. */
6525 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6526 return;
6529 if (n_var == 2)
6531 /* The vector is initialized only with non-constants. */
6532 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6533 XVECEXP (vals, 0, 1));
6535 emit_move_insn (target, new_rtx);
6536 return;
6539 /* One field is non-constant and the other one is a constant. Load the
6540 constant from the constant pool and use the ps_merge instruction to
6541 construct the whole vector. */
6542 op1 = XVECEXP (vals, 0, 0);
6543 op2 = XVECEXP (vals, 0, 1);
6545 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6547 tmp = gen_reg_rtx (GET_MODE (constant_op));
6548 emit_move_insn (tmp, constant_op);
6550 if (CONSTANT_P (op1))
6551 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6552 else
6553 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6555 emit_move_insn (target, new_rtx);
6558 void
6559 paired_expand_vector_move (rtx operands[])
6561 rtx op0 = operands[0], op1 = operands[1];
6563 emit_move_insn (op0, op1);
6566 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6567 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6568 operands for the relation operation COND. This is a recursive
6569 function. */
6571 static void
6572 paired_emit_vector_compare (enum rtx_code rcode,
6573 rtx dest, rtx op0, rtx op1,
6574 rtx cc_op0, rtx cc_op1)
6576 rtx tmp = gen_reg_rtx (V2SFmode);
6577 rtx tmp1, max, min;
6579 gcc_assert (TARGET_PAIRED_FLOAT);
6580 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6582 switch (rcode)
6584 case LT:
6585 case LTU:
6586 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6587 return;
6588 case GE:
6589 case GEU:
6590 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6591 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6592 return;
6593 case LE:
6594 case LEU:
6595 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6596 return;
6597 case GT:
6598 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6599 return;
6600 case EQ:
6601 tmp1 = gen_reg_rtx (V2SFmode);
6602 max = gen_reg_rtx (V2SFmode);
6603 min = gen_reg_rtx (V2SFmode);
6604 gen_reg_rtx (V2SFmode);
6606 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6607 emit_insn (gen_selv2sf4
6608 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6609 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6610 emit_insn (gen_selv2sf4
6611 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6612 emit_insn (gen_subv2sf3 (tmp1, min, max));
6613 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6614 return;
6615 case NE:
6616 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6617 return;
6618 case UNLE:
6619 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6620 return;
6621 case UNLT:
6622 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6623 return;
6624 case UNGE:
6625 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6626 return;
6627 case UNGT:
6628 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6629 return;
6630 default:
6631 gcc_unreachable ();
6634 return;
6637 /* Emit vector conditional expression.
6638 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6639 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6642 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6643 rtx cond, rtx cc_op0, rtx cc_op1)
6645 enum rtx_code rcode = GET_CODE (cond);
6647 if (!TARGET_PAIRED_FLOAT)
6648 return 0;
6650 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6652 return 1;
6655 /* Initialize vector TARGET to VALS. */
6657 void
6658 rs6000_expand_vector_init (rtx target, rtx vals)
6660 machine_mode mode = GET_MODE (target);
6661 machine_mode inner_mode = GET_MODE_INNER (mode);
6662 int n_elts = GET_MODE_NUNITS (mode);
6663 int n_var = 0, one_var = -1;
6664 bool all_same = true, all_const_zero = true;
6665 rtx x, mem;
6666 int i;
6668 for (i = 0; i < n_elts; ++i)
6670 x = XVECEXP (vals, 0, i);
6671 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6672 ++n_var, one_var = i;
6673 else if (x != CONST0_RTX (inner_mode))
6674 all_const_zero = false;
6676 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6677 all_same = false;
6680 if (n_var == 0)
6682 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6683 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6684 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6686 /* Zero register. */
6687 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6688 return;
6690 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6692 /* Splat immediate. */
6693 emit_insn (gen_rtx_SET (target, const_vec));
6694 return;
6696 else
6698 /* Load from constant pool. */
6699 emit_move_insn (target, const_vec);
6700 return;
6704 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6705 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6707 rtx op0 = XVECEXP (vals, 0, 0);
6708 rtx op1 = XVECEXP (vals, 0, 1);
6709 if (all_same)
6711 if (!MEM_P (op0) && !REG_P (op0))
6712 op0 = force_reg (inner_mode, op0);
6713 if (mode == V2DFmode)
6714 emit_insn (gen_vsx_splat_v2df (target, op0));
6715 else
6716 emit_insn (gen_vsx_splat_v2di (target, op0));
6718 else
6720 op0 = force_reg (inner_mode, op0);
6721 op1 = force_reg (inner_mode, op1);
6722 if (mode == V2DFmode)
6723 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6724 else
6725 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6727 return;
6730 /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is
6731 complicated since scalars are stored as doubles in the registers. */
6732 if (TARGET_P9_VECTOR && mode == V4SImode && all_same
6733 && VECTOR_MEM_VSX_P (mode))
6735 emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
6736 return;
6739 /* With single precision floating point on VSX, we know that internally
6740 single precision is actually represented as a double. Either make two
6741 V2DF vectors and convert them to single precision, or do one conversion
6742 and splat the result to the other elements. */
6743 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6745 if (all_same)
6747 rtx op0 = XVECEXP (vals, 0, 0);
6749 if (TARGET_P9_VECTOR)
6750 emit_insn (gen_vsx_splat_v4sf (target, op0));
6752 else
6754 rtx freg = gen_reg_rtx (V4SFmode);
6755 rtx sreg = force_reg (SFmode, op0);
6756 rtx cvt = (TARGET_XSCVDPSPN
6757 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6758 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6760 emit_insn (cvt);
6761 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6762 const0_rtx));
6765 else
6767 rtx dbl_even = gen_reg_rtx (V2DFmode);
6768 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6769 rtx flt_even = gen_reg_rtx (V4SFmode);
6770 rtx flt_odd = gen_reg_rtx (V4SFmode);
6771 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6772 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6773 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6774 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6776 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6777 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6778 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6779 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6780 rs6000_expand_extract_even (target, flt_even, flt_odd);
6782 return;
6785 /* Store value to stack temp. Load vector element. Splat. However, splat
6786 of 64-bit items is not supported on Altivec. */
6787 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6789 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6790 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6791 XVECEXP (vals, 0, 0));
6792 x = gen_rtx_UNSPEC (VOIDmode,
6793 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6794 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6795 gen_rtvec (2,
6796 gen_rtx_SET (target, mem),
6797 x)));
6798 x = gen_rtx_VEC_SELECT (inner_mode, target,
6799 gen_rtx_PARALLEL (VOIDmode,
6800 gen_rtvec (1, const0_rtx)));
6801 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6802 return;
6805 /* One field is non-constant. Load constant then overwrite
6806 varying field. */
6807 if (n_var == 1)
6809 rtx copy = copy_rtx (vals);
6811 /* Load constant part of vector, substitute neighboring value for
6812 varying element. */
6813 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6814 rs6000_expand_vector_init (target, copy);
6816 /* Insert variable. */
6817 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6818 return;
6821 /* Construct the vector in memory one field at a time
6822 and load the whole vector. */
6823 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6824 for (i = 0; i < n_elts; i++)
6825 emit_move_insn (adjust_address_nv (mem, inner_mode,
6826 i * GET_MODE_SIZE (inner_mode)),
6827 XVECEXP (vals, 0, i));
6828 emit_move_insn (target, mem);
6831 /* Set field ELT of TARGET to VAL. */
6833 void
6834 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6836 machine_mode mode = GET_MODE (target);
6837 machine_mode inner_mode = GET_MODE_INNER (mode);
6838 rtx reg = gen_reg_rtx (mode);
6839 rtx mask, mem, x;
6840 int width = GET_MODE_SIZE (inner_mode);
6841 int i;
6843 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6845 rtx (*set_func) (rtx, rtx, rtx, rtx)
6846 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6847 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6848 return;
6851 /* Simplify setting single element vectors like V1TImode. */
6852 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6854 emit_move_insn (target, gen_lowpart (mode, val));
6855 return;
6858 /* Load single variable value. */
6859 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6860 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6861 x = gen_rtx_UNSPEC (VOIDmode,
6862 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6863 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6864 gen_rtvec (2,
6865 gen_rtx_SET (reg, mem),
6866 x)));
6868 /* Linear sequence. */
6869 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6870 for (i = 0; i < 16; ++i)
6871 XVECEXP (mask, 0, i) = GEN_INT (i);
6873 /* Set permute mask to insert element into target. */
6874 for (i = 0; i < width; ++i)
6875 XVECEXP (mask, 0, elt*width + i)
6876 = GEN_INT (i + 0x10);
6877 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6879 if (BYTES_BIG_ENDIAN)
6880 x = gen_rtx_UNSPEC (mode,
6881 gen_rtvec (3, target, reg,
6882 force_reg (V16QImode, x)),
6883 UNSPEC_VPERM);
6884 else
6886 if (TARGET_P9_VECTOR)
6887 x = gen_rtx_UNSPEC (mode,
6888 gen_rtvec (3, target, reg,
6889 force_reg (V16QImode, x)),
6890 UNSPEC_VPERMR);
6891 else
6893 /* Invert selector. We prefer to generate VNAND on P8 so
6894 that future fusion opportunities can kick in, but must
6895 generate VNOR elsewhere. */
6896 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6897 rtx iorx = (TARGET_P8_VECTOR
6898 ? gen_rtx_IOR (V16QImode, notx, notx)
6899 : gen_rtx_AND (V16QImode, notx, notx));
6900 rtx tmp = gen_reg_rtx (V16QImode);
6901 emit_insn (gen_rtx_SET (tmp, iorx));
6903 /* Permute with operands reversed and adjusted selector. */
6904 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6905 UNSPEC_VPERM);
6909 emit_insn (gen_rtx_SET (target, x));
6912 /* Extract field ELT from VEC into TARGET. */
6914 void
6915 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6917 machine_mode mode = GET_MODE (vec);
6918 machine_mode inner_mode = GET_MODE_INNER (mode);
6919 rtx mem;
6921 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6923 switch (mode)
6925 default:
6926 break;
6927 case V1TImode:
6928 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
6929 emit_move_insn (target, gen_lowpart (TImode, vec));
6930 break;
6931 case V2DFmode:
6932 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6933 return;
6934 case V2DImode:
6935 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6936 return;
6937 case V4SFmode:
6938 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6939 return;
6940 case V16QImode:
6941 if (TARGET_DIRECT_MOVE_64BIT)
6943 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6944 return;
6946 else
6947 break;
6948 case V8HImode:
6949 if (TARGET_DIRECT_MOVE_64BIT)
6951 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6952 return;
6954 else
6955 break;
6956 case V4SImode:
6957 if (TARGET_DIRECT_MOVE_64BIT)
6959 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6960 return;
6962 break;
6965 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6966 && TARGET_DIRECT_MOVE_64BIT)
6968 if (GET_MODE (elt) != DImode)
6970 rtx tmp = gen_reg_rtx (DImode);
6971 convert_move (tmp, elt, 0);
6972 elt = tmp;
6975 switch (mode)
6977 case V2DFmode:
6978 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6979 return;
6981 case V2DImode:
6982 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6983 return;
6985 case V4SFmode:
6986 if (TARGET_UPPER_REGS_SF)
6988 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6989 return;
6991 break;
6993 case V4SImode:
6994 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6995 return;
6997 case V8HImode:
6998 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6999 return;
7001 case V16QImode:
7002 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7003 return;
7005 default:
7006 gcc_unreachable ();
7010 gcc_assert (CONST_INT_P (elt));
7012 /* Allocate mode-sized buffer. */
7013 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7015 emit_move_insn (mem, vec);
7017 /* Add offset to field within buffer matching vector element. */
7018 mem = adjust_address_nv (mem, inner_mode,
7019 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7021 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7024 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7025 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7026 temporary (BASE_TMP) to fix up the address. Return the new memory address
7027 that is valid for reads or writes to a given register (SCALAR_REG). */
7030 rs6000_adjust_vec_address (rtx scalar_reg,
7031 rtx mem,
7032 rtx element,
7033 rtx base_tmp,
7034 machine_mode scalar_mode)
7036 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7037 rtx addr = XEXP (mem, 0);
7038 rtx element_offset;
7039 rtx new_addr;
7040 bool valid_addr_p;
7042 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7043 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7045 /* Calculate what we need to add to the address to get the element
7046 address. */
7047 if (CONST_INT_P (element))
7048 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7049 else
7051 int byte_shift = exact_log2 (scalar_size);
7052 gcc_assert (byte_shift >= 0);
7054 if (byte_shift == 0)
7055 element_offset = element;
7057 else
7059 if (TARGET_POWERPC64)
7060 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7061 else
7062 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7064 element_offset = base_tmp;
7068 /* Create the new address pointing to the element within the vector. If we
7069 are adding 0, we don't have to change the address. */
7070 if (element_offset == const0_rtx)
7071 new_addr = addr;
7073 /* A simple indirect address can be converted into a reg + offset
7074 address. */
7075 else if (REG_P (addr) || SUBREG_P (addr))
7076 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7078 /* Optimize D-FORM addresses with constant offset with a constant element, to
7079 include the element offset in the address directly. */
7080 else if (GET_CODE (addr) == PLUS)
7082 rtx op0 = XEXP (addr, 0);
7083 rtx op1 = XEXP (addr, 1);
7084 rtx insn;
7086 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7087 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7089 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7090 rtx offset_rtx = GEN_INT (offset);
7092 if (IN_RANGE (offset, -32768, 32767)
7093 && (scalar_size < 8 || (offset & 0x3) == 0))
7094 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7095 else
7097 emit_move_insn (base_tmp, offset_rtx);
7098 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7101 else
7103 if (REG_P (op1) || SUBREG_P (op1))
7105 insn = gen_add3_insn (base_tmp, op1, element_offset);
7106 gcc_assert (insn != NULL_RTX);
7107 emit_insn (insn);
7110 else if (REG_P (element_offset) || SUBREG_P (element_offset))
7112 insn = gen_add3_insn (base_tmp, element_offset, op1);
7113 gcc_assert (insn != NULL_RTX);
7114 emit_insn (insn);
7117 else
7119 emit_move_insn (base_tmp, op1);
7120 emit_insn (gen_add2_insn (base_tmp, element_offset));
7123 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7127 else
7129 emit_move_insn (base_tmp, addr);
7130 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7133 /* If we have a PLUS, we need to see whether the particular register class
7134 allows for D-FORM or X-FORM addressing. */
7135 if (GET_CODE (new_addr) == PLUS)
7137 rtx op1 = XEXP (new_addr, 1);
7138 addr_mask_type addr_mask;
7139 int scalar_regno;
7141 if (REG_P (scalar_reg))
7142 scalar_regno = REGNO (scalar_reg);
7143 else if (SUBREG_P (scalar_reg))
7144 scalar_regno = subreg_regno (scalar_reg);
7145 else
7146 gcc_unreachable ();
7148 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7149 if (INT_REGNO_P (scalar_regno))
7150 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7152 else if (FP_REGNO_P (scalar_regno))
7153 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7155 else if (ALTIVEC_REGNO_P (scalar_regno))
7156 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7158 else
7159 gcc_unreachable ();
7161 if (REG_P (op1) || SUBREG_P (op1))
7162 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7163 else
7164 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7167 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7168 valid_addr_p = true;
7170 else
7171 valid_addr_p = false;
7173 if (!valid_addr_p)
7175 emit_move_insn (base_tmp, new_addr);
7176 new_addr = base_tmp;
7179 return change_address (mem, scalar_mode, new_addr);
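/* Worked example (illustrative): extracting constant element 3 of a
V4SImode vector whose address is (plus r9 16) gives element_offset
== 3 * 4 == 12, which folds into the D-FORM address (plus r9 28); a
variable ELEMENT is instead shifted left by 2 into BASE_TMP and added,
producing an X-FORM reg + reg address. */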
7182 /* Split a variable vec_extract operation into the component instructions. */
7184 void
7185 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7186 rtx tmp_altivec)
7188 machine_mode mode = GET_MODE (src);
7189 machine_mode scalar_mode = GET_MODE (dest);
7190 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7191 int byte_shift = exact_log2 (scalar_size);
7193 gcc_assert (byte_shift >= 0);
7195 /* If we are given a memory address, optimize to load just the element. We
7196 don't have to adjust the vector element number on little endian
7197 systems. */
7198 if (MEM_P (src))
7200 gcc_assert (REG_P (tmp_gpr));
7201 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7202 tmp_gpr, scalar_mode));
7203 return;
7206 else if (REG_P (src) || SUBREG_P (src))
7208 int bit_shift = byte_shift + 3;
7209 rtx element2;
7211 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7213 /* For little endian, adjust the element ordering. For V2DI/V2DF, we can
7214 use an XOR; otherwise we need to subtract. The shift amount is chosen
7215 so that VSLO will shift the element into the upper position (adding 3
7216 converts a byte shift into a bit shift). */
7217 if (scalar_size == 8)
7219 if (!VECTOR_ELT_ORDER_BIG)
7221 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7222 element2 = tmp_gpr;
7224 else
7225 element2 = element;
7227 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7228 bit. */
7229 emit_insn (gen_rtx_SET (tmp_gpr,
7230 gen_rtx_AND (DImode,
7231 gen_rtx_ASHIFT (DImode,
7232 element2,
7233 GEN_INT (6)),
7234 GEN_INT (64))));
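/* Illustrative note, not in the original source: the RTL above computes
   (element2 << 6) & 64, i.e. a bit-shift count of 0 for element 0 and
   64 for element 1, which is exactly what VSLO needs to move the
   selected doubleword into the upper half of the vector.  */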
7236 else
7238 if (!VECTOR_ELT_ORDER_BIG)
7240 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7242 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7243 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7244 element2 = tmp_gpr;
7246 else
7247 element2 = element;
7249 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
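/* Illustrative note, not in the original source: for smaller elements
   the shift count is element2 << bit_shift; e.g. for V8HImode
   (scalar_size 2, bit_shift 4), element 3 in big-endian ordering gives
   3 << 4 = 48, shifting the selected halfword to the top.  */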
7252 /* Get the value into the lower byte of the Altivec register where VSLO
7253 expects it. */
7254 if (TARGET_P9_VECTOR)
7255 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7256 else if (can_create_pseudo_p ())
7257 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7258 else
7260 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7261 emit_move_insn (tmp_di, tmp_gpr);
7262 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7265 /* Do the VSLO to get the value into the final location. */
7266 switch (mode)
7268 case V2DFmode:
7269 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7270 return;
7272 case V2DImode:
7273 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7274 return;
7276 case V4SFmode:
7278 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7279 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7280 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7281 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7282 tmp_altivec));
7284 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7285 return;
7288 case V4SImode:
7289 case V8HImode:
7290 case V16QImode:
7292 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7293 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7294 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7295 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7296 tmp_altivec));
7297 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7298 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7299 GEN_INT (64 - (8 * scalar_size))));
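/* Illustrative note, not in the original source: after VSLO the element
   sits in the high bits of the doubleword, so the arithmetic right shift
   by 64 - 8 * scalar_size moves it down to the low bits; for V16QImode
   that is a shift of 56 bits.  */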
7300 return;
7303 default:
7304 gcc_unreachable ();
7307 return;
7309 else
7310 gcc_unreachable ();
7313 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7315 bool
7316 invalid_e500_subreg (rtx op, machine_mode mode)
7318 if (TARGET_E500_DOUBLE)
7320 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7321 subreg:TI and reg:TF. Decimal float modes are like integer
7322 modes (only low part of each register used) for this
7323 purpose. */
7324 if (GET_CODE (op) == SUBREG
7325 && (mode == SImode || mode == DImode || mode == TImode
7326 || mode == DDmode || mode == TDmode || mode == PTImode)
7327 && REG_P (SUBREG_REG (op))
7328 && (GET_MODE (SUBREG_REG (op)) == DFmode
7329 || GET_MODE (SUBREG_REG (op)) == TFmode
7330 || GET_MODE (SUBREG_REG (op)) == IFmode
7331 || GET_MODE (SUBREG_REG (op)) == KFmode))
7332 return true;
7334 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7335 reg:TI. */
7336 if (GET_CODE (op) == SUBREG
7337 && (mode == DFmode || mode == TFmode || mode == IFmode
7338 || mode == KFmode)
7339 && REG_P (SUBREG_REG (op))
7340 && (GET_MODE (SUBREG_REG (op)) == DImode
7341 || GET_MODE (SUBREG_REG (op)) == TImode
7342 || GET_MODE (SUBREG_REG (op)) == PTImode
7343 || GET_MODE (SUBREG_REG (op)) == DDmode
7344 || GET_MODE (SUBREG_REG (op)) == TDmode))
7345 return true;
7348 if (TARGET_SPE
7349 && GET_CODE (op) == SUBREG
7350 && mode == SImode
7351 && REG_P (SUBREG_REG (op))
7352 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7353 return true;
7355 return false;
7358 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7359 selects whether the returned alignment is the abi-mandated alignment,
7360 the optional alignment, or both. */
7362 unsigned int
7363 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7365 if (how != align_opt)
7367 if (TREE_CODE (type) == VECTOR_TYPE)
7369 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7370 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7372 if (align < 64)
7373 align = 64;
7375 else if (align < 128)
7376 align = 128;
7378 else if (TARGET_E500_DOUBLE
7379 && TREE_CODE (type) == REAL_TYPE
7380 && TYPE_MODE (type) == DFmode)
7382 if (align < 64)
7383 align = 64;
7387 if (how != align_abi)
7389 if (TREE_CODE (type) == ARRAY_TYPE
7390 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7392 if (align < BITS_PER_WORD)
7393 align = BITS_PER_WORD;
7397 return align;
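/* Illustrative note, not in the original source: under the rules above,
   an Altivec vector type is raised to 128-bit alignment as an ABI
   requirement (SPE/paired vectors to 64 bits), while an array of QImode
   elements is raised to word alignment only as an optional, non-ABI
   optimization.  */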
7400 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7402 bool
7403 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7405 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7407 if (computed != 128)
7409 static bool warned;
7410 if (!warned && warn_psabi)
7412 warned = true;
7413 inform (input_location,
7414 "the layout of aggregates containing vectors with"
7415 " %d-byte alignment has changed in GCC 5",
7416 computed / BITS_PER_UNIT);
7419 /* In current GCC there is no special case. */
7420 return false;
7423 return false;
7426 /* AIX increases natural record alignment to doubleword if the first
7427 field is an FP double while the FP fields remain word aligned. */
7429 unsigned int
7430 rs6000_special_round_type_align (tree type, unsigned int computed,
7431 unsigned int specified)
7433 unsigned int align = MAX (computed, specified);
7434 tree field = TYPE_FIELDS (type);
7436 /* Skip all non-field decls.  */
7437 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7438 field = DECL_CHAIN (field);
7440 if (field != NULL && field != type)
7442 type = TREE_TYPE (field);
7443 while (TREE_CODE (type) == ARRAY_TYPE)
7444 type = TREE_TYPE (type);
7446 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7447 align = MAX (align, 64);
7450 return align;
7453 /* Darwin increases record alignment to the natural alignment of
7454 the first field. */
7456 unsigned int
7457 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7458 unsigned int specified)
7460 unsigned int align = MAX (computed, specified);
7462 if (TYPE_PACKED (type))
7463 return align;
7465 /* Find the first field, looking down into aggregates. */
7466 do {
7467 tree field = TYPE_FIELDS (type);
7468 /* Skip all non-field decls.  */
7469 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7470 field = DECL_CHAIN (field);
7471 if (! field)
7472 break;
7473 /* A packed field does not contribute any extra alignment. */
7474 if (DECL_PACKED (field))
7475 return align;
7476 type = TREE_TYPE (field);
7477 while (TREE_CODE (type) == ARRAY_TYPE)
7478 type = TREE_TYPE (type);
7479 } while (AGGREGATE_TYPE_P (type));
7481 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7482 align = MAX (align, TYPE_ALIGN (type));
7484 return align;
7487 /* Return 1 for an operand in small memory on V.4/eabi. */
7489 int
7490 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7491 machine_mode mode ATTRIBUTE_UNUSED)
7493 #if TARGET_ELF
7494 rtx sym_ref;
7496 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7497 return 0;
7499 if (DEFAULT_ABI != ABI_V4)
7500 return 0;
7502 /* Vector and float memory instructions have a limited offset on the
7503 SPE, so using a vector or float variable directly as an operand is
7504 not useful. */
7505 if (TARGET_SPE
7506 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7507 return 0;
7509 if (GET_CODE (op) == SYMBOL_REF)
7510 sym_ref = op;
7512 else if (GET_CODE (op) != CONST
7513 || GET_CODE (XEXP (op, 0)) != PLUS
7514 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7515 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7516 return 0;
7518 else
7520 rtx sum = XEXP (op, 0);
7521 HOST_WIDE_INT summand;
7523 /* We have to be careful here, because it is the referenced address
7524 that must be 32k from _SDA_BASE_, not just the symbol. */
7525 summand = INTVAL (XEXP (sum, 1));
7526 if (summand < 0 || summand > g_switch_value)
7527 return 0;
7529 sym_ref = XEXP (sum, 0);
7532 return SYMBOL_REF_SMALL_P (sym_ref);
7533 #else
7534 return 0;
7535 #endif
7538 /* Return true if either operand is a general purpose register. */
7540 bool
7541 gpr_or_gpr_p (rtx op0, rtx op1)
7543 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7544 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7547 /* Return true if this is a move direct operation between GPR registers and
7548 floating point/VSX registers. */
7550 bool
7551 direct_move_p (rtx op0, rtx op1)
7553 int regno0, regno1;
7555 if (!REG_P (op0) || !REG_P (op1))
7556 return false;
7558 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7559 return false;
7561 regno0 = REGNO (op0);
7562 regno1 = REGNO (op1);
7563 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7564 return false;
7566 if (INT_REGNO_P (regno0))
7567 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7569 else if (INT_REGNO_P (regno1))
7571 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7572 return true;
7574 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7575 return true;
7578 return false;
7581 /* Return true if the OFFSET is valid for the quad address instructions that
7582 use d-form (register + offset) addressing. */
7584 static inline bool
7585 quad_address_offset_p (HOST_WIDE_INT offset)
7587 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
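/* Illustrative note, not in the original source: the DQ-form encoding
   used by LXV/STXV holds a signed 16-bit offset whose low four bits must
   be zero, so offsets such as 0, 16, -32768 and 32752 pass this test
   while 8 or 32767 do not.  */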
7590 /* Return true if ADDR is an acceptable address for a quad memory
7591 operation of mode MODE (either LQ/STQ for general purpose registers, or
7592 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
7593 address must pass the strict (post-reload) base register checks rather
7594 than the relaxed pre-reload ones. */
7596 bool
7597 quad_address_p (rtx addr, machine_mode mode, bool strict)
7599 rtx op0, op1;
7601 if (GET_MODE_SIZE (mode) != 16)
7602 return false;
7604 if (legitimate_indirect_address_p (addr, strict))
7605 return true;
7607 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7608 return false;
7610 if (GET_CODE (addr) != PLUS)
7611 return false;
7613 op0 = XEXP (addr, 0);
7614 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7615 return false;
7617 op1 = XEXP (addr, 1);
7618 if (!CONST_INT_P (op1))
7619 return false;
7621 return quad_address_offset_p (INTVAL (op1));
7624 /* Return true if this is a load or store quad operation. This function does
7625 not handle the atomic quad memory instructions. */
7627 bool
7628 quad_load_store_p (rtx op0, rtx op1)
7630 bool ret;
7632 if (!TARGET_QUAD_MEMORY)
7633 ret = false;
7635 else if (REG_P (op0) && MEM_P (op1))
7636 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7637 && quad_memory_operand (op1, GET_MODE (op1))
7638 && !reg_overlap_mentioned_p (op0, op1));
7640 else if (MEM_P (op0) && REG_P (op1))
7641 ret = (quad_memory_operand (op0, GET_MODE (op0))
7642 && quad_int_reg_operand (op1, GET_MODE (op1)));
7644 else
7645 ret = false;
7647 if (TARGET_DEBUG_ADDR)
7649 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7650 ret ? "true" : "false");
7651 debug_rtx (gen_rtx_SET (op0, op1));
7654 return ret;
7657 /* Given an address, return a constant offset term if one exists. */
7659 static rtx
7660 address_offset (rtx op)
7662 if (GET_CODE (op) == PRE_INC
7663 || GET_CODE (op) == PRE_DEC)
7664 op = XEXP (op, 0);
7665 else if (GET_CODE (op) == PRE_MODIFY
7666 || GET_CODE (op) == LO_SUM)
7667 op = XEXP (op, 1);
7669 if (GET_CODE (op) == CONST)
7670 op = XEXP (op, 0);
7672 if (GET_CODE (op) == PLUS)
7673 op = XEXP (op, 1);
7675 if (CONST_INT_P (op))
7676 return op;
7678 return NULL_RTX;
7681 /* Return true if the MEM operand is a memory operand suitable for use
7682 with a (full width, possibly multiple) gpr load/store. On
7683 powerpc64 this means the offset must be divisible by 4.
7684 Implements 'Y' constraint.
7686 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7687 a constraint function we know the operand has satisfied a suitable
7688 memory predicate. Also accept some odd rtl generated by reload
7689 (see rs6000_legitimize_reload_address for various forms). It is
7690 important that reload rtl be accepted by appropriate constraints
7691 but not by the operand predicate.
7693 Offsetting a lo_sum should not be allowed, except where we know by
7694 alignment that a 32k boundary is not crossed, but see the ???
7695 comment in rs6000_legitimize_reload_address. Note that by
7696 "offsetting" here we mean a further offset to access parts of the
7697 MEM. It's fine to have a lo_sum where the inner address is offset
7698 from a sym, since the same sym+offset will appear in the high part
7699 of the address calculation. */
7701 bool
7702 mem_operand_gpr (rtx op, machine_mode mode)
7704 unsigned HOST_WIDE_INT offset;
7705 int extra;
7706 rtx addr = XEXP (op, 0);
7708 op = address_offset (addr);
7709 if (op == NULL_RTX)
7710 return true;
7712 offset = INTVAL (op);
7713 if (TARGET_POWERPC64 && (offset & 3) != 0)
7714 return false;
7716 if (mode_supports_vsx_dform_quad (mode)
7717 && !quad_address_offset_p (offset))
7718 return false;
7720 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7721 if (extra < 0)
7722 extra = 0;
7724 if (GET_CODE (addr) == LO_SUM)
7725 /* For lo_sum addresses, we must allow any offset except one that
7726 causes a wrap, so test only the low 16 bits. */
7727 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7729 return offset + 0x8000 < 0x10000u - extra;
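/* Illustrative sketch, not in the original source: the two idioms used
   above, written out as stand-alone hypothetical helpers.  sext16
   sign-extends the low 16 bits of X (e.g. sext16 (0xfff0) == -16), and
   fits16 checks with a single unsigned comparison that both OFFSET and
   OFFSET + EXTRA lie within a signed 16-bit displacement.  */

static inline HOST_WIDE_INT
sext16 (unsigned HOST_WIDE_INT x)
{
  /* Map the low 16 bits through XOR/subtract to get the signed value.  */
  return ((x & 0xffff) ^ 0x8000) - 0x8000;
}

static inline bool
fits16 (unsigned HOST_WIDE_INT offset, int extra)
{
  /* The signed offset must be in [-0x8000, 0x7fff - extra].  */
  return offset + 0x8000 < 0x10000u - extra;
}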
7732 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7734 static bool
7735 reg_offset_addressing_ok_p (machine_mode mode)
7737 switch (mode)
7739 case V16QImode:
7740 case V8HImode:
7741 case V4SFmode:
7742 case V4SImode:
7743 case V2DFmode:
7744 case V2DImode:
7745 case V1TImode:
7746 case TImode:
7747 case TFmode:
7748 case KFmode:
7749 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7750 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7751 a vector mode, if we want to use the VSX registers to move it around,
7752 we need to restrict ourselves to reg+reg addressing. Similarly for
7753 IEEE 128-bit floating point that is passed in a single vector
7754 register. */
7755 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7756 return mode_supports_vsx_dform_quad (mode);
7757 break;
7759 case V4HImode:
7760 case V2SImode:
7761 case V1DImode:
7762 case V2SFmode:
7763 /* Paired vector modes. Only reg+reg addressing is valid. */
7764 if (TARGET_PAIRED_FLOAT)
7765 return false;
7766 break;
7768 case SDmode:
7769 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7770 addressing for the LFIWZX and STFIWX instructions. */
7771 if (TARGET_NO_SDMODE_STACK)
7772 return false;
7773 break;
7775 default:
7776 break;
7779 return true;
7782 static bool
7783 virtual_stack_registers_memory_p (rtx op)
7785 int regnum;
7787 if (GET_CODE (op) == REG)
7788 regnum = REGNO (op);
7790 else if (GET_CODE (op) == PLUS
7791 && GET_CODE (XEXP (op, 0)) == REG
7792 && GET_CODE (XEXP (op, 1)) == CONST_INT)
7793 regnum = REGNO (XEXP (op, 0));
7795 else
7796 return false;
7798 return (regnum >= FIRST_VIRTUAL_REGISTER
7799 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7802 /* Return true if a MODE-sized memory access to OP plus OFFSET
7803 is known not to straddle a 32k boundary. This function is used
7804 to determine whether -mcmodel=medium code can use TOC pointer
7805 relative addressing for OP. This means the alignment of the TOC
7806 pointer must also be taken into account, and unfortunately that is
7807 only 8 bytes. */
7809 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7810 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7811 #endif
7813 static bool
7814 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7815 machine_mode mode)
7817 tree decl;
7818 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7820 if (GET_CODE (op) != SYMBOL_REF)
7821 return false;
7823 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7824 SYMBOL_REF. */
7825 if (mode_supports_vsx_dform_quad (mode))
7826 return false;
7828 dsize = GET_MODE_SIZE (mode);
7829 decl = SYMBOL_REF_DECL (op);
7830 if (!decl)
7832 if (dsize == 0)
7833 return false;
7835 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7836 replacing memory addresses with an anchor plus offset. We
7837 could find the decl by rummaging around in the block->objects
7838 VEC for the given offset but that seems like too much work. */
7839 dalign = BITS_PER_UNIT;
7840 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7841 && SYMBOL_REF_ANCHOR_P (op)
7842 && SYMBOL_REF_BLOCK (op) != NULL)
7844 struct object_block *block = SYMBOL_REF_BLOCK (op);
7846 dalign = block->alignment;
7847 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7849 else if (CONSTANT_POOL_ADDRESS_P (op))
7851 /* It would be nice to have get_pool_align ().  */
7852 machine_mode cmode = get_pool_mode (op);
7854 dalign = GET_MODE_ALIGNMENT (cmode);
7857 else if (DECL_P (decl))
7859 dalign = DECL_ALIGN (decl);
7861 if (dsize == 0)
7863 /* Allow BLKmode when the entire object is known to not
7864 cross a 32k boundary. */
7865 if (!DECL_SIZE_UNIT (decl))
7866 return false;
7868 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7869 return false;
7871 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7872 if (dsize > 32768)
7873 return false;
7875 dalign /= BITS_PER_UNIT;
7876 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7877 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7878 return dalign >= dsize;
7881 else
7882 gcc_unreachable ();
7884 /* Find how many bits of the alignment we know for this access. */
7885 dalign /= BITS_PER_UNIT;
7886 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7887 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7888 mask = dalign - 1;
7889 lsb = offset & -offset;
7890 mask &= lsb - 1;
7891 dalign = mask + 1;
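/* Illustrative note, not in the original source: offset & -offset
   isolates the lowest set bit of the offset, so the known alignment of
   op + offset is the smaller of the symbol's (capped) alignment and that
   bit; e.g. an 8-byte-aligned symbol accessed at offset 4 is only known
   to be 4-byte aligned.  */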
7893 return dalign >= dsize;
7896 static bool
7897 constant_pool_expr_p (rtx op)
7899 rtx base, offset;
7901 split_const (op, &base, &offset);
7902 return (GET_CODE (base) == SYMBOL_REF
7903 && CONSTANT_POOL_ADDRESS_P (base)
7904 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7907 static const_rtx tocrel_base, tocrel_offset;
7909 /* Return true if OP is a toc pointer relative address (the output
7910 of create_TOC_reference). If STRICT, do not match high part or
7911 non-split -mcmodel=large/medium toc pointer relative addresses. */
7913 bool
7914 toc_relative_expr_p (const_rtx op, bool strict)
7916 if (!TARGET_TOC)
7917 return false;
7919 if (TARGET_CMODEL != CMODEL_SMALL)
7921 /* Only match the low part. */
7922 if (GET_CODE (op) == LO_SUM
7923 && REG_P (XEXP (op, 0))
7924 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7925 op = XEXP (op, 1);
7926 else if (strict)
7927 return false;
7930 tocrel_base = op;
7931 tocrel_offset = const0_rtx;
7932 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7934 tocrel_base = XEXP (op, 0);
7935 tocrel_offset = XEXP (op, 1);
7938 return (GET_CODE (tocrel_base) == UNSPEC
7939 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
7942 /* Return true if X is a constant pool address, and also for cmodel=medium
7943 if X is a toc-relative address known to be offsettable within MODE. */
7945 bool
7946 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7947 bool strict)
7949 return (toc_relative_expr_p (x, strict)
7950 && (TARGET_CMODEL != CMODEL_MEDIUM
7951 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7952 || mode == QImode
7953 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7954 INTVAL (tocrel_offset), mode)));
7957 static bool
7958 legitimate_small_data_p (machine_mode mode, rtx x)
7960 return (DEFAULT_ABI == ABI_V4
7961 && !flag_pic && !TARGET_TOC
7962 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7963 && small_data_operand (x, mode));
7966 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
7967 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
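/* Illustrative sketch, not in the original source: ~0xf8 clears only
   bits 3..7, so the macro accepts exactly the doubleword-aligned offsets
   0, 8, 16, ..., 248 and rejects everything else, including all negative
   offsets.  An equivalent formulation as a hypothetical helper:  */

static inline bool
spe_offset_ok_example (HOST_WIDE_INT x)
{
  /* Non-negative, at most 248, and a multiple of 8.  */
  return x >= 0 && x <= 0xf8 && (x & 7) == 0;
}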
7969 bool
7970 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7971 bool strict, bool worst_case)
7973 unsigned HOST_WIDE_INT offset;
7974 unsigned int extra;
7976 if (GET_CODE (x) != PLUS)
7977 return false;
7978 if (!REG_P (XEXP (x, 0)))
7979 return false;
7980 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7981 return false;
7982 if (mode_supports_vsx_dform_quad (mode))
7983 return quad_address_p (x, mode, strict);
7984 if (!reg_offset_addressing_ok_p (mode))
7985 return virtual_stack_registers_memory_p (x);
7986 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7987 return true;
7988 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7989 return false;
7991 offset = INTVAL (XEXP (x, 1));
7992 extra = 0;
7993 switch (mode)
7995 case V4HImode:
7996 case V2SImode:
7997 case V1DImode:
7998 case V2SFmode:
7999 /* SPE vector modes. */
8000 return SPE_CONST_OFFSET_OK (offset);
8002 case DFmode:
8003 case DDmode:
8004 case DImode:
8005 /* On e500v2, we may have:
8007 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8009 which gets addressed with evldd instructions. */
8010 if (TARGET_E500_DOUBLE)
8011 return SPE_CONST_OFFSET_OK (offset);
8013 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8014 addressing. */
8015 if (VECTOR_MEM_VSX_P (mode))
8016 return false;
8018 if (!worst_case)
8019 break;
8020 if (!TARGET_POWERPC64)
8021 extra = 4;
8022 else if (offset & 3)
8023 return false;
8024 break;
8026 case TFmode:
8027 case IFmode:
8028 case KFmode:
8029 if (TARGET_E500_DOUBLE)
8030 return (SPE_CONST_OFFSET_OK (offset)
8031 && SPE_CONST_OFFSET_OK (offset + 8));
8032 /* fall through */
8034 case TDmode:
8035 case TImode:
8036 case PTImode:
8037 extra = 8;
8038 if (!worst_case)
8039 break;
8040 if (!TARGET_POWERPC64)
8041 extra = 12;
8042 else if (offset & 3)
8043 return false;
8044 break;
8046 default:
8047 break;
8050 offset += 0x8000;
8051 return offset < 0x10000 - extra;
8054 bool
8055 legitimate_indexed_address_p (rtx x, int strict)
8057 rtx op0, op1;
8059 if (GET_CODE (x) != PLUS)
8060 return false;
8062 op0 = XEXP (x, 0);
8063 op1 = XEXP (x, 1);
8065 /* Recognize the rtl generated by reload which we know will later be
8066 replaced with proper base and index regs. */
8067 if (!strict
8068 && reload_in_progress
8069 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8070 && REG_P (op1))
8071 return true;
8073 return (REG_P (op0) && REG_P (op1)
8074 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8075 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8076 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8077 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8080 bool
8081 avoiding_indexed_address_p (machine_mode mode)
8083 /* Avoid indexed addressing for modes that have non-indexed
8084 load/store instruction forms. */
8085 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8088 bool
8089 legitimate_indirect_address_p (rtx x, int strict)
8091 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8094 bool
8095 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8097 if (!TARGET_MACHO || !flag_pic
8098 || mode != SImode || GET_CODE (x) != MEM)
8099 return false;
8100 x = XEXP (x, 0);
8102 if (GET_CODE (x) != LO_SUM)
8103 return false;
8104 if (GET_CODE (XEXP (x, 0)) != REG)
8105 return false;
8106 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8107 return false;
8108 x = XEXP (x, 1);
8110 return CONSTANT_P (x);
8113 static bool
8114 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8116 if (GET_CODE (x) != LO_SUM)
8117 return false;
8118 if (GET_CODE (XEXP (x, 0)) != REG)
8119 return false;
8120 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8121 return false;
8122 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8123 if (mode_supports_vsx_dform_quad (mode))
8124 return false;
8125 /* Restrict addressing for DI because of our SUBREG hackery. */
8126 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8127 return false;
8128 x = XEXP (x, 1);
8130 if (TARGET_ELF || TARGET_MACHO)
8132 bool large_toc_ok;
8134 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8135 return false;
8136 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
8137 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8138 recognizes some LO_SUM addresses as valid although this
8139 function says the opposite. In most cases LRA can generate
8140 correct code for address reloads through its own transformations,
8141 but it cannot manage some LO_SUM cases. So we need code here,
8142 analogous to that in rs6000_legitimize_reload_address for
8143 LO_SUM, saying that those addresses are still valid. */
8144 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8145 && small_toc_ref (x, VOIDmode));
8146 if (TARGET_TOC && ! large_toc_ok)
8147 return false;
8148 if (GET_MODE_NUNITS (mode) != 1)
8149 return false;
8150 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8151 && !(/* ??? Assume floating point reg based on mode? */
8152 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8153 && (mode == DFmode || mode == DDmode)))
8154 return false;
8156 return CONSTANT_P (x) || large_toc_ok;
8159 return false;
8163 /* Try machine-dependent ways of modifying an illegitimate address
8164 to be legitimate. If we find one, return the new, valid address.
8165 This is used from only one place: `memory_address' in explow.c.
8167 OLDX is the address as it was before break_out_memory_refs was
8168 called. In some cases it is useful to look at this to decide what
8169 needs to be done.
8171 It is always safe for this function to do nothing. It exists to
8172 recognize opportunities to optimize the output.
8174 On RS/6000, first check for the sum of a register with a constant
8175 integer that is out of range. If so, generate code to add the
8176 constant with the low-order 16 bits masked to the register and force
8177 this result into another register (this can be done with `cau', the
8178 old mnemonic for addis). Then generate an address of REG+(CONST&0xffff),
8179 allowing for the possibility of bit 16 being a one.
8181 Then check for the sum of a register and something not constant, try to
8182 load the other things into a register and return the sum. */
8184 static rtx
8185 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8186 machine_mode mode)
8188 unsigned int extra;
8190 if (!reg_offset_addressing_ok_p (mode)
8191 || mode_supports_vsx_dform_quad (mode))
8193 if (virtual_stack_registers_memory_p (x))
8194 return x;
8196 /* In theory we should not be seeing addresses of the form reg+0,
8197 but just in case it is generated, optimize it away. */
8198 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8199 return force_reg (Pmode, XEXP (x, 0));
8201 /* For TImode with load/store quad, restrict addresses to just a single
8202 pointer, so it works with both GPRs and VSX registers. */
8203 /* Make sure both operands are registers. */
8204 else if (GET_CODE (x) == PLUS
8205 && (mode != TImode || !TARGET_QUAD_MEMORY))
8206 return gen_rtx_PLUS (Pmode,
8207 force_reg (Pmode, XEXP (x, 0)),
8208 force_reg (Pmode, XEXP (x, 1)));
8209 else
8210 return force_reg (Pmode, x);
8212 if (GET_CODE (x) == SYMBOL_REF)
8214 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8215 if (model != 0)
8216 return rs6000_legitimize_tls_address (x, model);
8219 extra = 0;
8220 switch (mode)
8222 case TFmode:
8223 case TDmode:
8224 case TImode:
8225 case PTImode:
8226 case IFmode:
8227 case KFmode:
8228 /* As in legitimate_offset_address_p we do not assume
8229 worst-case. The mode here is just a hint as to the registers
8230 used. A TImode is usually in gprs, but may actually be in
8231 fprs. Leave worst-case scenario for reload to handle via
8232 insn constraints. PTImode is only GPRs. */
8233 extra = 8;
8234 break;
8235 default:
8236 break;
8239 if (GET_CODE (x) == PLUS
8240 && GET_CODE (XEXP (x, 0)) == REG
8241 && GET_CODE (XEXP (x, 1)) == CONST_INT
8242 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8243 >= 0x10000 - extra)
8244 && !(SPE_VECTOR_MODE (mode)
8245 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8247 HOST_WIDE_INT high_int, low_int;
8248 rtx sum;
8249 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8250 if (low_int >= 0x8000 - extra)
8251 low_int = 0;
8252 high_int = INTVAL (XEXP (x, 1)) - low_int;
8253 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8254 GEN_INT (high_int)), 0);
8255 return plus_constant (Pmode, sum, low_int);
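/* Illustrative note, not in the original source: for x = (plus r3
   0x12348), low_int becomes 0x2348 and high_int 0x10000, so we emit an
   addis of 1 and address the memory as reg + 0x2348; an offset such as
   0x18000, whose low half sign-extends negative, splits instead as
   0x20000 + (-0x8000).  */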
8257 else if (GET_CODE (x) == PLUS
8258 && GET_CODE (XEXP (x, 0)) == REG
8259 && GET_CODE (XEXP (x, 1)) != CONST_INT
8260 && GET_MODE_NUNITS (mode) == 1
8261 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8262 || (/* ??? Assume floating point reg based on mode? */
8263 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8264 && (mode == DFmode || mode == DDmode)))
8265 && !avoiding_indexed_address_p (mode))
8267 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8268 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8270 else if (SPE_VECTOR_MODE (mode)
8271 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8273 if (mode == DImode)
8274 return x;
8275 /* We accept [reg + reg] and [reg + OFFSET]. */
8277 if (GET_CODE (x) == PLUS)
8279 rtx op1 = XEXP (x, 0);
8280 rtx op2 = XEXP (x, 1);
8281 rtx y;
8283 op1 = force_reg (Pmode, op1);
8285 if (GET_CODE (op2) != REG
8286 && (GET_CODE (op2) != CONST_INT
8287 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8288 || (GET_MODE_SIZE (mode) > 8
8289 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8290 op2 = force_reg (Pmode, op2);
8292 /* We can't always do [reg + reg] for these, because [reg +
8293 reg + offset] is not a legitimate addressing mode. */
8294 y = gen_rtx_PLUS (Pmode, op1, op2);
8296 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8297 return force_reg (Pmode, y);
8298 else
8299 return y;
8302 return force_reg (Pmode, x);
8304 else if ((TARGET_ELF
8305 #if TARGET_MACHO
8306 || !MACHO_DYNAMIC_NO_PIC_P
8307 #endif
8309 && TARGET_32BIT
8310 && TARGET_NO_TOC
8311 && ! flag_pic
8312 && GET_CODE (x) != CONST_INT
8313 && GET_CODE (x) != CONST_WIDE_INT
8314 && GET_CODE (x) != CONST_DOUBLE
8315 && CONSTANT_P (x)
8316 && GET_MODE_NUNITS (mode) == 1
8317 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8318 || (/* ??? Assume floating point reg based on mode? */
8319 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8320 && (mode == DFmode || mode == DDmode))))
8322 rtx reg = gen_reg_rtx (Pmode);
8323 if (TARGET_ELF)
8324 emit_insn (gen_elf_high (reg, x));
8325 else
8326 emit_insn (gen_macho_high (reg, x));
8327 return gen_rtx_LO_SUM (Pmode, reg, x);
8329 else if (TARGET_TOC
8330 && GET_CODE (x) == SYMBOL_REF
8331 && constant_pool_expr_p (x)
8332 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8333 return create_TOC_reference (x, NULL_RTX);
8334 else
8335 return x;
8338 /* Debug version of rs6000_legitimize_address. */
8339 static rtx
8340 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8342 rtx ret;
8343 rtx_insn *insns;
8345 start_sequence ();
8346 ret = rs6000_legitimize_address (x, oldx, mode);
8347 insns = get_insns ();
8348 end_sequence ();
8350 if (ret != x)
8352 fprintf (stderr,
8353 "\nrs6000_legitimize_address: mode %s, old code %s, "
8354 "new code %s, modified\n",
8355 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8356 GET_RTX_NAME (GET_CODE (ret)));
8358 fprintf (stderr, "Original address:\n");
8359 debug_rtx (x);
8361 fprintf (stderr, "oldx:\n");
8362 debug_rtx (oldx);
8364 fprintf (stderr, "New address:\n");
8365 debug_rtx (ret);
8367 if (insns)
8369 fprintf (stderr, "Insns added:\n");
8370 debug_rtx_list (insns, 20);
8373 else
8375 fprintf (stderr,
8376 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8377 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8379 debug_rtx (x);
8382 if (insns)
8383 emit_insn (insns);
8385 return ret;
8388 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8389 We need to emit DTP-relative relocations. */
8391 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8392 static void
8393 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8395 switch (size)
8397 case 4:
8398 fputs ("\t.long\t", file);
8399 break;
8400 case 8:
8401 fputs (DOUBLE_INT_ASM_OP, file);
8402 break;
8403 default:
8404 gcc_unreachable ();
8406 output_addr_const (file, x);
8407 if (TARGET_ELF)
8408 fputs ("@dtprel+0x8000", file);
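/* Illustrative note, not in the original source: the +0x8000 bias
   follows the PowerPC TLS ABI, which biases DTP-relative offsets so that
   a full 64KB of thread-local data stays reachable through signed
   16-bit displacements.  */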
8409 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8411 switch (SYMBOL_REF_TLS_MODEL (x))
8413 case 0:
8414 break;
8415 case TLS_MODEL_LOCAL_EXEC:
8416 fputs ("@le", file);
8417 break;
8418 case TLS_MODEL_INITIAL_EXEC:
8419 fputs ("@ie", file);
8420 break;
8421 case TLS_MODEL_GLOBAL_DYNAMIC:
8422 case TLS_MODEL_LOCAL_DYNAMIC:
8423 fputs ("@m", file);
8424 break;
8425 default:
8426 gcc_unreachable ();
8431 /* Return true if X is a symbol that refers to real (rather than emulated)
8432 TLS. */
8434 static bool
8435 rs6000_real_tls_symbol_ref_p (rtx x)
8437 return (GET_CODE (x) == SYMBOL_REF
8438 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8441 /* In the name of slightly smaller debug output, and to cater to
8442 general assembler lossage, recognize various UNSPEC sequences
8443 and turn them back into a direct symbol reference. */
8445 static rtx
8446 rs6000_delegitimize_address (rtx orig_x)
8448 rtx x, y, offset;
8450 orig_x = delegitimize_mem_from_attrs (orig_x);
8451 x = orig_x;
8452 if (MEM_P (x))
8453 x = XEXP (x, 0);
8455 y = x;
8456 if (TARGET_CMODEL != CMODEL_SMALL
8457 && GET_CODE (y) == LO_SUM)
8458 y = XEXP (y, 1);
8460 offset = NULL_RTX;
8461 if (GET_CODE (y) == PLUS
8462 && GET_MODE (y) == Pmode
8463 && CONST_INT_P (XEXP (y, 1)))
8465 offset = XEXP (y, 1);
8466 y = XEXP (y, 0);
8469 if (GET_CODE (y) == UNSPEC
8470 && XINT (y, 1) == UNSPEC_TOCREL)
8472 y = XVECEXP (y, 0, 0);
8474 #ifdef HAVE_AS_TLS
8475 /* Do not associate thread-local symbols with the original
8476 constant pool symbol. */
8477 if (TARGET_XCOFF
8478 && GET_CODE (y) == SYMBOL_REF
8479 && CONSTANT_POOL_ADDRESS_P (y)
8480 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8481 return orig_x;
8482 #endif
8484 if (offset != NULL_RTX)
8485 y = gen_rtx_PLUS (Pmode, y, offset);
8486 if (!MEM_P (orig_x))
8487 return y;
8488 else
8489 return replace_equiv_address_nv (orig_x, y);
8492 if (TARGET_MACHO
8493 && GET_CODE (orig_x) == LO_SUM
8494 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8496 y = XEXP (XEXP (orig_x, 1), 0);
8497 if (GET_CODE (y) == UNSPEC
8498 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8499 return XVECEXP (y, 0, 0);
8502 return orig_x;
8505 /* Return true if X shouldn't be emitted into the debug info.
8506 The linker doesn't like .toc section references from
8507 .debug_* sections, so reject .toc section symbols. */
8509 static bool
8510 rs6000_const_not_ok_for_debug_p (rtx x)
8512 if (GET_CODE (x) == SYMBOL_REF
8513 && CONSTANT_POOL_ADDRESS_P (x))
8515 rtx c = get_pool_constant (x);
8516 machine_mode cmode = get_pool_mode (x);
8517 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8518 return true;
8521 return false;
8524 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8526 static GTY(()) rtx rs6000_tls_symbol;
8527 static rtx
8528 rs6000_tls_get_addr (void)
8530 if (!rs6000_tls_symbol)
8531 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8533 return rs6000_tls_symbol;
8536 /* Construct the SYMBOL_REF for TLS GOT references. */
8538 static GTY(()) rtx rs6000_got_symbol;
8539 static rtx
8540 rs6000_got_sym (void)
8542 if (!rs6000_got_symbol)
8544 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8545 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8546 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8549 return rs6000_got_symbol;
8552 /* AIX Thread-Local Address support. */
8554 static rtx
8555 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8557 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8558 const char *name;
8559 char *tlsname;
8561 name = XSTR (addr, 0);
8562 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8563 or will be placed in the TLS private data section. */
8564 if (name[strlen (name) - 1] != ']'
8565 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8566 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8568 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8569 strcpy (tlsname, name);
8570 strcat (tlsname,
8571 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8572 tlsaddr = copy_rtx (addr);
8573 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
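/* Illustrative note, not in the original source: a public symbol "foo"
   becomes "foo[TL]" here (or "foo[UL]" for a bss-style initializer); the
   bracketed csect qualifier tells the XCOFF assembler which TLS section
   the object belongs to.  */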
8575 else
8576 tlsaddr = addr;
8578 /* Place addr into TOC constant pool. */
8579 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8581 /* Output the TOC entry and create the MEM referencing the value. */
8582 if (constant_pool_expr_p (XEXP (sym, 0))
8583 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8585 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8586 mem = gen_const_mem (Pmode, tocref);
8587 set_mem_alias_set (mem, get_TOC_alias_set ());
8589 else
8590 return sym;
8592 /* Use global-dynamic for local-dynamic. */
8593 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8594 || model == TLS_MODEL_LOCAL_DYNAMIC)
8596 /* Create new TOC reference for @m symbol. */
8597 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8598 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8599 strcpy (tlsname, "*LCM");
8600 strcat (tlsname, name + 3);
8601 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8602 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8603 tocref = create_TOC_reference (modaddr, NULL_RTX);
8604 rtx modmem = gen_const_mem (Pmode, tocref);
8605 set_mem_alias_set (modmem, get_TOC_alias_set ());
8607 rtx modreg = gen_reg_rtx (Pmode);
8608 emit_insn (gen_rtx_SET (modreg, modmem));
8610 tmpreg = gen_reg_rtx (Pmode);
8611 emit_insn (gen_rtx_SET (tmpreg, mem));
8613 dest = gen_reg_rtx (Pmode);
8614 if (TARGET_32BIT)
8615 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8616 else
8617 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8618 return dest;
8620 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8621 else if (TARGET_32BIT)
8623 tlsreg = gen_reg_rtx (SImode);
8624 emit_insn (gen_tls_get_tpointer (tlsreg));
8626 else
8627 tlsreg = gen_rtx_REG (DImode, 13);
8629 /* Load the TOC value into temporary register. */
8630 tmpreg = gen_reg_rtx (Pmode);
8631 emit_insn (gen_rtx_SET (tmpreg, mem));
8632 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8633 gen_rtx_MINUS (Pmode, addr, tlsreg));
8635 /* Add TOC symbol value to TLS pointer. */
8636 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8638 return dest;
8641 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8642 this (thread-local) address. */
8644 static rtx
8645 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8647 rtx dest, insn;
8649 if (TARGET_XCOFF)
8650 return rs6000_legitimize_tls_address_aix (addr, model);
8652 dest = gen_reg_rtx (Pmode);
8653 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8655 rtx tlsreg;
8657 if (TARGET_64BIT)
8659 tlsreg = gen_rtx_REG (Pmode, 13);
8660 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8662 else
8664 tlsreg = gen_rtx_REG (Pmode, 2);
8665 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8667 emit_insn (insn);
8669 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8671 rtx tlsreg, tmp;
8673 tmp = gen_reg_rtx (Pmode);
8674 if (TARGET_64BIT)
8676 tlsreg = gen_rtx_REG (Pmode, 13);
8677 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8679 else
8681 tlsreg = gen_rtx_REG (Pmode, 2);
8682 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8684 emit_insn (insn);
8685 if (TARGET_64BIT)
8686 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8687 else
8688 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8689 emit_insn (insn);
8691 else
8693 rtx r3, got, tga, tmp1, tmp2, call_insn;
8695 /* We currently use relocations like @got@tlsgd for tls, which
8696 means the linker will handle allocation of tls entries, placing
8697 them in the .got section. So use a pointer to the .got section,
8698 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8699 or to secondary GOT sections used by 32-bit -fPIC. */
8700 if (TARGET_64BIT)
8701 got = gen_rtx_REG (Pmode, 2);
8702 else
8704 if (flag_pic == 1)
8705 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8706 else
8708 rtx gsym = rs6000_got_sym ();
8709 got = gen_reg_rtx (Pmode);
8710 if (flag_pic == 0)
8711 rs6000_emit_move (got, gsym, Pmode);
8712 else
8714 rtx mem, lab, last;
8716 tmp1 = gen_reg_rtx (Pmode);
8717 tmp2 = gen_reg_rtx (Pmode);
8718 mem = gen_const_mem (Pmode, tmp1);
8719 lab = gen_label_rtx ();
8720 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8721 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8722 if (TARGET_LINK_STACK)
8723 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8724 emit_move_insn (tmp2, mem);
8725 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8726 set_unique_reg_note (last, REG_EQUAL, gsym);
8731 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8733 tga = rs6000_tls_get_addr ();
8734 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8735 1, const0_rtx, Pmode);
8737 r3 = gen_rtx_REG (Pmode, 3);
8738 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8740 if (TARGET_64BIT)
8741 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8742 else
8743 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8745 else if (DEFAULT_ABI == ABI_V4)
8746 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8747 else
8748 gcc_unreachable ();
8749 call_insn = last_call_insn ();
8750 PATTERN (call_insn) = insn;
8751 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8752 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8753 pic_offset_table_rtx);
8755 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8757 tga = rs6000_tls_get_addr ();
8758 tmp1 = gen_reg_rtx (Pmode);
8759 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8760 1, const0_rtx, Pmode);
8762 r3 = gen_rtx_REG (Pmode, 3);
8763 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8765 if (TARGET_64BIT)
8766 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8767 else
8768 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8770 else if (DEFAULT_ABI == ABI_V4)
8771 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8772 else
8773 gcc_unreachable ();
8774 call_insn = last_call_insn ();
8775 PATTERN (call_insn) = insn;
8776 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8777 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8778 pic_offset_table_rtx);
8780 if (rs6000_tls_size == 16)
8782 if (TARGET_64BIT)
8783 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8784 else
8785 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8787 else if (rs6000_tls_size == 32)
8789 tmp2 = gen_reg_rtx (Pmode);
8790 if (TARGET_64BIT)
8791 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8792 else
8793 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8794 emit_insn (insn);
8795 if (TARGET_64BIT)
8796 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8797 else
8798 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8800 else
8802 tmp2 = gen_reg_rtx (Pmode);
8803 if (TARGET_64BIT)
8804 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8805 else
8806 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8807 emit_insn (insn);
8808 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8810 emit_insn (insn);
8812 else
8814 /* IE, or 64-bit offset LE. */
8815 tmp2 = gen_reg_rtx (Pmode);
8816 if (TARGET_64BIT)
8817 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8818 else
8819 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8820 emit_insn (insn);
8821 if (TARGET_64BIT)
8822 insn = gen_tls_tls_64 (dest, tmp2, addr);
8823 else
8824 insn = gen_tls_tls_32 (dest, tmp2, addr);
8825 emit_insn (insn);
8829 return dest;
8832 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8834 static bool
8835 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8837 if (GET_CODE (x) == HIGH
8838 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8839 return true;
8841 /* A TLS symbol in the TOC cannot contain a sum. */
8842 if (GET_CODE (x) == CONST
8843 && GET_CODE (XEXP (x, 0)) == PLUS
8844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8845 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8846 return true;
8848 /* Do not place an ELF TLS symbol in the constant pool. */
8849 return TARGET_ELF && tls_referenced_p (x);
8852 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8853 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8854 can be addressed relative to the toc pointer. */
8856 static bool
8857 use_toc_relative_ref (rtx sym, machine_mode mode)
8859 return ((constant_pool_expr_p (sym)
8860 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8861 get_pool_mode (sym)))
8862 || (TARGET_CMODEL == CMODEL_MEDIUM
8863 && SYMBOL_REF_LOCAL_P (sym)
8864 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8867 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8868 replace the input X, or the original X if no replacement is called for.
8869 The output parameter *WIN is 1 if the calling macro should goto WIN,
8870 0 if it should not.
8872 For RS/6000, we wish to handle large displacements off a base
8873 register by splitting the addend across an addi/addis pair and the mem insn.
8874 This cuts the number of extra insns needed from 3 to 1.
8876 On Darwin, we use this to generate code for floating point constants.
8877 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8878 The Darwin code is inside #if TARGET_MACHO because only then are the
8879 machopic_* functions defined. */
8880 static rtx
8881 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8882 int opnum, int type,
8883 int ind_levels ATTRIBUTE_UNUSED, int *win)
8885 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8886 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
8888 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
8889 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
8890 if (reg_offset_p
8891 && opnum == 1
8892 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8893 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
8894 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
8895 && TARGET_P9_VECTOR)
8896 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
8897 && TARGET_P9_VECTOR)))
8898 reg_offset_p = false;
8900 /* We must recognize output that we have already generated ourselves. */
8901 if (GET_CODE (x) == PLUS
8902 && GET_CODE (XEXP (x, 0)) == PLUS
8903 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8904 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8905 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8907 if (TARGET_DEBUG_ADDR)
8909 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
8910 debug_rtx (x);
8912 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8913 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8914 opnum, (enum reload_type) type);
8915 *win = 1;
8916 return x;
8919 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8920 if (GET_CODE (x) == LO_SUM
8921 && GET_CODE (XEXP (x, 0)) == HIGH)
8923 if (TARGET_DEBUG_ADDR)
8925 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
8926 debug_rtx (x);
8928 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8929 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8930 opnum, (enum reload_type) type);
8931 *win = 1;
8932 return x;
8935 #if TARGET_MACHO
8936 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8937 && GET_CODE (x) == LO_SUM
8938 && GET_CODE (XEXP (x, 0)) == PLUS
8939 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8940 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8941 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8942 && machopic_operand_p (XEXP (x, 1)))
8944 /* Result of previous invocation of this function on Darwin
8945 floating point constant. */
8946 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8947 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8948 opnum, (enum reload_type) type);
8949 *win = 1;
8950 return x;
8952 #endif
8954 if (TARGET_CMODEL != CMODEL_SMALL
8955 && reg_offset_p
8956 && !quad_offset_p
8957 && small_toc_ref (x, VOIDmode))
8959 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8960 x = gen_rtx_LO_SUM (Pmode, hi, x);
8961 if (TARGET_DEBUG_ADDR)
8963 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
8964 debug_rtx (x);
8966 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8967 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8968 opnum, (enum reload_type) type);
8969 *win = 1;
8970 return x;
8973 if (GET_CODE (x) == PLUS
8974 && REG_P (XEXP (x, 0))
8975 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8976 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8977 && CONST_INT_P (XEXP (x, 1))
8978 && reg_offset_p
8979 && !SPE_VECTOR_MODE (mode)
8980 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8981 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8983 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8984 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8985 HOST_WIDE_INT high
8986 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
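/* Illustrative note, not in the original source: HIGH is VAL rounded to
   a multiple of 0x10000 that fits in 32 signed bits.  A value such as
   0x7fffffff would need the split 0x80000000 + (-1), which overflows
   32 bits; the high + low != val test below rejects it.  */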
8988 /* Check for 32-bit overflow or quad addresses with one of the
8989 four least significant bits set. */
8990 if (high + low != val
8991 || (quad_offset_p && (low & 0xf)))
8993 *win = 0;
8994 return x;
8997 /* Reload the high part into a base reg; leave the low part
8998 in the mem directly. */
9000 x = gen_rtx_PLUS (GET_MODE (x),
9001 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9002 GEN_INT (high)),
9003 GEN_INT (low));
9005 if (TARGET_DEBUG_ADDR)
9007 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9008 debug_rtx (x);
9010 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9011 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9012 opnum, (enum reload_type) type);
9013 *win = 1;
9014 return x;
9017 if (GET_CODE (x) == SYMBOL_REF
9018 && reg_offset_p
9019 && !quad_offset_p
9020 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9021 && !SPE_VECTOR_MODE (mode)
9022 #if TARGET_MACHO
9023 && DEFAULT_ABI == ABI_DARWIN
9024 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9025 && machopic_symbol_defined_p (x)
9026 #else
9027 && DEFAULT_ABI == ABI_V4
9028 && !flag_pic
9029 #endif
9030 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9031 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9032 without fprs.
9033 ??? Assume floating point reg based on mode? This assumption is
9034 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9035 where reload ends up doing a DFmode load of a constant from
9036 mem using two gprs. Unfortunately, at this point reload
9037 hasn't yet selected regs so poking around in reload data
9038 won't help and even if we could figure out the regs reliably,
9039 we'd still want to allow this transformation when the mem is
9040 naturally aligned. Since we say the address is good here, we
9041 can't disable offsets from LO_SUMs in mem_operand_gpr.
9042 FIXME: Allow offset from lo_sum for other modes too, when
9043 mem is sufficiently aligned.
9045 Also disallow this if the type can go in VMX/Altivec registers, since
9046 those registers do not have d-form (reg+offset) address modes. */
9047 && !reg_addr[mode].scalar_in_vmx_p
9048 && mode != TFmode
9049 && mode != TDmode
9050 && mode != IFmode
9051 && mode != KFmode
9052 && (mode != TImode || !TARGET_VSX_TIMODE)
9053 && mode != PTImode
9054 && (mode != DImode || TARGET_POWERPC64)
9055 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9056 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9058 #if TARGET_MACHO
9059 if (flag_pic)
9061 rtx offset = machopic_gen_offset (x);
9062 x = gen_rtx_LO_SUM (GET_MODE (x),
9063 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9064 gen_rtx_HIGH (Pmode, offset)), offset);
9066 else
9067 #endif
9068 x = gen_rtx_LO_SUM (GET_MODE (x),
9069 gen_rtx_HIGH (Pmode, x), x);
9071 if (TARGET_DEBUG_ADDR)
9073 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9074 debug_rtx (x);
9076 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9077 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9078 opnum, (enum reload_type) type);
9079 *win = 1;
9080 return x;
9083 /* Reload an offset address wrapped by an AND that represents the
9084 masking of the lower bits. Strip the outer AND and let reload
9085 convert the offset address into an indirect address. For VSX,
9086 force reload to create the address with an AND in a separate
9087 register, because we can't guarantee an altivec register will
9088 be used. */
9089 if (VECTOR_MEM_ALTIVEC_P (mode)
9090 && GET_CODE (x) == AND
9091 && GET_CODE (XEXP (x, 0)) == PLUS
9092 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9093 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9094 && GET_CODE (XEXP (x, 1)) == CONST_INT
9095 && INTVAL (XEXP (x, 1)) == -16)
9097 x = XEXP (x, 0);
9098 *win = 1;
9099 return x;
9102 if (TARGET_TOC
9103 && reg_offset_p
9104 && !quad_offset_p
9105 && GET_CODE (x) == SYMBOL_REF
9106 && use_toc_relative_ref (x, mode))
9108 x = create_TOC_reference (x, NULL_RTX);
9109 if (TARGET_CMODEL != CMODEL_SMALL)
9111 if (TARGET_DEBUG_ADDR)
9113 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9114 debug_rtx (x);
9116 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9117 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9118 opnum, (enum reload_type) type);
9120 *win = 1;
9121 return x;
9123 *win = 0;
9124 return x;
9127 /* Debug version of rs6000_legitimize_reload_address. */
9128 static rtx
9129 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9130 int opnum, int type,
9131 int ind_levels, int *win)
9133 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9134 ind_levels, win);
9135 fprintf (stderr,
9136 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9137 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9138 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9139 debug_rtx (x);
9141 if (x == ret)
9142 fprintf (stderr, "Same address returned\n");
9143 else if (!ret)
9144 fprintf (stderr, "NULL returned\n");
9145 else
9147 fprintf (stderr, "New address:\n");
9148 debug_rtx (ret);
9151 return ret;
9154 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9155 that is a valid memory address for an instruction.
9156 The MODE argument is the machine mode for the MEM expression
9157 that wants to use this address.
9159 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9160 refers to a constant pool entry of an address (or the sum of it
9161 plus a constant), a short (16-bit signed) constant plus a register,
9162 the sum of two registers, or a register indirect, possibly with an
9163 auto-increment. For DFmode, DDmode and DImode with a constant plus
9164 register, we must ensure that both words are addressable, or on
9165 PowerPC64 that the offset is word aligned.
9167 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9168 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9169 because adjacent memory cells are accessed by adding word-sized offsets
9170 during assembly output. */
9171 static bool
9172 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9174 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9175 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9177 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9178 if (VECTOR_MEM_ALTIVEC_P (mode)
9179 && GET_CODE (x) == AND
9180 && GET_CODE (XEXP (x, 1)) == CONST_INT
9181 && INTVAL (XEXP (x, 1)) == -16)
9182 x = XEXP (x, 0);
9184 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9185 return 0;
9186 if (legitimate_indirect_address_p (x, reg_ok_strict))
9187 return 1;
9188 if (TARGET_UPDATE
9189 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9190 && mode_supports_pre_incdec_p (mode)
9191 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9192 return 1;
9193 /* Handle restricted vector d-form offsets in ISA 3.0. */
9194 if (quad_offset_p)
9196 if (quad_address_p (x, mode, reg_ok_strict))
9197 return 1;
9199 else if (virtual_stack_registers_memory_p (x))
9200 return 1;
9202 else if (reg_offset_p)
9204 if (legitimate_small_data_p (mode, x))
9205 return 1;
9206 if (legitimate_constant_pool_address_p (x, mode,
9207 reg_ok_strict || lra_in_progress))
9208 return 1;
9209 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9210 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9211 return 1;
9214 /* For TImode, if we have load/store quad and TImode in VSX registers, only
9215 allow register indirect addresses. This will allow the values to go in
9216 either GPRs or VSX registers without reloading. The vector types would
9217 tend to go into VSX registers, so we allow REG+REG, while TImode seems
9218 somewhat split, in that some uses are GPR based, and some VSX based. */
9219 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9220 return 0;
9221 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9222 if (! reg_ok_strict
9223 && reg_offset_p
9224 && GET_CODE (x) == PLUS
9225 && GET_CODE (XEXP (x, 0)) == REG
9226 && (XEXP (x, 0) == virtual_stack_vars_rtx
9227 || XEXP (x, 0) == arg_pointer_rtx)
9228 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9229 return 1;
9230 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9231 return 1;
9232 if (!FLOAT128_2REG_P (mode)
9233 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9234 || TARGET_POWERPC64
9235 || (mode != DFmode && mode != DDmode)
9236 || (TARGET_E500_DOUBLE && mode != DDmode))
9237 && (TARGET_POWERPC64 || mode != DImode)
9238 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9239 && mode != PTImode
9240 && !avoiding_indexed_address_p (mode)
9241 && legitimate_indexed_address_p (x, reg_ok_strict))
9242 return 1;
9243 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9244 && mode_supports_pre_modify_p (mode)
9245 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9246 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9247 reg_ok_strict, false)
9248 || (!avoiding_indexed_address_p (mode)
9249 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9250 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9251 return 1;
9252 if (reg_offset_p && !quad_offset_p
9253 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9254 return 1;
9255 return 0;
9258 /* Debug version of rs6000_legitimate_address_p. */
9259 static bool
9260 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9261 bool reg_ok_strict)
9263 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9264 fprintf (stderr,
9265 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9266 "strict = %d, reload = %s, code = %s\n",
9267 ret ? "true" : "false",
9268 GET_MODE_NAME (mode),
9269 reg_ok_strict,
9270 (reload_completed
9271 ? "after"
9272 : (reload_in_progress ? "progress" : "before")),
9273 GET_RTX_NAME (GET_CODE (x)));
9274 debug_rtx (x);
9276 return ret;
9279 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9281 static bool
9282 rs6000_mode_dependent_address_p (const_rtx addr,
9283 addr_space_t as ATTRIBUTE_UNUSED)
9285 return rs6000_mode_dependent_address_ptr (addr);
9288 /* Go to LABEL if ADDR (a legitimate address expression)
9289 has an effect that depends on the machine mode it is used for.
9291 On the RS/6000 this is true of all integral offsets (since AltiVec
9292 and VSX modes don't allow them) and of any pre-increment or decrement.
9294 ??? Except that due to conceptual problems in offsettable_address_p
9295 we can't really report the problems of integral offsets. So leave
9296 this assuming that the adjustable offset must be valid for the
9297 sub-words of a TFmode operand, which is what we had before. */
9299 static bool
9300 rs6000_mode_dependent_address (const_rtx addr)
9302 switch (GET_CODE (addr))
9304 case PLUS:
9305 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9306 is considered a legitimate address before reload, so there
9307 are no offset restrictions in that case. Note that this
9308 condition is safe in strict mode because any address involving
9309 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9310 been rejected as illegitimate. */
9311 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9312 && XEXP (addr, 0) != arg_pointer_rtx
9313 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9315 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9316 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9318 break;
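/* Worked example: in 32-bit mode the worst case assumed above is a
   TFmode access at REG+VAL, which touches words at VAL, VAL+4,
   VAL+8 and VAL+12, so the test fires once VAL+12 no longer fits in
   a signed 16-bit displacement, i.e. for VAL >= 32756 (0x10000 - 12).
   On PowerPC64 the final piece is the doubleword at VAL+8, hence the
   smaller adjustment of 8.  */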
9320 case LO_SUM:
9321 /* Anything in the constant pool is sufficiently aligned that
9322 all bytes have the same high part address. */
9323 return !legitimate_constant_pool_address_p (addr, QImode, false);
9325 /* Auto-increment cases are now treated generically in recog.c. */
9326 case PRE_MODIFY:
9327 return TARGET_UPDATE;
9329 /* AND is only allowed in Altivec loads. */
9330 case AND:
9331 return true;
9333 default:
9334 break;
9337 return false;
9340 /* Debug version of rs6000_mode_dependent_address. */
9341 static bool
9342 rs6000_debug_mode_dependent_address (const_rtx addr)
9344 bool ret = rs6000_mode_dependent_address (addr);
9346 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9347 ret ? "true" : "false");
9348 debug_rtx (addr);
9350 return ret;
9353 /* Implement FIND_BASE_TERM. */
9355 rtx
9356 rs6000_find_base_term (rtx op)
9358 rtx base;
9360 base = op;
9361 if (GET_CODE (base) == CONST)
9362 base = XEXP (base, 0);
9363 if (GET_CODE (base) == PLUS)
9364 base = XEXP (base, 0);
9365 if (GET_CODE (base) == UNSPEC)
9366 switch (XINT (base, 1))
9368 case UNSPEC_TOCREL:
9369 case UNSPEC_MACHOPIC_OFFSET:
9370 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9371 for aliasing purposes. */
9372 return XVECEXP (base, 0, 0);
9375 return op;
9378 /* More elaborate version of recog's offsettable_memref_p predicate
9379 that works around the ??? note of rs6000_mode_dependent_address.
9380 In particular it accepts
9382 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9384 in 32-bit mode, which the recog predicate rejects. */
9386 static bool
9387 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9389 bool worst_case;
9391 if (!MEM_P (op))
9392 return false;
9394 /* First mimic offsettable_memref_p. */
9395 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9396 return true;
9398 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9399 the latter predicate knows nothing about the mode of the memory
9400 reference and, therefore, assumes that it is the largest supported
9401 mode (TFmode). As a consequence, legitimate offsettable memory
9402 references are rejected. rs6000_legitimate_offset_address_p contains
9403 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9404 at least with a little bit of help here given that we know the
9405 actual registers used. */
9406 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9407 || GET_MODE_SIZE (reg_mode) == 4);
9408 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9409 true, worst_case);
9412 /* Determine the reassociation width to be used in reassociate_bb.
9413 This takes into account how many parallel operations we
9414 can actually do of a given type, and also the latency.
9416 int add/sub        6/cycle
9417 mul                2/cycle
9418 vect add/sub/mul   2/cycle
9419 fp add/sub/mul     2/cycle
9420 dfp                1/cycle
9423 static int
9424 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9425 enum machine_mode mode)
9427 switch (rs6000_cpu)
9429 case PROCESSOR_POWER8:
9430 case PROCESSOR_POWER9:
9431 if (DECIMAL_FLOAT_MODE_P (mode))
9432 return 1;
9433 if (VECTOR_MODE_P (mode))
9434 return 4;
9435 if (INTEGRAL_MODE_P (mode))
9436 return opc == MULT_EXPR ? 4 : 6;
9437 if (FLOAT_MODE_P (mode))
9438 return 4;
9439 break;
9440 default:
9441 break;
9443 return 1;
9446 /* Change register usage conditional on target flags. */
9447 static void
9448 rs6000_conditional_register_usage (void)
9450 int i;
9452 if (TARGET_DEBUG_TARGET)
9453 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9455 /* Set MQ register fixed (already call_used) so that it will not be
9456 allocated. */
9457 fixed_regs[64] = 1;
9459 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9460 if (TARGET_64BIT)
9461 fixed_regs[13] = call_used_regs[13]
9462 = call_really_used_regs[13] = 1;
9464 /* Conditionally disable FPRs. */
9465 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9466 for (i = 32; i < 64; i++)
9467 fixed_regs[i] = call_used_regs[i]
9468 = call_really_used_regs[i] = 1;
9470 /* The TOC register is not killed across calls in a way that is
9471 visible to the compiler. */
9472 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9473 call_really_used_regs[2] = 0;
9475 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9476 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9478 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9479 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9480 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9481 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9483 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9484 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9485 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9486 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9488 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9489 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9490 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9492 if (TARGET_SPE)
9494 global_regs[SPEFSCR_REGNO] = 1;
9495 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9496 registers in prologues and epilogues. We no longer use r14
9497 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9498 pool for link-compatibility with older versions of GCC. Once
9499 "old" code has died out, we can return r14 to the allocation
9500 pool. */
9501 fixed_regs[14]
9502 = call_used_regs[14]
9503 = call_really_used_regs[14] = 1;
9506 if (!TARGET_ALTIVEC && !TARGET_VSX)
9508 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9509 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9510 call_really_used_regs[VRSAVE_REGNO] = 1;
9513 if (TARGET_ALTIVEC || TARGET_VSX)
9514 global_regs[VSCR_REGNO] = 1;
9516 if (TARGET_ALTIVEC_ABI)
9518 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9519 call_used_regs[i] = call_really_used_regs[i] = 1;
9521 /* AIX reserves VR20:31 in non-extended ABI mode. */
9522 if (TARGET_XCOFF)
9523 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9524 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9529 /* Output insns to set DEST equal to the constant SOURCE as a series of
9530 lis, ori and shl instructions and return TRUE. */
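/* For instance, c = 0x12345678 in SImode becomes
     lis  rT, 0x1234     (rT = 0x12340000)
     ori  rD, rT, 0x5678
   which matches the two SETs emitted in the SImode case below.  */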
9532 bool
9533 rs6000_emit_set_const (rtx dest, rtx source)
9535 machine_mode mode = GET_MODE (dest);
9536 rtx temp, set;
9537 rtx_insn *insn;
9538 HOST_WIDE_INT c;
9540 gcc_checking_assert (CONST_INT_P (source));
9541 c = INTVAL (source);
9542 switch (mode)
9544 case QImode:
9545 case HImode:
9546 emit_insn (gen_rtx_SET (dest, source));
9547 return true;
9549 case SImode:
9550 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9552 emit_insn (gen_rtx_SET (copy_rtx (temp),
9553 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9554 emit_insn (gen_rtx_SET (dest,
9555 gen_rtx_IOR (SImode, copy_rtx (temp),
9556 GEN_INT (c & 0xffff))));
9557 break;
9559 case DImode:
9560 if (!TARGET_POWERPC64)
9562 rtx hi, lo;
9564 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9565 DImode);
9566 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9567 DImode);
9568 emit_move_insn (hi, GEN_INT (c >> 32));
9569 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9570 emit_move_insn (lo, GEN_INT (c));
9572 else
9573 rs6000_emit_set_long_const (dest, c);
9574 break;
9576 default:
9577 gcc_unreachable ();
9580 insn = get_last_insn ();
9581 set = single_set (insn);
9582 if (! CONSTANT_P (SET_SRC (set)))
9583 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9585 return true;
9588 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9589 Output insns to set DEST equal to the constant C as a series of
9590 lis, ori and shl instructions. */
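/* As a sketch of the general case at the end of this function,
   c = 0x123456789abcdef0 is built as
     lis   rT, 0x1234
     ori   rT, rT, 0x5678
     sldi  rT, rT, 32
     oris  rT, rT, 0x9abc
     ori   rD, rT, 0xdef0
   with steps skipped whenever the corresponding 16-bit chunk is 0.  */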
9592 static void
9593 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9595 rtx temp;
9596 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9598 ud1 = c & 0xffff;
9599 c = c >> 16;
9600 ud2 = c & 0xffff;
9601 c = c >> 16;
9602 ud3 = c & 0xffff;
9603 c = c >> 16;
9604 ud4 = c & 0xffff;
9606 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9607 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9608 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9610 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9611 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9613 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9615 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9616 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9617 if (ud1 != 0)
9618 emit_move_insn (dest,
9619 gen_rtx_IOR (DImode, copy_rtx (temp),
9620 GEN_INT (ud1)));
9622 else if (ud3 == 0 && ud4 == 0)
9624 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9626 gcc_assert (ud2 & 0x8000);
9627 emit_move_insn (copy_rtx (temp),
9628 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9629 if (ud1 != 0)
9630 emit_move_insn (copy_rtx (temp),
9631 gen_rtx_IOR (DImode, copy_rtx (temp),
9632 GEN_INT (ud1)));
9633 emit_move_insn (dest,
9634 gen_rtx_ZERO_EXTEND (DImode,
9635 gen_lowpart (SImode,
9636 copy_rtx (temp))));
9638 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9639 || (ud4 == 0 && ! (ud3 & 0x8000)))
9641 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9643 emit_move_insn (copy_rtx (temp),
9644 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9645 if (ud2 != 0)
9646 emit_move_insn (copy_rtx (temp),
9647 gen_rtx_IOR (DImode, copy_rtx (temp),
9648 GEN_INT (ud2)));
9649 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9650 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9651 GEN_INT (16)));
9652 if (ud1 != 0)
9653 emit_move_insn (dest,
9654 gen_rtx_IOR (DImode, copy_rtx (temp),
9655 GEN_INT (ud1)));
9657 else
9659 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9661 emit_move_insn (copy_rtx (temp),
9662 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9663 if (ud3 != 0)
9664 emit_move_insn (copy_rtx (temp),
9665 gen_rtx_IOR (DImode, copy_rtx (temp),
9666 GEN_INT (ud3)));
9668 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9669 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9670 GEN_INT (32)));
9671 if (ud2 != 0)
9672 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9673 gen_rtx_IOR (DImode, copy_rtx (temp),
9674 GEN_INT (ud2 << 16)));
9675 if (ud1 != 0)
9676 emit_move_insn (dest,
9677 gen_rtx_IOR (DImode, copy_rtx (temp),
9678 GEN_INT (ud1)));
9682 /* Helper for the following function. Get rid of [r+r] memory refs
9683 in modes where they won't work (TImode, TFmode, TDmode, PTImode). */
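/* E.g. (mem:TI (plus:DI (reg:DI 3) (reg:DI 4))) has its address
   copied into a fresh pseudo, leaving (mem:TI (reg:DI rT)), a form
   the multi-register move patterns can always handle.  */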
9685 static void
9686 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9688 if (reload_in_progress)
9689 return;
9691 if (GET_CODE (operands[0]) == MEM
9692 && GET_CODE (XEXP (operands[0], 0)) != REG
9693 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9694 GET_MODE (operands[0]), false))
9695 operands[0]
9696 = replace_equiv_address (operands[0],
9697 copy_addr_to_reg (XEXP (operands[0], 0)));
9699 if (GET_CODE (operands[1]) == MEM
9700 && GET_CODE (XEXP (operands[1], 0)) != REG
9701 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9702 GET_MODE (operands[1]), false))
9703 operands[1]
9704 = replace_equiv_address (operands[1],
9705 copy_addr_to_reg (XEXP (operands[1], 0)));
9708 /* Generate a vector of constants to permute MODE for a little-endian
9709 storage operation by swapping the two halves of a vector. */
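/* For example, for V4SImode the two loops below produce the
   permutation { 2, 3, 0, 1 }: the two doubleword halves of the
   vector swap places while elements within each half keep their
   order.  */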
9710 static rtvec
9711 rs6000_const_vec (machine_mode mode)
9713 int i, subparts;
9714 rtvec v;
9716 switch (mode)
9718 case V1TImode:
9719 subparts = 1;
9720 break;
9721 case V2DFmode:
9722 case V2DImode:
9723 subparts = 2;
9724 break;
9725 case V4SFmode:
9726 case V4SImode:
9727 subparts = 4;
9728 break;
9729 case V8HImode:
9730 subparts = 8;
9731 break;
9732 case V16QImode:
9733 subparts = 16;
9734 break;
9735 default:
9736 gcc_unreachable();
9739 v = rtvec_alloc (subparts);
9741 for (i = 0; i < subparts / 2; ++i)
9742 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9743 for (i = subparts / 2; i < subparts; ++i)
9744 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9746 return v;
9749 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
9750 for a VSX load or store operation. */
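/* E.g. a V2DImode source becomes
     (vec_select:V2DI source (parallel [1 0]))
   whereas TImode data is expressed as (rotate:TI source 64); both
   describe the doubleword swap that lxvd2x/stxvd2x perform on
   little-endian targets.  */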
9751 rtx
9752 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
9754 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
9755 128-bit integers if they are allowed in VSX registers. */
9756 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
9757 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
9758 else
9760 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9761 return gen_rtx_VEC_SELECT (mode, source, par);
9765 /* Emit a little-endian load from vector memory location SOURCE to VSX
9766 register DEST in mode MODE. The load is done with two permuting
9767 insns that represent an lxvd2x and an xxpermdi. */
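/* Schematically:
     (set tmp  (vec_select source))   <- the lxvd2x-style permuting load
     (set dest (vec_select tmp))      <- the xxpermdi undoing the swap
   so DEST ends up with the elements in their nominal order.  */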
9768 void
9769 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9771 rtx tmp, permute_mem, permute_reg;
9773 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9774 V1TImode). */
9775 if (mode == TImode || mode == V1TImode)
9777 mode = V2DImode;
9778 dest = gen_lowpart (V2DImode, dest);
9779 source = adjust_address (source, V2DImode, 0);
9782 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9783 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
9784 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
9785 emit_insn (gen_rtx_SET (tmp, permute_mem));
9786 emit_insn (gen_rtx_SET (dest, permute_reg));
9789 /* Emit a little-endian store to vector memory location DEST from VSX
9790 register SOURCE in mode MODE. The store is done with two permuting
9791 insns that represent an xxpermdi and an stxvd2x. */
9792 void
9793 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9795 rtx tmp, permute_src, permute_tmp;
9797 /* This should never be called during or after reload, because it does
9798 not re-permute the source register. It is intended only for use
9799 during expand. */
9800 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
9802 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9803 V1TImode). */
9804 if (mode == TImode || mode == V1TImode)
9806 mode = V2DImode;
9807 dest = adjust_address (dest, V2DImode, 0);
9808 source = gen_lowpart (V2DImode, source);
9811 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9812 permute_src = rs6000_gen_le_vsx_permute (source, mode);
9813 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
9814 emit_insn (gen_rtx_SET (tmp, permute_src));
9815 emit_insn (gen_rtx_SET (dest, permute_tmp));
9818 /* Emit a sequence representing a little-endian VSX load or store,
9819 moving data from SOURCE to DEST in mode MODE. This is done
9820 separately from rs6000_emit_move to ensure it is called only
9821 during expand. LE VSX loads and stores introduced later are
9822 handled with a split. The expand-time RTL generation allows
9823 us to optimize away redundant pairs of register-permutes. */
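/* For instance, copying one vector in memory to another expands to a
   permuting load followed by a permuting store; the two register
   permutes in the middle feed each other and can be deleted by later
   RTL passes, leaving just the lxvd2x/stxvd2x pair.  */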
9824 void
9825 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9827 gcc_assert (!BYTES_BIG_ENDIAN
9828 && VECTOR_MEM_VSX_P (mode)
9829 && !TARGET_P9_VECTOR
9830 && !gpr_or_gpr_p (dest, source)
9831 && (MEM_P (source) ^ MEM_P (dest)));
9833 if (MEM_P (source))
9835 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
9836 rs6000_emit_le_vsx_load (dest, source, mode);
9838 else
9840 if (!REG_P (source))
9841 source = force_reg (mode, source);
9842 rs6000_emit_le_vsx_store (dest, source, mode);
9846 /* Emit a move from SOURCE to DEST in mode MODE. */
9847 void
9848 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9850 rtx operands[2];
9851 operands[0] = dest;
9852 operands[1] = source;
9854 if (TARGET_DEBUG_ADDR)
9856 fprintf (stderr,
9857 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
9858 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9859 GET_MODE_NAME (mode),
9860 reload_in_progress,
9861 reload_completed,
9862 can_create_pseudo_p ());
9863 debug_rtx (dest);
9864 fprintf (stderr, "source:\n");
9865 debug_rtx (source);
9868 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
9869 if (CONST_WIDE_INT_P (operands[1])
9870 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9872 /* This should be fixed with the introduction of CONST_WIDE_INT. */
9873 gcc_unreachable ();
9876 /* Check if GCC is setting up a block move that will end up using FP
9877 registers as temporaries. We must make sure this is acceptable. */
9878 if (GET_CODE (operands[0]) == MEM
9879 && GET_CODE (operands[1]) == MEM
9880 && mode == DImode
9881 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
9882 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
9883 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
9884 ? 32 : MEM_ALIGN (operands[0])))
9885 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
9886 ? 32
9887 : MEM_ALIGN (operands[1]))))
9888 && ! MEM_VOLATILE_P (operands [0])
9889 && ! MEM_VOLATILE_P (operands [1]))
9891 emit_move_insn (adjust_address (operands[0], SImode, 0),
9892 adjust_address (operands[1], SImode, 0));
9893 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9894 adjust_address (copy_rtx (operands[1]), SImode, 4));
9895 return;
9898 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
9899 && !gpc_reg_operand (operands[1], mode))
9900 operands[1] = force_reg (mode, operands[1]);
9902 /* Recognize the case where operand[1] is a reference to thread-local
9903 data and load its address to a register. */
9904 if (tls_referenced_p (operands[1]))
9906 enum tls_model model;
9907 rtx tmp = operands[1];
9908 rtx addend = NULL;
9910 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9912 addend = XEXP (XEXP (tmp, 0), 1);
9913 tmp = XEXP (XEXP (tmp, 0), 0);
9916 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
9917 model = SYMBOL_REF_TLS_MODEL (tmp);
9918 gcc_assert (model != 0);
9920 tmp = rs6000_legitimize_tls_address (tmp, model);
9921 if (addend)
9923 tmp = gen_rtx_PLUS (mode, tmp, addend);
9924 tmp = force_operand (tmp, operands[0]);
9926 operands[1] = tmp;
9929 /* Handle the case where reload calls us with an invalid address. */
9930 if (reload_in_progress && mode == Pmode
9931 && (! general_operand (operands[1], mode)
9932 || ! nonimmediate_operand (operands[0], mode)))
9933 goto emit_set;
9935 /* 128-bit constant floating-point values on Darwin should really be loaded
9936 as two parts. However, this premature splitting is a problem when DFmode
9937 values can go into Altivec registers. */
9938 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9939 && GET_CODE (operands[1]) == CONST_DOUBLE)
9941 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9942 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9943 DFmode);
9944 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9945 GET_MODE_SIZE (DFmode)),
9946 simplify_gen_subreg (DFmode, operands[1], mode,
9947 GET_MODE_SIZE (DFmode)),
9948 DFmode);
9949 return;
9952 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9953 cfun->machine->sdmode_stack_slot =
9954 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9957 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9958 p1:SD) if p1 is not of floating point class and p0 is spilled as
9959 we can have no analogous movsd_store for this. */
9960 if (lra_in_progress && mode == DDmode
9961 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9962 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9963 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9964 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9966 enum reg_class cl;
9967 int regno = REGNO (SUBREG_REG (operands[1]));
9969 if (regno >= FIRST_PSEUDO_REGISTER)
9971 cl = reg_preferred_class (regno);
9972 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9974 if (regno >= 0 && ! FP_REGNO_P (regno))
9976 mode = SDmode;
9977 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9978 operands[1] = SUBREG_REG (operands[1]);
9981 if (lra_in_progress
9982 && mode == SDmode
9983 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9984 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9985 && (REG_P (operands[1])
9986 || (GET_CODE (operands[1]) == SUBREG
9987 && REG_P (SUBREG_REG (operands[1])))))
9989 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9990 ? SUBREG_REG (operands[1]) : operands[1]);
9991 enum reg_class cl;
9993 if (regno >= FIRST_PSEUDO_REGISTER)
9995 cl = reg_preferred_class (regno);
9996 gcc_assert (cl != NO_REGS);
9997 regno = ira_class_hard_regs[cl][0];
9999 if (FP_REGNO_P (regno))
10001 if (GET_MODE (operands[0]) != DDmode)
10002 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10003 emit_insn (gen_movsd_store (operands[0], operands[1]));
10005 else if (INT_REGNO_P (regno))
10006 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10007 else
10008 gcc_unreachable();
10009 return;
10011 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10012 p:DD)) if p0 is not of floating point class and p1 is spilled as
10013 we can have no analogous movsd_load for this. */
10014 if (lra_in_progress && mode == DDmode
10015 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10016 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10017 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10018 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10020 enum reg_class cl;
10021 int regno = REGNO (SUBREG_REG (operands[0]));
10023 if (regno >= FIRST_PSEUDO_REGISTER)
10025 cl = reg_preferred_class (regno);
10026 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10028 if (regno >= 0 && ! FP_REGNO_P (regno))
10030 mode = SDmode;
10031 operands[0] = SUBREG_REG (operands[0]);
10032 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10035 if (lra_in_progress
10036 && mode == SDmode
10037 && (REG_P (operands[0])
10038 || (GET_CODE (operands[0]) == SUBREG
10039 && REG_P (SUBREG_REG (operands[0]))))
10040 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10041 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10043 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10044 ? SUBREG_REG (operands[0]) : operands[0]);
10045 enum reg_class cl;
10047 if (regno >= FIRST_PSEUDO_REGISTER)
10049 cl = reg_preferred_class (regno);
10050 gcc_assert (cl != NO_REGS);
10051 regno = ira_class_hard_regs[cl][0];
10053 if (FP_REGNO_P (regno))
10055 if (GET_MODE (operands[1]) != DDmode)
10056 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10057 emit_insn (gen_movsd_load (operands[0], operands[1]));
10059 else if (INT_REGNO_P (regno))
10060 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10061 else
10062 gcc_unreachable();
10063 return;
10066 if (reload_in_progress
10067 && mode == SDmode
10068 && cfun->machine->sdmode_stack_slot != NULL_RTX
10069 && MEM_P (operands[0])
10070 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10071 && REG_P (operands[1]))
10073 if (FP_REGNO_P (REGNO (operands[1])))
10075 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10076 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10077 emit_insn (gen_movsd_store (mem, operands[1]));
10079 else if (INT_REGNO_P (REGNO (operands[1])))
10081 rtx mem = operands[0];
10082 if (BYTES_BIG_ENDIAN)
10083 mem = adjust_address_nv (mem, mode, 4);
10084 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10085 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10087 else
10088 gcc_unreachable();
10089 return;
10091 if (reload_in_progress
10092 && mode == SDmode
10093 && REG_P (operands[0])
10094 && MEM_P (operands[1])
10095 && cfun->machine->sdmode_stack_slot != NULL_RTX
10096 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10098 if (FP_REGNO_P (REGNO (operands[0])))
10100 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10101 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10102 emit_insn (gen_movsd_load (operands[0], mem));
10104 else if (INT_REGNO_P (REGNO (operands[0])))
10106 rtx mem = operands[1];
10107 if (BYTES_BIG_ENDIAN)
10108 mem = adjust_address_nv (mem, mode, 4);
10109 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10110 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10112 else
10113 gcc_unreachable();
10114 return;
10117 /* FIXME: In the long term, this switch statement should go away
10118 and be replaced by a sequence of tests based on things like
10119 mode == Pmode. */
10120 switch (mode)
10122 case HImode:
10123 case QImode:
10124 if (CONSTANT_P (operands[1])
10125 && GET_CODE (operands[1]) != CONST_INT)
10126 operands[1] = force_const_mem (mode, operands[1]);
10127 break;
10129 case TFmode:
10130 case TDmode:
10131 case IFmode:
10132 case KFmode:
10133 if (FLOAT128_2REG_P (mode))
10134 rs6000_eliminate_indexed_memrefs (operands);
10135 /* fall through */
10137 case DFmode:
10138 case DDmode:
10139 case SFmode:
10140 case SDmode:
10141 if (CONSTANT_P (operands[1])
10142 && ! easy_fp_constant (operands[1], mode))
10143 operands[1] = force_const_mem (mode, operands[1]);
10144 break;
10146 case V16QImode:
10147 case V8HImode:
10148 case V4SFmode:
10149 case V4SImode:
10150 case V4HImode:
10151 case V2SFmode:
10152 case V2SImode:
10153 case V1DImode:
10154 case V2DFmode:
10155 case V2DImode:
10156 case V1TImode:
10157 if (CONSTANT_P (operands[1])
10158 && !easy_vector_constant (operands[1], mode))
10159 operands[1] = force_const_mem (mode, operands[1]);
10160 break;
10162 case SImode:
10163 case DImode:
10164 /* Use default pattern for address of ELF small data */
10165 if (TARGET_ELF
10166 && mode == Pmode
10167 && DEFAULT_ABI == ABI_V4
10168 && (GET_CODE (operands[1]) == SYMBOL_REF
10169 || GET_CODE (operands[1]) == CONST)
10170 && small_data_operand (operands[1], mode))
10172 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10173 return;
10176 if (DEFAULT_ABI == ABI_V4
10177 && mode == Pmode && mode == SImode
10178 && flag_pic == 1 && got_operand (operands[1], mode))
10180 emit_insn (gen_movsi_got (operands[0], operands[1]));
10181 return;
10184 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10185 && TARGET_NO_TOC
10186 && ! flag_pic
10187 && mode == Pmode
10188 && CONSTANT_P (operands[1])
10189 && GET_CODE (operands[1]) != HIGH
10190 && GET_CODE (operands[1]) != CONST_INT)
10192 rtx target = (!can_create_pseudo_p ()
10193 ? operands[0]
10194 : gen_reg_rtx (mode));
10196 /* If this is a function address on -mcall-aixdesc,
10197 convert it to the address of the descriptor. */
10198 if (DEFAULT_ABI == ABI_AIX
10199 && GET_CODE (operands[1]) == SYMBOL_REF
10200 && XSTR (operands[1], 0)[0] == '.')
10202 const char *name = XSTR (operands[1], 0);
10203 rtx new_ref;
10204 while (*name == '.')
10205 name++;
10206 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10207 CONSTANT_POOL_ADDRESS_P (new_ref)
10208 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10209 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10210 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10211 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10212 operands[1] = new_ref;
10215 if (DEFAULT_ABI == ABI_DARWIN)
10217 #if TARGET_MACHO
10218 if (MACHO_DYNAMIC_NO_PIC_P)
10220 /* Take care of any required data indirection. */
10221 operands[1] = rs6000_machopic_legitimize_pic_address (
10222 operands[1], mode, operands[0]);
10223 if (operands[0] != operands[1])
10224 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10225 return;
10227 #endif
10228 emit_insn (gen_macho_high (target, operands[1]));
10229 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10230 return;
10233 emit_insn (gen_elf_high (target, operands[1]));
10234 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10235 return;
10238 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10239 and we have put it in the TOC, we just need to make a TOC-relative
10240 reference to it. */
10241 if (TARGET_TOC
10242 && GET_CODE (operands[1]) == SYMBOL_REF
10243 && use_toc_relative_ref (operands[1], mode))
10244 operands[1] = create_TOC_reference (operands[1], operands[0]);
10245 else if (mode == Pmode
10246 && CONSTANT_P (operands[1])
10247 && GET_CODE (operands[1]) != HIGH
10248 && ((GET_CODE (operands[1]) != CONST_INT
10249 && ! easy_fp_constant (operands[1], mode))
10250 || (GET_CODE (operands[1]) == CONST_INT
10251 && (num_insns_constant (operands[1], mode)
10252 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10253 || (GET_CODE (operands[0]) == REG
10254 && FP_REGNO_P (REGNO (operands[0]))))
10255 && !toc_relative_expr_p (operands[1], false)
10256 && (TARGET_CMODEL == CMODEL_SMALL
10257 || can_create_pseudo_p ()
10258 || (REG_P (operands[0])
10259 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10262 #if TARGET_MACHO
10263 /* Darwin uses a special PIC legitimizer. */
10264 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10266 operands[1] =
10267 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10268 operands[0]);
10269 if (operands[0] != operands[1])
10270 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10271 return;
10273 #endif
10275 /* If we are to limit the number of things we put in the TOC and
10276 this is a symbol plus a constant we can add in one insn,
10277 just put the symbol in the TOC and add the constant. Don't do
10278 this if reload is in progress. */
10279 if (GET_CODE (operands[1]) == CONST
10280 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10281 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10282 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10283 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10284 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10285 && ! side_effects_p (operands[0]))
10287 rtx sym =
10288 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10289 rtx other = XEXP (XEXP (operands[1], 0), 1);
10291 sym = force_reg (mode, sym);
10292 emit_insn (gen_add3_insn (operands[0], sym, other));
10293 return;
10296 operands[1] = force_const_mem (mode, operands[1]);
10298 if (TARGET_TOC
10299 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10300 && constant_pool_expr_p (XEXP (operands[1], 0))
10301 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10302 get_pool_constant (XEXP (operands[1], 0)),
10303 get_pool_mode (XEXP (operands[1], 0))))
10305 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10306 operands[0]);
10307 operands[1] = gen_const_mem (mode, tocref);
10308 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10311 break;
10313 case TImode:
10314 if (!VECTOR_MEM_VSX_P (TImode))
10315 rs6000_eliminate_indexed_memrefs (operands);
10316 break;
10318 case PTImode:
10319 rs6000_eliminate_indexed_memrefs (operands);
10320 break;
10322 default:
10323 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10326 /* Above, we may have called force_const_mem which may have returned
10327 an invalid address. If we can, fix this up; otherwise, reload will
10328 have to deal with it. */
10329 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10330 operands[1] = validize_mem (operands[1]);
10332 emit_set:
10333 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10336 /* Return true if a structure, union or array containing FIELD should be
10337 accessed using `BLKmode'.
10339 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10340 entire thing in a DI and use subregs to access the internals.
10341 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10342 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10343 best thing to do is set structs to BLKmode and avoid Severe Tire
10344 Damage.
10346 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10347 fit into 1, whereas DI still needs two. */
10349 static bool
10350 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10352 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10353 || (TARGET_E500_DOUBLE && mode == DFmode));
10356 /* Nonzero if we can use a floating-point register to pass this arg. */
10357 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10358 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10359 && (CUM)->fregno <= FP_ARG_MAX_REG \
10360 && TARGET_HARD_FLOAT && TARGET_FPRS)
10362 /* Nonzero if we can use an AltiVec register to pass this arg. */
10363 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10364 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10365 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10366 && TARGET_ALTIVEC_ABI \
10367 && (NAMED))
10369 /* Walk down the type tree of TYPE counting consecutive base elements.
10370 If *MODEP is VOIDmode, then set it to the first valid floating point
10371 or vector type. If a non-floating point or vector type is found, or
10372 if a floating point or vector type that doesn't match a non-VOIDmode
10373 *MODEP is found, then return -1, otherwise return the count in the
10374 sub-tree. */
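/* For instance, struct { double x; double y; } yields *MODEP = DFmode
   and a count of 2, while struct { double d; int i; } returns -1
   because the int member does not match DFmode.  */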
10376 static int
10377 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10379 machine_mode mode;
10380 HOST_WIDE_INT size;
10382 switch (TREE_CODE (type))
10384 case REAL_TYPE:
10385 mode = TYPE_MODE (type);
10386 if (!SCALAR_FLOAT_MODE_P (mode))
10387 return -1;
10389 if (*modep == VOIDmode)
10390 *modep = mode;
10392 if (*modep == mode)
10393 return 1;
10395 break;
10397 case COMPLEX_TYPE:
10398 mode = TYPE_MODE (TREE_TYPE (type));
10399 if (!SCALAR_FLOAT_MODE_P (mode))
10400 return -1;
10402 if (*modep == VOIDmode)
10403 *modep = mode;
10405 if (*modep == mode)
10406 return 2;
10408 break;
10410 case VECTOR_TYPE:
10411 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10412 return -1;
10414 /* Use V4SImode as representative of all 128-bit vector types. */
10415 size = int_size_in_bytes (type);
10416 switch (size)
10418 case 16:
10419 mode = V4SImode;
10420 break;
10421 default:
10422 return -1;
10425 if (*modep == VOIDmode)
10426 *modep = mode;
10428 /* Vector modes are considered to be opaque: two vectors are
10429 equivalent for the purposes of being homogeneous aggregates
10430 if they are the same size. */
10431 if (*modep == mode)
10432 return 1;
10434 break;
10436 case ARRAY_TYPE:
10438 int count;
10439 tree index = TYPE_DOMAIN (type);
10441 /* Can't handle incomplete types nor sizes that are not
10442 fixed. */
10443 if (!COMPLETE_TYPE_P (type)
10444 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10445 return -1;
10447 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10448 if (count == -1
10449 || !index
10450 || !TYPE_MAX_VALUE (index)
10451 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10452 || !TYPE_MIN_VALUE (index)
10453 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10454 || count < 0)
10455 return -1;
10457 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10458 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10460 /* There must be no padding. */
10461 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10462 return -1;
10464 return count;
10467 case RECORD_TYPE:
10469 int count = 0;
10470 int sub_count;
10471 tree field;
10473 /* Can't handle incomplete types nor sizes that are not
10474 fixed. */
10475 if (!COMPLETE_TYPE_P (type)
10476 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10477 return -1;
10479 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10481 if (TREE_CODE (field) != FIELD_DECL)
10482 continue;
10484 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10485 if (sub_count < 0)
10486 return -1;
10487 count += sub_count;
10490 /* There must be no padding. */
10491 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10492 return -1;
10494 return count;
10497 case UNION_TYPE:
10498 case QUAL_UNION_TYPE:
10500 /* These aren't very interesting except in a degenerate case. */
10501 int count = 0;
10502 int sub_count;
10503 tree field;
10505 /* Can't handle incomplete types nor sizes that are not
10506 fixed. */
10507 if (!COMPLETE_TYPE_P (type)
10508 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10509 return -1;
10511 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10513 if (TREE_CODE (field) != FIELD_DECL)
10514 continue;
10516 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10517 if (sub_count < 0)
10518 return -1;
10519 count = count > sub_count ? count : sub_count;
10522 /* There must be no padding. */
10523 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10524 return -1;
10526 return count;
10529 default:
10530 break;
10533 return -1;
10536 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10537 float or vector aggregate that shall be passed in FP/vector registers
10538 according to the ELFv2 ABI, return the homogeneous element mode in
10539 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10541 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
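/* Sketch: under ELFv2, struct { float f[4]; } is discovered as a
   homogeneous aggregate with *ELT_MODE = SFmode and *N_ELTS = 4 and
   so is eligible for parameter FPRs; adding a trailing int member
   disqualifies it and the function falls back to *N_ELTS = 1.  */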
10543 static bool
10544 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10545 machine_mode *elt_mode,
10546 int *n_elts)
10548 /* Note that we do not accept complex types at the top level as
10549 homogeneous aggregates; these types are handled via the
10550 targetm.calls.split_complex_arg mechanism. Complex types
10551 can be elements of homogeneous aggregates, however. */
10552 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10554 machine_mode field_mode = VOIDmode;
10555 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10557 if (field_count > 0)
10559 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10560 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10562 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10563 up to AGGR_ARG_NUM_REG registers. */
10564 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10566 if (elt_mode)
10567 *elt_mode = field_mode;
10568 if (n_elts)
10569 *n_elts = field_count;
10570 return true;
10575 if (elt_mode)
10576 *elt_mode = mode;
10577 if (n_elts)
10578 *n_elts = 1;
10579 return false;
10582 /* Return a nonzero value to say to return the function value in
10583 memory, just as large structures are always returned. TYPE will be
10584 the data type of the value, and FNTYPE will be the type of the
10585 function doing the returning, or @code{NULL} for libcalls.
10587 The AIX ABI for the RS/6000 specifies that all structures are
10588 returned in memory. The Darwin ABI does the same.
10590 For the Darwin 64 Bit ABI, a function result can be returned in
10591 registers or in memory, depending on the size of the return data
10592 type. If it is returned in registers, the value occupies the same
10593 registers as it would if it were the first and only function
10594 argument. Otherwise, the function places its result in memory at
10595 the location pointed to by GPR3.
10597 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10598 but a draft put them in memory, and GCC used to implement the draft
10599 instead of the final standard. Therefore, aix_struct_return
10600 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10601 compatibility can change DRAFT_V4_STRUCT_RET to override the
10602 default, and -m switches get the final word. See
10603 rs6000_option_override_internal for more details.
10605 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10606 long double support is enabled. These values are returned in memory.
10608 int_size_in_bytes returns -1 for variable size objects, which go in
10609 memory always. The cast to unsigned makes -1 > 8. */
10611 static bool
10612 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10614 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10615 if (TARGET_MACHO
10616 && rs6000_darwin64_abi
10617 && TREE_CODE (type) == RECORD_TYPE
10618 && int_size_in_bytes (type) > 0)
10620 CUMULATIVE_ARGS valcum;
10621 rtx valret;
10623 valcum.words = 0;
10624 valcum.fregno = FP_ARG_MIN_REG;
10625 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10626 /* Do a trial code generation as if this were going to be passed
10627 as an argument; if any part goes in memory, we return NULL. */
10628 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10629 if (valret)
10630 return false;
10631 /* Otherwise fall through to more conventional ABI rules. */
10634 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10635 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10636 NULL, NULL))
10637 return false;
10639 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
10640 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10641 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10642 return false;
10644 if (AGGREGATE_TYPE_P (type)
10645 && (aix_struct_return
10646 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10647 return true;
10649 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10650 modes only exist for GCC vector types if -maltivec. */
10651 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10652 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10653 return false;
10655 /* Return synthetic vectors in memory. */
10656 if (TREE_CODE (type) == VECTOR_TYPE
10657 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10659 static bool warned_for_return_big_vectors = false;
10660 if (!warned_for_return_big_vectors)
10662 warning (0, "GCC vector returned by reference: "
10663 "non-standard ABI extension with no compatibility guarantee");
10664 warned_for_return_big_vectors = true;
10666 return true;
10669 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10670 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10671 return true;
10673 return false;
10676 /* Specify whether values returned in registers should be at the most
10677 significant end of a register. We want aggregates returned by
10678 value to match the way aggregates are passed to functions. */
10680 static bool
10681 rs6000_return_in_msb (const_tree valtype)
10683 return (DEFAULT_ABI == ABI_ELFv2
10684 && BYTES_BIG_ENDIAN
10685 && AGGREGATE_TYPE_P (valtype)
10686 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
10689 #ifdef HAVE_AS_GNU_ATTRIBUTE
10690 /* Return TRUE if a call to function FNDECL may be one that
10691 potentially affects the function calling ABI of the object file. */
10693 static bool
10694 call_ABI_of_interest (tree fndecl)
10696 if (symtab->state == EXPANSION)
10698 struct cgraph_node *c_node;
10700 /* Libcalls are always interesting. */
10701 if (fndecl == NULL_TREE)
10702 return true;
10704 /* Any call to an external function is interesting. */
10705 if (DECL_EXTERNAL (fndecl))
10706 return true;
10708 /* Interesting functions that we are emitting in this object file. */
10709 c_node = cgraph_node::get (fndecl);
10710 c_node = c_node->ultimate_alias_target ();
10711 return !c_node->only_called_directly_p ();
10713 return false;
10715 #endif
10717 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10718 for a call to a function whose data type is FNTYPE.
10719 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
10721 For incoming args we set the number of arguments in the prototype large
10722 so we never return a PARALLEL. */
10724 void
10725 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10726 rtx libname ATTRIBUTE_UNUSED, int incoming,
10727 int libcall, int n_named_args,
10728 tree fndecl ATTRIBUTE_UNUSED,
10729 machine_mode return_mode ATTRIBUTE_UNUSED)
10731 static CUMULATIVE_ARGS zero_cumulative;
10733 *cum = zero_cumulative;
10734 cum->words = 0;
10735 cum->fregno = FP_ARG_MIN_REG;
10736 cum->vregno = ALTIVEC_ARG_MIN_REG;
10737 cum->prototype = (fntype && prototype_p (fntype));
10738 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10739 ? CALL_LIBCALL : CALL_NORMAL);
10740 cum->sysv_gregno = GP_ARG_MIN_REG;
10741 cum->stdarg = stdarg_p (fntype);
10742 cum->libcall = libcall;
10744 cum->nargs_prototype = 0;
10745 if (incoming || cum->prototype)
10746 cum->nargs_prototype = n_named_args;
10748 /* Check for a longcall attribute. */
10749 if ((!fntype && rs6000_default_long_calls)
10750 || (fntype
10751 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10752 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10753 cum->call_cookie |= CALL_LONG;
10755 if (TARGET_DEBUG_ARG)
10757 fprintf (stderr, "\ninit_cumulative_args:");
10758 if (fntype)
10760 tree ret_type = TREE_TYPE (fntype);
10761 fprintf (stderr, " ret code = %s,",
10762 get_tree_code_name (TREE_CODE (ret_type)));
10765 if (cum->call_cookie & CALL_LONG)
10766 fprintf (stderr, " longcall,");
10768 fprintf (stderr, " proto = %d, nargs = %d\n",
10769 cum->prototype, cum->nargs_prototype);
10772 #ifdef HAVE_AS_GNU_ATTRIBUTE
10773 if (DEFAULT_ABI == ABI_V4)
10775 cum->escapes = call_ABI_of_interest (fndecl);
10776 if (cum->escapes)
10778 tree return_type;
10780 if (fntype)
10782 return_type = TREE_TYPE (fntype);
10783 return_mode = TYPE_MODE (return_type);
10785 else
10786 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10788 if (return_type != NULL)
10790 if (TREE_CODE (return_type) == RECORD_TYPE
10791 && TYPE_TRANSPARENT_AGGR (return_type))
10793 return_type = TREE_TYPE (first_field (return_type));
10794 return_mode = TYPE_MODE (return_type);
10796 if (AGGREGATE_TYPE_P (return_type)
10797 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10798 <= 8))
10799 rs6000_returns_struct = true;
10801 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
10802 rs6000_passes_float = true;
10803 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
10804 || SPE_VECTOR_MODE (return_mode))
10805 rs6000_passes_vector = true;
10808 #endif
10810 if (fntype
10811 && !TARGET_ALTIVEC
10812 && TARGET_ALTIVEC_ABI
10813 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10815 error ("cannot return value in vector register because"
10816 " altivec instructions are disabled, use -maltivec"
10817 " to enable them");
10821 /* The mode the ABI uses for a word. This is not the same as word_mode
10822 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10824 static machine_mode
10825 rs6000_abi_word_mode (void)
10827 return TARGET_32BIT ? SImode : DImode;
10830 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10831 static char *
10832 rs6000_offload_options (void)
10834 if (TARGET_64BIT)
10835 return xstrdup ("-foffload-abi=lp64");
10836 else
10837 return xstrdup ("-foffload-abi=ilp32");
10840 /* On rs6000, function arguments are promoted, as are function return
10841 values. */
10843 static machine_mode
10844 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10845 machine_mode mode,
10846 int *punsignedp ATTRIBUTE_UNUSED,
10847 const_tree, int)
10849 PROMOTE_MODE (mode, *punsignedp, type);
10851 return mode;
10854 /* Return true if TYPE must be passed on the stack and not in registers. */
10856 static bool
10857 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10859 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10860 return must_pass_in_stack_var_size (mode, type);
10861 else
10862 return must_pass_in_stack_var_size_or_pad (mode, type);
10865 static inline bool
10866 is_complex_IBM_long_double (machine_mode mode)
10868 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
10871 /* Whether ABI_V4 passes MODE args to a function in floating point
10872 registers. */
10874 static bool
10875 abi_v4_pass_in_fpr (machine_mode mode)
10877 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
10878 return false;
10879 if (TARGET_SINGLE_FLOAT && mode == SFmode)
10880 return true;
10881 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
10882 return true;
10883 /* ABI_V4 passes complex IBM long double in 8 gprs.
10884 Stupid, but we can't change the ABI now. */
10885 if (is_complex_IBM_long_double (mode))
10886 return false;
10887 if (FLOAT128_2REG_P (mode))
10888 return true;
10889 if (DECIMAL_FLOAT_MODE_P (mode))
10890 return true;
10891 return false;
10894 /* If defined, a C expression which determines whether, and in which
10895 direction, to pad out an argument with extra space. The value
10896 should be of type `enum direction': either `upward' to pad above
10897 the argument, `downward' to pad below, or `none' to inhibit
10898 padding.
10900 For the AIX ABI structs are always stored left shifted in their
10901 argument slot. */
10903 enum direction
10904 function_arg_padding (machine_mode mode, const_tree type)
10906 #ifndef AGGREGATE_PADDING_FIXED
10907 #define AGGREGATE_PADDING_FIXED 0
10908 #endif
10909 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10910 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10911 #endif
10913 if (!AGGREGATE_PADDING_FIXED)
10915 /* GCC used to pass structures of the same size as integer types as
10916 if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
10917 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10918 passed padded downward, except that -mstrict-align further
10919 muddied the water in that multi-component structures of 2 and 4
10920 bytes in size were passed padded upward.
10922 The following arranges for best compatibility with previous
10923 versions of gcc, but removes the -mstrict-align dependency. */
10924 if (BYTES_BIG_ENDIAN)
10926 HOST_WIDE_INT size = 0;
10928 if (mode == BLKmode)
10930 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10931 size = int_size_in_bytes (type);
10933 else
10934 size = GET_MODE_SIZE (mode);
10936 if (size == 1 || size == 2 || size == 4)
10937 return downward;
10939 return upward;
10942 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10944 if (type != 0 && AGGREGATE_TYPE_P (type))
10945 return upward;
10948 /* Fall back to the default. */
10949 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10952 /* If defined, a C expression that gives the alignment boundary, in bits,
10953 of an argument with the specified mode and type. If it is not defined,
10954 PARM_BOUNDARY is used for all arguments.
10956 V.4 wants long longs and doubles to be double word aligned. Just
10957 testing the mode size is a boneheaded way to do this as it means
10958 that other types such as complex int are also double word aligned.
10959 However, we're stuck with this because changing the ABI might break
10960 existing library interfaces.
10962 Doubleword align SPE vectors.
10963 Quadword align Altivec/VSX vectors.
10964 Quadword align large synthetic vector types. */
10966 static unsigned int
10967 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10969 machine_mode elt_mode;
10970 int n_elts;
10972 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10974 if (DEFAULT_ABI == ABI_V4
10975 && (GET_MODE_SIZE (mode) == 8
10976 || (TARGET_HARD_FLOAT
10977 && TARGET_FPRS
10978 && !is_complex_IBM_long_double (mode)
10979 && FLOAT128_2REG_P (mode))))
10980 return 64;
10981 else if (FLOAT128_VECTOR_P (mode))
10982 return 128;
10983 else if (SPE_VECTOR_MODE (mode)
10984 || (type && TREE_CODE (type) == VECTOR_TYPE
10985 && int_size_in_bytes (type) >= 8
10986 && int_size_in_bytes (type) < 16))
10987 return 64;
10988 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10989 || (type && TREE_CODE (type) == VECTOR_TYPE
10990 && int_size_in_bytes (type) >= 16))
10991 return 128;
10993 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10994 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10995 -mcompat-align-parm is used. */
10996 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10997 || DEFAULT_ABI == ABI_ELFv2)
10998 && type && TYPE_ALIGN (type) > 64)
11000 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11001 or homogeneous float/vector aggregates here. We already handled
11002 vector aggregates above, but still need to check for float here. */
11003 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11004 && !SCALAR_FLOAT_MODE_P (elt_mode));
11006 /* We used to check for BLKmode instead of the above aggregate type
11007 check. Warn when this results in any difference to the ABI. */
11008 if (aggregate_p != (mode == BLKmode))
11010 static bool warned;
11011 if (!warned && warn_psabi)
11013 warned = true;
11014 inform (input_location,
11015 "the ABI of passing aggregates with %d-byte alignment"
11016 " has changed in GCC 5",
11017 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11021 if (aggregate_p)
11022 return 128;
11025 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11026 implement the "aggregate type" check as a BLKmode check here; this
11027 means certain aggregate types are in fact not aligned. */
11028 if (TARGET_MACHO && rs6000_darwin64_abi
11029 && mode == BLKmode
11030 && type && TYPE_ALIGN (type) > 64)
11031 return 128;
11033 return PARM_BOUNDARY;
11036 /* The offset in words to the start of the parameter save area. */
11038 static unsigned int
11039 rs6000_parm_offset (void)
11041 return (DEFAULT_ABI == ABI_V4 ? 2
11042 : DEFAULT_ABI == ABI_ELFv2 ? 4
11043 : 6);
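/* Editorial note (a sketch of where the constants above come from):
   the returned word count skips the fixed stack frame header that
   precedes the parameter save area.  Under V.4 that header is two
   words (back chain and LR save word); under ELFv2 it is four
   doublewords (back chain, CR save, LR save, TOC save); under
   AIX/ELFv1 it is six doublewords (back chain, CR save, LR save, two
   reserved words, and the TOC save slot).  */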
11046 /* For a function parm of MODE and TYPE, return the starting word in
11047 the parameter area. NWORDS of the parameter area are already used. */
11049 static unsigned int
11050 rs6000_parm_start (machine_mode mode, const_tree type,
11051 unsigned int nwords)
11053 unsigned int align;
11055 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11056 return nwords + (-(rs6000_parm_offset () + nwords) & align);
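/* Worked example of the computation above (editorial, assuming 32-bit
   V.4): for a 16-byte-aligned vector, rs6000_function_arg_boundary
   returns 128 and PARM_BOUNDARY is 32, so align == 3.  With
   rs6000_parm_offset () == 2 and nwords == 1 we get
   1 + (-(2 + 1) & 3) == 2; word 2 plus the 2-word header is 16 bytes
   into the frame, as required.  */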
11059 /* Compute the size (in words) of a function argument. */
11061 static unsigned long
11062 rs6000_arg_size (machine_mode mode, const_tree type)
11064 unsigned long size;
11066 if (mode != BLKmode)
11067 size = GET_MODE_SIZE (mode);
11068 else
11069 size = int_size_in_bytes (type);
11071 if (TARGET_32BIT)
11072 return (size + 3) >> 2;
11073 else
11074 return (size + 7) >> 3;
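/* For instance (editorial): a 10-byte BLKmode struct occupies
   (10 + 3) >> 2 == 3 words under TARGET_32BIT and (10 + 7) >> 3 == 2
   doublewords otherwise; sizes are simply rounded up to a whole
   number of registers.  */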
11077 /* Use this to flush pending int fields. */
11079 static void
11080 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11081 HOST_WIDE_INT bitpos, int final)
11083 unsigned int startbit, endbit;
11084 int intregs, intoffset;
11085 machine_mode mode;
11087 /* Handle the situations where a float is taking up the first half
11088 of the GPR, and the other half is empty (typically due to
11089 alignment restrictions). We can detect this by an 8-byte-aligned
11090 int field, or by seeing that this is the final flush for this
11091 argument. Count the word and continue on. */
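/* Editorial illustration: in a Darwin64 struct such as
   { float f; int i; }, the float goes to an FPR yet still occupies
   the first half of an 8-byte slot, and the int shares the second
   half; the code below ensures that shared word is counted exactly
   once.  */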
11092 if (cum->floats_in_gpr == 1
11093 && (cum->intoffset % 64 == 0
11094 || (cum->intoffset == -1 && final)))
11096 cum->words++;
11097 cum->floats_in_gpr = 0;
11100 if (cum->intoffset == -1)
11101 return;
11103 intoffset = cum->intoffset;
11104 cum->intoffset = -1;
11105 cum->floats_in_gpr = 0;
11107 if (intoffset % BITS_PER_WORD != 0)
11109 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11110 MODE_INT, 0);
11111 if (mode == BLKmode)
11113 /* We couldn't find an appropriate mode, which happens,
11114 e.g., in packed structs when there are 3 bytes to load.
11115 Move intoffset back to the beginning of the word in this
11116 case. */
11117 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11121 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11122 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11123 intregs = (endbit - startbit) / BITS_PER_WORD;
11124 cum->words += intregs;
11125 /* words should be unsigned. */
11126 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11128 int pad = (endbit/BITS_PER_WORD) - cum->words;
11129 cum->words += pad;
11133 /* The darwin64 ABI calls for us to recurse down through structs,
11134 looking for elements passed in registers. Unfortunately, we have
11135 to track int register count here also because of misalignments
11136 in powerpc alignment mode. */
11138 static void
11139 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11140 const_tree type,
11141 HOST_WIDE_INT startbitpos)
11143 tree f;
11145 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11146 if (TREE_CODE (f) == FIELD_DECL)
11148 HOST_WIDE_INT bitpos = startbitpos;
11149 tree ftype = TREE_TYPE (f);
11150 machine_mode mode;
11151 if (ftype == error_mark_node)
11152 continue;
11153 mode = TYPE_MODE (ftype);
11155 if (DECL_SIZE (f) != 0
11156 && tree_fits_uhwi_p (bit_position (f)))
11157 bitpos += int_bit_position (f);
11159 /* ??? FIXME: else assume zero offset. */
11161 if (TREE_CODE (ftype) == RECORD_TYPE)
11162 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11163 else if (USE_FP_FOR_ARG_P (cum, mode))
11165 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11166 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11167 cum->fregno += n_fpregs;
11168 /* Single-precision floats present a special problem for
11169 us, because they are smaller than an 8-byte GPR, and so
11170 the structure-packing rules combined with the standard
11171 varargs behavior mean that we want to pack float/float
11172 and float/int combinations into a single register's
11173 space. This is complicated by the arg advance flushing,
11174 which works on arbitrarily large groups of int-type
11175 fields. */
11176 if (mode == SFmode)
11178 if (cum->floats_in_gpr == 1)
11180 /* Two floats in a word; count the word and reset
11181 the float count. */
11182 cum->words++;
11183 cum->floats_in_gpr = 0;
11185 else if (bitpos % 64 == 0)
11187 /* A float at the beginning of an 8-byte word;
11188 count it and put off adjusting cum->words until
11189 we see if an arg advance flush is going to do it
11190 for us. */
11191 cum->floats_in_gpr++;
11193 else
11195 /* The float is at the end of a word, preceded
11196 by integer fields, so the arg advance flush
11197 just above has already set cum->words and
11198 everything is taken care of. */
11201 else
11202 cum->words += n_fpregs;
11204 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11206 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11207 cum->vregno++;
11208 cum->words += 2;
11210 else if (cum->intoffset == -1)
11211 cum->intoffset = bitpos;
11215 /* Check for an item that needs to be considered specially under the Darwin
11216 64-bit ABI. These are record types where the mode is BLK or the structure is
11217 8 bytes in size. */
11218 static int
11219 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11221 return rs6000_darwin64_abi
11222 && ((mode == BLKmode
11223 && TREE_CODE (type) == RECORD_TYPE
11224 && int_size_in_bytes (type) > 0)
11225 || (type && TREE_CODE (type) == RECORD_TYPE
11226 && int_size_in_bytes (type) == 8)) ? 1 : 0;
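/* Editorial examples: this matches a BLKmode record such as
   struct { char c[3]; } as well as any 8-byte record such as
   struct { int a, b; }, which may well have an integer mode (DImode)
   rather than BLKmode -- hence the second arm of the test.  */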
11229 /* Update the data in CUM to advance over an argument
11230 of mode MODE and data type TYPE.
11231 (TYPE is null for libcalls where that information may not be available.)
11233 Note that for args passed by reference, function_arg will be called
11234 with MODE and TYPE set to that of the pointer to the arg, not the arg
11235 itself. */
11237 static void
11238 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11239 const_tree type, bool named, int depth)
11241 machine_mode elt_mode;
11242 int n_elts;
11244 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11246 /* Only tick off an argument if we're not recursing. */
11247 if (depth == 0)
11248 cum->nargs_prototype--;
11250 #ifdef HAVE_AS_GNU_ATTRIBUTE
11251 if (DEFAULT_ABI == ABI_V4
11252 && cum->escapes)
11254 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
11255 rs6000_passes_float = true;
11256 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11257 rs6000_passes_vector = true;
11258 else if (SPE_VECTOR_MODE (mode)
11259 && !cum->stdarg
11260 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11261 rs6000_passes_vector = true;
11263 #endif
11265 if (TARGET_ALTIVEC_ABI
11266 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11267 || (type && TREE_CODE (type) == VECTOR_TYPE
11268 && int_size_in_bytes (type) == 16)))
11270 bool stack = false;
11272 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11274 cum->vregno += n_elts;
11276 if (!TARGET_ALTIVEC)
11277 error ("cannot pass argument in vector register because"
11278 " altivec instructions are disabled, use -maltivec"
11279 " to enable them");
11281 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11282 even if it is going to be passed in a vector register.
11283 Darwin does the same for variable-argument functions. */
11284 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11285 && TARGET_64BIT)
11286 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11287 stack = true;
11289 else
11290 stack = true;
11292 if (stack)
11294 int align;
11296 /* Vector parameters must be 16-byte aligned. In 32-bit
11297 mode this means we need to take into account the offset
11298 to the parameter save area. In 64-bit mode, they just
11299 have to start on an even word, since the parameter save
11300 area is 16-byte aligned. */
11301 if (TARGET_32BIT)
11302 align = -(rs6000_parm_offset () + cum->words) & 3;
11303 else
11304 align = cum->words & 1;
11305 cum->words += align + rs6000_arg_size (mode, type);
11307 if (TARGET_DEBUG_ARG)
11309 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11310 cum->words, align);
11311 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11312 cum->nargs_prototype, cum->prototype,
11313 GET_MODE_NAME (mode));
11317 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11318 && !cum->stdarg
11319 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11320 cum->sysv_gregno++;
11322 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11324 int size = int_size_in_bytes (type);
11325 /* Variable sized types have size == -1 and are
11326 treated as if consisting entirely of ints.
11327 Pad to 16 byte boundary if needed. */
11328 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11329 && (cum->words % 2) != 0)
11330 cum->words++;
11331 /* For varargs, we can just go up by the size of the struct. */
11332 if (!named)
11333 cum->words += (size + 7) / 8;
11334 else
11336 /* It is tempting to say int register count just goes up by
11337 sizeof(type)/8, but this is wrong in a case such as
11338 { int; double; int; } [powerpc alignment]. We have to
11339 grovel through the fields for these too. */
11340 cum->intoffset = 0;
11341 cum->floats_in_gpr = 0;
11342 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11343 rs6000_darwin64_record_arg_advance_flush (cum,
11344 size * BITS_PER_UNIT, 1);
11346 if (TARGET_DEBUG_ARG)
11348 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11349 cum->words, TYPE_ALIGN (type), size);
11350 fprintf (stderr,
11351 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11352 cum->nargs_prototype, cum->prototype,
11353 GET_MODE_NAME (mode));
11356 else if (DEFAULT_ABI == ABI_V4)
11358 if (abi_v4_pass_in_fpr (mode))
11360 /* _Decimal128 must use an even/odd register pair. This assumes
11361 that the register number is odd when fregno is odd. */
11362 if (mode == TDmode && (cum->fregno % 2) == 1)
11363 cum->fregno++;
11365 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11366 <= FP_ARG_V4_MAX_REG)
11367 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11368 else
11370 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11371 if (mode == DFmode || FLOAT128_IBM_P (mode)
11372 || mode == DDmode || mode == TDmode)
11373 cum->words += cum->words & 1;
11374 cum->words += rs6000_arg_size (mode, type);
11377 else
11379 int n_words = rs6000_arg_size (mode, type);
11380 int gregno = cum->sysv_gregno;
11382 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11383 (r7,r8) or (r9,r10). As does any other 2 word item such
11384 as complex int due to a historical mistake. */
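/* Editorial note on the parity trick below: GP_ARG_MIN_REG is r3, so
   an odd gregno names the first register of a valid (rN, rN+1) pair.
   (1 - gregno) & 1 is 1 exactly when gregno is even, bumping an even
   gregno up to the next odd register and leaving an odd one alone;
   e.g. gregno == 4 (r4) becomes 5, so the item lands in (r5, r6).  */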
11385 if (n_words == 2)
11386 gregno += (1 - gregno) & 1;
11388 /* Multi-reg args are not split between registers and stack. */
11389 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11391 /* Long long and SPE vectors are aligned on the stack.
11392 So are other 2 word items such as complex int due to
11393 a historical mistake. */
11394 if (n_words == 2)
11395 cum->words += cum->words & 1;
11396 cum->words += n_words;
11399 /* Note: we continue to accumulate gregno even after it exceeds
11400 GP_ARG_MAX_REG; a gregno past the last argument register tells
11401 expand_builtin_saveregs that we have started spilling to the stack. */
11402 cum->sysv_gregno = gregno + n_words;
11405 if (TARGET_DEBUG_ARG)
11407 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11408 cum->words, cum->fregno);
11409 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11410 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11411 fprintf (stderr, "mode = %4s, named = %d\n",
11412 GET_MODE_NAME (mode), named);
11415 else
11417 int n_words = rs6000_arg_size (mode, type);
11418 int start_words = cum->words;
11419 int align_words = rs6000_parm_start (mode, type, start_words);
11421 cum->words = align_words + n_words;
11423 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11425 /* _Decimal128 must be passed in an even/odd float register pair.
11426 This assumes that the register number is odd when fregno is
11427 odd. */
11428 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11429 cum->fregno++;
11430 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11433 if (TARGET_DEBUG_ARG)
11435 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11436 cum->words, cum->fregno);
11437 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11438 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11439 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11440 named, align_words - start_words, depth);
11445 static void
11446 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11447 const_tree type, bool named)
11449 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11450 0);
11453 static rtx
11454 spe_build_register_parallel (machine_mode mode, int gregno)
11456 rtx r1, r3, r5, r7;
11458 switch (mode)
11460 case DFmode:
11461 r1 = gen_rtx_REG (DImode, gregno);
11462 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11463 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11465 case DCmode:
11466 case TFmode:
11467 r1 = gen_rtx_REG (DImode, gregno);
11468 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11469 r3 = gen_rtx_REG (DImode, gregno + 2);
11470 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11471 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11473 case TCmode:
11474 r1 = gen_rtx_REG (DImode, gregno);
11475 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11476 r3 = gen_rtx_REG (DImode, gregno + 2);
11477 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11478 r5 = gen_rtx_REG (DImode, gregno + 4);
11479 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11480 r7 = gen_rtx_REG (DImode, gregno + 6);
11481 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11482 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11484 default:
11485 gcc_unreachable ();
11489 /* Determine where to put a SIMD argument on the SPE. */
11490 static rtx
11491 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11492 const_tree type)
11494 int gregno = cum->sysv_gregno;
11496 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11497 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11498 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11499 || mode == DCmode || mode == TCmode))
11501 int n_words = rs6000_arg_size (mode, type);
11503 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11504 if (mode == DFmode)
11505 gregno += (1 - gregno) & 1;
11507 /* Multi-reg args are not split between registers and stack. */
11508 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11509 return NULL_RTX;
11511 return spe_build_register_parallel (mode, gregno);
11513 if (cum->stdarg)
11515 int n_words = rs6000_arg_size (mode, type);
11517 /* SPE vectors are put in odd registers. */
11518 if (n_words == 2 && (gregno & 1) == 0)
11519 gregno += 1;
11521 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11523 rtx r1, r2;
11524 machine_mode m = SImode;
11526 r1 = gen_rtx_REG (m, gregno);
11527 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11528 r2 = gen_rtx_REG (m, gregno + 1);
11529 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11530 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11532 else
11533 return NULL_RTX;
11535 else
11537 if (gregno <= GP_ARG_MAX_REG)
11538 return gen_rtx_REG (mode, gregno);
11539 else
11540 return NULL_RTX;
11544 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11545 structure between cum->intoffset and bitpos to integer registers. */
11547 static void
11548 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11549 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11551 machine_mode mode;
11552 unsigned int regno;
11553 unsigned int startbit, endbit;
11554 int this_regno, intregs, intoffset;
11555 rtx reg;
11557 if (cum->intoffset == -1)
11558 return;
11560 intoffset = cum->intoffset;
11561 cum->intoffset = -1;
11563 /* If this is the trailing part of a word, try to only load that
11564 much into the register. Otherwise load the whole register. Note
11565 that in the latter case we may pick up unwanted bits. It's not a
11566 problem at the moment, but we may wish to revisit this. */
11568 if (intoffset % BITS_PER_WORD != 0)
11570 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11571 MODE_INT, 0);
11572 if (mode == BLKmode)
11574 /* We couldn't find an appropriate mode, which happens,
11575 e.g., in packed structs when there are 3 bytes to load.
11576 Move intoffset back to the beginning of the word in this
11577 case. */
11578 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11579 mode = word_mode;
11582 else
11583 mode = word_mode;
11585 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11586 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11587 intregs = (endbit - startbit) / BITS_PER_WORD;
11588 this_regno = cum->words + intoffset / BITS_PER_WORD;
11590 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11591 cum->use_stack = 1;
11593 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11594 if (intregs <= 0)
11595 return;
11597 intoffset /= BITS_PER_UNIT;
11600 regno = GP_ARG_MIN_REG + this_regno;
11601 reg = gen_rtx_REG (mode, regno);
11602 rvec[(*k)++] =
11603 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11605 this_regno += 1;
11606 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11607 mode = word_mode;
11608 intregs -= 1;
11610 while (intregs > 0);
11613 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
11615 static void
11616 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11617 HOST_WIDE_INT startbitpos, rtx rvec[],
11618 int *k)
11620 tree f;
11622 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11623 if (TREE_CODE (f) == FIELD_DECL)
11625 HOST_WIDE_INT bitpos = startbitpos;
11626 tree ftype = TREE_TYPE (f);
11627 machine_mode mode;
11628 if (ftype == error_mark_node)
11629 continue;
11630 mode = TYPE_MODE (ftype);
11632 if (DECL_SIZE (f) != 0
11633 && tree_fits_uhwi_p (bit_position (f)))
11634 bitpos += int_bit_position (f);
11636 /* ??? FIXME: else assume zero offset. */
11638 if (TREE_CODE (ftype) == RECORD_TYPE)
11639 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11640 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11642 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11643 #if 0
11644 switch (mode)
11646 case SCmode: mode = SFmode; break;
11647 case DCmode: mode = DFmode; break;
11648 case TCmode: mode = TFmode; break;
11649 default: break;
11651 #endif
11652 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11653 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11655 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11656 && (mode == TFmode || mode == TDmode));
11657 /* Long double or _Decimal128 split over regs and memory. */
11658 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11659 cum->use_stack = 1;
11661 rvec[(*k)++]
11662 = gen_rtx_EXPR_LIST (VOIDmode,
11663 gen_rtx_REG (mode, cum->fregno++),
11664 GEN_INT (bitpos / BITS_PER_UNIT));
11665 if (FLOAT128_2REG_P (mode))
11666 cum->fregno++;
11668 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11670 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11671 rvec[(*k)++]
11672 = gen_rtx_EXPR_LIST (VOIDmode,
11673 gen_rtx_REG (mode, cum->vregno++),
11674 GEN_INT (bitpos / BITS_PER_UNIT));
11676 else if (cum->intoffset == -1)
11677 cum->intoffset = bitpos;
11681 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11682 the register(s) to be used for each field and subfield of a struct
11683 being passed by value, along with the offset of where the
11684 register's value may be found in the block. FP fields go in FP
11685 registers, vector fields go in vector registers, and everything
11686 else goes in int registers, packed as in memory.
11688 This code is also used for function return values. RETVAL indicates
11689 whether this is the case.
11691 Much of this is taken from the SPARC V9 port, which has a similar
11692 calling convention. */
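/* Editorial sketch of the shape of the result, for a hypothetical
   struct { double d; int i; } argument: something along the lines of
     (parallel [(expr_list (reg:DF fN) (const_int 0))
                (expr_list (reg:DI rM) (const_int 8))])
   where each EXPR_LIST pairs a register with the byte offset of the
   corresponding piece within the in-memory layout of the struct.  */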
11694 static rtx
11695 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11696 bool named, bool retval)
11698 rtx rvec[FIRST_PSEUDO_REGISTER];
11699 int k = 1, kbase = 1;
11700 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11701 /* This is a copy; modifications are not visible to our caller. */
11702 CUMULATIVE_ARGS copy_cum = *orig_cum;
11703 CUMULATIVE_ARGS *cum = &copy_cum;
11705 /* Pad to 16 byte boundary if needed. */
11706 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11707 && (cum->words % 2) != 0)
11708 cum->words++;
11710 cum->intoffset = 0;
11711 cum->use_stack = 0;
11712 cum->named = named;
11714 /* Put entries into rvec[] for individual FP and vector fields, and
11715 for the chunks of memory that go in int regs. Note we start at
11716 element 1; 0 is reserved for an indication of using memory, and
11717 may or may not be filled in below. */
11718 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11719 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11721 /* If any part of the struct went on the stack put all of it there.
11722 This hack is because the generic code for
11723 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11724 parts of the struct are not at the beginning. */
11725 if (cum->use_stack)
11727 if (retval)
11728 return NULL_RTX; /* doesn't go in registers at all */
11729 kbase = 0;
11730 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11732 if (k > 1 || cum->use_stack)
11733 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11734 else
11735 return NULL_RTX;
11738 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11740 static rtx
11741 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11742 int align_words)
11744 int n_units;
11745 int i, k;
11746 rtx rvec[GP_ARG_NUM_REG + 1];
11748 if (align_words >= GP_ARG_NUM_REG)
11749 return NULL_RTX;
11751 n_units = rs6000_arg_size (mode, type);
11753 /* Optimize the simple case where the arg fits in one gpr, except in
11754 the case of BLKmode due to assign_parms assuming that registers are
11755 BITS_PER_WORD wide. */
11756 if (n_units == 0
11757 || (n_units == 1 && mode != BLKmode))
11758 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11760 k = 0;
11761 if (align_words + n_units > GP_ARG_NUM_REG)
11762 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11763 using a magic NULL_RTX component.
11764 This is not strictly correct. Only some of the arg belongs in
11765 memory, not all of it. However, the normal scheme using
11766 function_arg_partial_nregs can result in unusual subregs, e.g.
11767 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11768 store the whole arg to memory is often more efficient than code
11769 to store pieces, and we know that space is available in the right
11770 place for the whole arg. */
11771 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11773 i = 0;
11776 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11777 rtx off = GEN_INT (i++ * 4);
11778 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11780 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11782 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11785 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11786 but must also be copied into the parameter save area starting at
11787 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11788 to the GPRs and/or memory. Return the number of elements used. */
11790 static int
11791 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11792 int align_words, rtx *rvec)
11794 int k = 0;
11796 if (align_words < GP_ARG_NUM_REG)
11798 int n_words = rs6000_arg_size (mode, type);
11800 if (align_words + n_words > GP_ARG_NUM_REG
11801 || mode == BLKmode
11802 || (TARGET_32BIT && TARGET_POWERPC64))
11804 /* If this is partially on the stack, then we only
11805 include the portion actually in registers here. */
11806 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11807 int i = 0;
11809 if (align_words + n_words > GP_ARG_NUM_REG)
11811 /* Not all of the arg fits in gprs. Say that it goes in memory
11812 too, using a magic NULL_RTX component. Also see comment in
11813 rs6000_mixed_function_arg for why the normal
11814 function_arg_partial_nregs scheme doesn't work in this case. */
11815 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11820 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11821 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11822 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11824 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11826 else
11828 /* The whole arg fits in gprs. */
11829 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11830 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11833 else
11835 /* It's entirely in memory. */
11836 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11839 return k;
11842 /* RVEC is a vector of K components of an argument of mode MODE.
11843 Construct the final function_arg return value from it. */
11845 static rtx
11846 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11848 gcc_assert (k >= 1);
11850 /* Avoid returning a PARALLEL in the trivial cases. */
11851 if (k == 1)
11853 if (XEXP (rvec[0], 0) == NULL_RTX)
11854 return NULL_RTX;
11856 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11857 return XEXP (rvec[0], 0);
11860 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11863 /* Determine where to put an argument to a function.
11864 Value is zero to push the argument on the stack,
11865 or a hard register in which to store the argument.
11867 MODE is the argument's machine mode.
11868 TYPE is the data type of the argument (as a tree).
11869 This is null for libcalls where that information may
11870 not be available.
11871 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11872 the preceding args and about the function being called. It is
11873 not modified in this routine.
11874 NAMED is nonzero if this argument is a named parameter
11875 (otherwise it is an extra parameter matching an ellipsis).
11877 On RS/6000 the first eight words of non-FP are normally in registers
11878 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11879 Under V.4, the first 8 FP args are in registers.
11881 If this is floating-point and no prototype is specified, we use
11882 both an FP and integer register (or possibly FP reg and stack). Library
11883 functions (when CALL_LIBCALL is set) always have the proper types for args,
11884 so we can pass the FP value just in one register. emit_library_function
11885 doesn't support PARALLEL anyway.
11887 Note that for args passed by reference, function_arg will be called
11888 with MODE and TYPE set to that of the pointer to the arg, not the arg
11889 itself. */
11891 static rtx
11892 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11893 const_tree type, bool named)
11895 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11896 enum rs6000_abi abi = DEFAULT_ABI;
11897 machine_mode elt_mode;
11898 int n_elts;
11900 /* Return a marker to indicate whether we need to set or clear the
11901 CR1 bit that V.4 uses to say FP args were passed in registers.
11902 Assume that we don't need the marker for software floating point,
11903 or compiler generated library calls. */
11904 if (mode == VOIDmode)
11906 if (abi == ABI_V4
11907 && (cum->call_cookie & CALL_LIBCALL) == 0
11908 && (cum->stdarg
11909 || (cum->nargs_prototype < 0
11910 && (cum->prototype || TARGET_NO_PROTOTYPE))))
11912 /* For the SPE, we need to crxor CR6 always. */
11913 if (TARGET_SPE_ABI)
11914 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
11915 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
11916 return GEN_INT (cum->call_cookie
11917 | ((cum->fregno == FP_ARG_MIN_REG)
11918 ? CALL_V4_SET_FP_ARGS
11919 : CALL_V4_CLEAR_FP_ARGS));
11922 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11925 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11927 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11929 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11930 if (rslt != NULL_RTX)
11931 return rslt;
11932 /* Else fall through to usual handling. */
11935 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11937 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11938 rtx r, off;
11939 int i, k = 0;
11941 /* Do we also need to pass this argument in the parameter save area?
11942 Library support functions for IEEE 128-bit are assumed to not need the
11943 value passed both in GPRs and in vector registers. */
11944 if (TARGET_64BIT && !cum->prototype
11945 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11947 int align_words = ROUND_UP (cum->words, 2);
11948 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11951 /* Describe where this argument goes in the vector registers. */
11952 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11954 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11955 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11956 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11959 return rs6000_finish_function_arg (mode, rvec, k);
11961 else if (TARGET_ALTIVEC_ABI
11962 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11963 || (type && TREE_CODE (type) == VECTOR_TYPE
11964 && int_size_in_bytes (type) == 16)))
11966 if (named || abi == ABI_V4)
11967 return NULL_RTX;
11968 else
11970 /* Vector parameters to varargs functions under AIX or Darwin
11971 get passed in memory and possibly also in GPRs. */
11972 int align, align_words, n_words;
11973 machine_mode part_mode;
11975 /* Vector parameters must be 16-byte aligned. In 32-bit
11976 mode this means we need to take into account the offset
11977 to the parameter save area. In 64-bit mode, they just
11978 have to start on an even word, since the parameter save
11979 area is 16-byte aligned. */
11980 if (TARGET_32BIT)
11981 align = -(rs6000_parm_offset () + cum->words) & 3;
11982 else
11983 align = cum->words & 1;
11984 align_words = cum->words + align;
11986 /* Out of registers? Memory, then. */
11987 if (align_words >= GP_ARG_NUM_REG)
11988 return NULL_RTX;
11990 if (TARGET_32BIT && TARGET_POWERPC64)
11991 return rs6000_mixed_function_arg (mode, type, align_words);
11993 /* The vector value goes in GPRs. Only the part of the
11994 value in GPRs is reported here. */
11995 part_mode = mode;
11996 n_words = rs6000_arg_size (mode, type);
11997 if (align_words + n_words > GP_ARG_NUM_REG)
11998 /* Fortunately, there are only two possibilities: the value
11999 is either wholly in GPRs or half in GPRs and half not. */
12000 part_mode = DImode;
12002 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12005 else if (TARGET_SPE_ABI && TARGET_SPE
12006 && (SPE_VECTOR_MODE (mode)
12007 || (TARGET_E500_DOUBLE && (mode == DFmode
12008 || mode == DCmode
12009 || mode == TFmode
12010 || mode == TCmode))))
12011 return rs6000_spe_function_arg (cum, mode, type);
12013 else if (abi == ABI_V4)
12015 if (abi_v4_pass_in_fpr (mode))
12017 /* _Decimal128 must use an even/odd register pair. This assumes
12018 that the register number is odd when fregno is odd. */
12019 if (mode == TDmode && (cum->fregno % 2) == 1)
12020 cum->fregno++;
12022 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12023 <= FP_ARG_V4_MAX_REG)
12024 return gen_rtx_REG (mode, cum->fregno);
12025 else
12026 return NULL_RTX;
12028 else
12030 int n_words = rs6000_arg_size (mode, type);
12031 int gregno = cum->sysv_gregno;
12033 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12034 (r7,r8) or (r9,r10). As does any other 2 word item such
12035 as complex int due to a historical mistake. */
12036 if (n_words == 2)
12037 gregno += (1 - gregno) & 1;
12039 /* Multi-reg args are not split between registers and stack. */
12040 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12041 return NULL_RTX;
12043 if (TARGET_32BIT && TARGET_POWERPC64)
12044 return rs6000_mixed_function_arg (mode, type,
12045 gregno - GP_ARG_MIN_REG);
12046 return gen_rtx_REG (mode, gregno);
12049 else
12051 int align_words = rs6000_parm_start (mode, type, cum->words);
12053 /* _Decimal128 must be passed in an even/odd float register pair.
12054 This assumes that the register number is odd when fregno is odd. */
12055 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12056 cum->fregno++;
12058 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12060 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12061 rtx r, off;
12062 int i, k = 0;
12063 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12064 int fpr_words;
12066 /* Do we also need to pass this argument in the parameter
12067 save area? */
12068 if (type && (cum->nargs_prototype <= 0
12069 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12070 && TARGET_XL_COMPAT
12071 && align_words >= GP_ARG_NUM_REG)))
12072 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12074 /* Describe where this argument goes in the fprs. */
12075 for (i = 0; i < n_elts
12076 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12078 /* Check if the argument is split over registers and memory.
12079 This can only ever happen for long double or _Decimal128;
12080 complex types are handled via split_complex_arg. */
12081 machine_mode fmode = elt_mode;
12082 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12084 gcc_assert (FLOAT128_2REG_P (fmode));
12085 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12088 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12089 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12090 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12093 /* If there were not enough FPRs to hold the argument, the rest
12094 usually goes into memory. However, if the current position
12095 is still within the register parameter area, a portion may
12096 actually have to go into GPRs.
12098 Note that it may happen that the portion of the argument
12099 passed in the first "half" of the first GPR was already
12100 passed in the last FPR as well.
12102 For unnamed arguments, we already set up GPRs to cover the
12103 whole argument in rs6000_psave_function_arg, so there is
12104 nothing further to do at this point. */
12105 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12106 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12107 && cum->nargs_prototype > 0)
12109 static bool warned;
12111 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12112 int n_words = rs6000_arg_size (mode, type);
12114 align_words += fpr_words;
12115 n_words -= fpr_words;
12119 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12120 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12121 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12123 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12125 if (!warned && warn_psabi)
12127 warned = true;
12128 inform (input_location,
12129 "the ABI of passing homogeneous float aggregates"
12130 " has changed in GCC 5");
12134 return rs6000_finish_function_arg (mode, rvec, k);
12136 else if (align_words < GP_ARG_NUM_REG)
12138 if (TARGET_32BIT && TARGET_POWERPC64)
12139 return rs6000_mixed_function_arg (mode, type, align_words);
12141 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12143 else
12144 return NULL_RTX;
12148 /* For an arg passed partly in registers and partly in memory, this is
12149 the number of bytes passed in registers. For args passed entirely in
12150 registers or entirely in memory, zero. When an arg is described by a
12151 PARALLEL, perhaps using more than one register type, this function
12152 returns the number of bytes used by the first element of the PARALLEL. */
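/* Worked example (editorial): on a 64-bit AIX target, where
   GP_ARG_NUM_REG is 8, a 24-byte BLKmode argument starting at
   align_words == 7 gets one doubleword in the last GPR and two in
   memory, so this function returns (8 - 7) * 8 == 8 bytes.  */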
12154 static int
12155 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12156 tree type, bool named)
12158 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12159 bool passed_in_gprs = true;
12160 int ret = 0;
12161 int align_words;
12162 machine_mode elt_mode;
12163 int n_elts;
12165 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12167 if (DEFAULT_ABI == ABI_V4)
12168 return 0;
12170 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12172 /* If we are passing this arg in the fixed parameter save area (gprs or
12173 memory) as well as VRs, we do not use the partial bytes mechanism;
12174 instead, rs6000_function_arg will return a PARALLEL including a memory
12175 element as necessary. Library support functions for IEEE 128-bit are
12176 assumed to not need the value passed both in GPRs and in vector
12177 registers. */
12178 if (TARGET_64BIT && !cum->prototype
12179 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12180 return 0;
12182 /* Otherwise, we pass in VRs only. Check for partial copies. */
12183 passed_in_gprs = false;
12184 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12185 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12188 /* In this complicated case we just disable the partial_nregs code. */
12189 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12190 return 0;
12192 align_words = rs6000_parm_start (mode, type, cum->words);
12194 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12196 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12198 /* If we are passing this arg in the fixed parameter save area
12199 (gprs or memory) as well as FPRs, we do not use the partial
12200 bytes mechanism; instead, rs6000_function_arg will return a
12201 PARALLEL including a memory element as necessary. */
12202 if (type
12203 && (cum->nargs_prototype <= 0
12204 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12205 && TARGET_XL_COMPAT
12206 && align_words >= GP_ARG_NUM_REG)))
12207 return 0;
12209 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12210 passed_in_gprs = false;
12211 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12213 /* Compute number of bytes / words passed in FPRs. If there
12214 is still space available in the register parameter area
12215 *after* that amount, a part of the argument will be passed
12216 in GPRs. In that case, the total amount passed in any
12217 registers is equal to the amount that would have been passed
12218 in GPRs if everything were passed there, so we fall back to
12219 the GPR code below to compute the appropriate value. */
12220 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12221 * MIN (8, GET_MODE_SIZE (elt_mode)));
12222 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12224 if (align_words + fpr_words < GP_ARG_NUM_REG)
12225 passed_in_gprs = true;
12226 else
12227 ret = fpr;
12231 if (passed_in_gprs
12232 && align_words < GP_ARG_NUM_REG
12233 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12234 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12236 if (ret != 0 && TARGET_DEBUG_ARG)
12237 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12239 return ret;
12242 /* A C expression that indicates when an argument must be passed by
12243 reference. If nonzero for an argument, a copy of that argument is
12244 made in memory and a pointer to the argument is passed instead of
12245 the argument itself. The pointer is passed in whatever way is
12246 appropriate for passing a pointer to that type.
12248 Under V.4, aggregates and long double are passed by reference.
12250 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12251 reference unless the AltiVec vector extension ABI is in force.
12253 As an extension to all ABIs, variable sized types are passed by
12254 reference. */
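/* Editorial examples of the rules above: under V.4 even a small
   struct { int a, b; } is passed by reference, as is IEEE 128-bit
   long double when TARGET_IEEEQUAD; a variable-length array type is
   passed by reference under every ABI because int_size_in_bytes
   returns -1 for it.  */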
12256 static bool
12257 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12258 machine_mode mode, const_tree type,
12259 bool named ATTRIBUTE_UNUSED)
12261 if (!type)
12262 return 0;
12264 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12265 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12267 if (TARGET_DEBUG_ARG)
12268 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12269 return 1;
12272 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12274 if (TARGET_DEBUG_ARG)
12275 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12276 return 1;
12279 if (int_size_in_bytes (type) < 0)
12281 if (TARGET_DEBUG_ARG)
12282 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12283 return 1;
12286 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12287 modes only exist for GCC vector types if -maltivec. */
12288 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12290 if (TARGET_DEBUG_ARG)
12291 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12292 return 1;
12295 /* Pass synthetic vectors in memory. */
12296 if (TREE_CODE (type) == VECTOR_TYPE
12297 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12299 static bool warned_for_pass_big_vectors = false;
12300 if (TARGET_DEBUG_ARG)
12301 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12302 if (!warned_for_pass_big_vectors)
12304 warning (0, "GCC vector passed by reference: "
12305 "non-standard ABI extension with no compatibility guarantee");
12306 warned_for_pass_big_vectors = true;
12308 return 1;
12311 return 0;
12314 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12315 already processed. Return true if the parameter must be passed
12316 (fully or partially) on the stack. */
12318 static bool
12319 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12321 machine_mode mode;
12322 int unsignedp;
12323 rtx entry_parm;
12325 /* Catch errors. */
12326 if (type == NULL || type == error_mark_node)
12327 return true;
12329 /* Handle types with no storage requirement. */
12330 if (TYPE_MODE (type) == VOIDmode)
12331 return false;
12333 /* Handle complex types: the two parts are passed as separate arguments (cf. split_complex_arg), so the duplicated test below deliberately advances past each component. */
12334 if (TREE_CODE (type) == COMPLEX_TYPE)
12335 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12336 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12338 /* Handle transparent aggregates. */
12339 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12340 && TYPE_TRANSPARENT_AGGR (type))
12341 type = TREE_TYPE (first_field (type));
12343 /* See if this arg was passed by invisible reference. */
12344 if (pass_by_reference (get_cumulative_args (args_so_far),
12345 TYPE_MODE (type), type, true))
12346 type = build_pointer_type (type);
12348 /* Find mode as it is passed by the ABI. */
12349 unsignedp = TYPE_UNSIGNED (type);
12350 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12352 /* If we must pass in stack, we need a stack. */
12353 if (rs6000_must_pass_in_stack (mode, type))
12354 return true;
12356 /* If there is no incoming register, we need a stack. */
12357 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12358 if (entry_parm == NULL)
12359 return true;
12361 /* Likewise if we need to pass both in registers and on the stack. */
12362 if (GET_CODE (entry_parm) == PARALLEL
12363 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12364 return true;
12366 /* Also true if we're partially in registers and partially not. */
12367 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12368 return true;
12370 /* Update info on where next arg arrives in registers. */
12371 rs6000_function_arg_advance (args_so_far, mode, type, true);
12372 return false;
12375 /* Return true if FUN has no prototype, has a variable argument
12376 list, or passes any parameter in memory. */
12378 static bool
12379 rs6000_function_parms_need_stack (tree fun, bool incoming)
12381 tree fntype, result;
12382 CUMULATIVE_ARGS args_so_far_v;
12383 cumulative_args_t args_so_far;
12385 if (!fun)
12386 /* Must be a libcall; libcalls only use reg parms. */
12387 return false;
12389 fntype = fun;
12390 if (!TYPE_P (fun))
12391 fntype = TREE_TYPE (fun);
12393 /* Varargs functions need the parameter save area. */
12394 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12395 return true;
12397 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12398 args_so_far = pack_cumulative_args (&args_so_far_v);
12400 /* When incoming, we will have been passed the function decl.
12401 It is necessary to use the decl to handle K&R style functions,
12402 where TYPE_ARG_TYPES may not be available. */
12403 if (incoming)
12405 gcc_assert (DECL_P (fun));
12406 result = DECL_RESULT (fun);
12408 else
12409 result = TREE_TYPE (fntype);
12411 if (result && aggregate_value_p (result, fntype))
12413 if (!TYPE_P (result))
12414 result = TREE_TYPE (result);
12415 result = build_pointer_type (result);
12416 rs6000_parm_needs_stack (args_so_far, result);
12419 if (incoming)
12421 tree parm;
12423 for (parm = DECL_ARGUMENTS (fun);
12424 parm && parm != void_list_node;
12425 parm = TREE_CHAIN (parm))
12426 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12427 return true;
12429 else
12431 function_args_iterator args_iter;
12432 tree arg_type;
12434 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12435 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12436 return true;
12439 return false;
12442 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12443 usually a constant depending on the ABI. However, in the ELFv2 ABI
12444 the register parameter area is optional when calling a function that
12445 has a prototype in scope, has no variable argument list, and passes
12446 all parameters in registers. */
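/* Editorial note: the 64 and 32 byte figures below are just the eight
   GPR argument slots (r3 through r10) times the register size, i.e.
   8 * 8 bytes in 64-bit mode and 8 * 4 bytes in 32-bit mode.  */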
12449 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12451 int reg_parm_stack_space;
12453 switch (DEFAULT_ABI)
12455 default:
12456 reg_parm_stack_space = 0;
12457 break;
12459 case ABI_AIX:
12460 case ABI_DARWIN:
12461 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12462 break;
12464 case ABI_ELFv2:
12465 /* ??? Recomputing this every time is a bit expensive. Is there
12466 a place to cache this information? */
12467 if (rs6000_function_parms_need_stack (fun, incoming))
12468 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12469 else
12470 reg_parm_stack_space = 0;
12471 break;
12474 return reg_parm_stack_space;
12477 static void
12478 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12480 int i;
12481 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12483 if (nregs == 0)
12484 return;
12486 for (i = 0; i < nregs; i++)
12488 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12489 if (reload_completed)
12491 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12492 tem = NULL_RTX;
12493 else
12494 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12495 i * GET_MODE_SIZE (reg_mode));
12497 else
12498 tem = replace_equiv_address (tem, XEXP (tem, 0));
12500 gcc_assert (tem);
12502 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12506 /* Perform any actions needed for a function that is receiving a
12507 variable number of arguments.
12509 CUM is as above.
12511 MODE and TYPE are the mode and type of the current parameter.
12513 PRETEND_SIZE is a variable that should be set to the amount of stack
12514 that must be pushed by the prolog to pretend that our caller pushed
12517 Normally, this macro will push all remaining incoming registers on the
12518 stack and set PRETEND_SIZE to the length of the registers pushed. */
12520 static void
12521 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12522 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12523 int no_rtl)
12525 CUMULATIVE_ARGS next_cum;
12526 int reg_size = TARGET_32BIT ? 4 : 8;
12527 rtx save_area = NULL_RTX, mem;
12528 int first_reg_offset;
12529 alias_set_type set;
12531 /* Skip the last named argument. */
12532 next_cum = *get_cumulative_args (cum);
12533 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12535 if (DEFAULT_ABI == ABI_V4)
12537 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12539 if (! no_rtl)
12541 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12542 HOST_WIDE_INT offset = 0;
12544 /* Try to optimize the size of the varargs save area.
12545 The ABI requires that ap.reg_save_area is doubleword
12546 aligned, but we don't need to allocate space for all
12547 the bytes, only those into which we will actually save
12548 anything. */
12549 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12550 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12551 if (TARGET_HARD_FLOAT && TARGET_FPRS
12552 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12553 && cfun->va_list_fpr_size)
12555 if (gpr_reg_num)
12556 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12557 * UNITS_PER_FP_WORD;
12558 if (cfun->va_list_fpr_size
12559 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12560 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12561 else
12562 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12563 * UNITS_PER_FP_WORD;
12565 if (gpr_reg_num)
12567 offset = -((first_reg_offset * reg_size) & ~7);
12568 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12570 gpr_reg_num = cfun->va_list_gpr_size;
12571 if (reg_size == 4 && (first_reg_offset & 1))
12572 gpr_reg_num++;
12574 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12576 else if (fpr_size)
12577 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12578 * UNITS_PER_FP_WORD
12579 - (int) (GP_ARG_NUM_REG * reg_size);
12581 if (gpr_size + fpr_size)
12583 rtx reg_save_area
12584 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12585 gcc_assert (GET_CODE (reg_save_area) == MEM);
12586 reg_save_area = XEXP (reg_save_area, 0);
12587 if (GET_CODE (reg_save_area) == PLUS)
12589 gcc_assert (XEXP (reg_save_area, 0)
12590 == virtual_stack_vars_rtx);
12591 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12592 offset += INTVAL (XEXP (reg_save_area, 1));
12594 else
12595 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12598 cfun->machine->varargs_save_offset = offset;
12599 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12602 else
12604 first_reg_offset = next_cum.words;
12605 save_area = crtl->args.internal_arg_pointer;
12607 if (targetm.calls.must_pass_in_stack (mode, type))
12608 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12611 set = get_varargs_alias_set ();
12612 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12613 && cfun->va_list_gpr_size)
12615 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12617 if (va_list_gpr_counter_field)
12618 /* V4 va_list_gpr_size counts number of registers needed. */
12619 n_gpr = cfun->va_list_gpr_size;
12620 else
12621 /* char * va_list instead counts number of bytes needed. */
12622 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12624 if (nregs > n_gpr)
12625 nregs = n_gpr;
12627 mem = gen_rtx_MEM (BLKmode,
12628 plus_constant (Pmode, save_area,
12629 first_reg_offset * reg_size));
12630 MEM_NOTRAP_P (mem) = 1;
12631 set_mem_alias_set (mem, set);
12632 set_mem_align (mem, BITS_PER_WORD);
12634 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12635 nregs);
12638 /* Save FP registers if needed. */
12639 if (DEFAULT_ABI == ABI_V4
12640 && TARGET_HARD_FLOAT && TARGET_FPRS
12641 && ! no_rtl
12642 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12643 && cfun->va_list_fpr_size)
12645 int fregno = next_cum.fregno, nregs;
12646 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12647 rtx lab = gen_label_rtx ();
12648 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12649 * UNITS_PER_FP_WORD);
12651 emit_jump_insn
12652 (gen_rtx_SET (pc_rtx,
12653 gen_rtx_IF_THEN_ELSE (VOIDmode,
12654 gen_rtx_NE (VOIDmode, cr1,
12655 const0_rtx),
12656 gen_rtx_LABEL_REF (VOIDmode, lab),
12657 pc_rtx)));
12659 for (nregs = 0;
12660 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12661 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12663 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12664 ? DFmode : SFmode,
12665 plus_constant (Pmode, save_area, off));
12666 MEM_NOTRAP_P (mem) = 1;
12667 set_mem_alias_set (mem, set);
12668 set_mem_align (mem, GET_MODE_ALIGNMENT (
12669 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12670 ? DFmode : SFmode));
12671 emit_move_insn (mem, gen_rtx_REG (
12672 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12673 ? DFmode : SFmode, fregno));
12676 emit_label (lab);
12680 /* Create the va_list data type. */
12682 static tree
12683 rs6000_build_builtin_va_list (void)
12685 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12687 /* For AIX, prefer 'char *' because that's what the system
12688 header files like. */
12689 if (DEFAULT_ABI != ABI_V4)
12690 return build_pointer_type (char_type_node);
12692 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12693 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12694 get_identifier ("__va_list_tag"), record);
12696 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12697 unsigned_char_type_node);
12698 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12699 unsigned_char_type_node);
12700 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12701 every user file. */
12702 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12703 get_identifier ("reserved"), short_unsigned_type_node);
12704 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12705 get_identifier ("overflow_arg_area"),
12706 ptr_type_node);
12707 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12708 get_identifier ("reg_save_area"),
12709 ptr_type_node);
12711 va_list_gpr_counter_field = f_gpr;
12712 va_list_fpr_counter_field = f_fpr;
12714 DECL_FIELD_CONTEXT (f_gpr) = record;
12715 DECL_FIELD_CONTEXT (f_fpr) = record;
12716 DECL_FIELD_CONTEXT (f_res) = record;
12717 DECL_FIELD_CONTEXT (f_ovf) = record;
12718 DECL_FIELD_CONTEXT (f_sav) = record;
12720 TYPE_STUB_DECL (record) = type_decl;
12721 TYPE_NAME (record) = type_decl;
12722 TYPE_FIELDS (record) = f_gpr;
12723 DECL_CHAIN (f_gpr) = f_fpr;
12724 DECL_CHAIN (f_fpr) = f_res;
12725 DECL_CHAIN (f_res) = f_ovf;
12726 DECL_CHAIN (f_ovf) = f_sav;
12728 layout_type (record);
12730 /* The correct type is an array type of one element. */
12731 return build_array_type (record, build_index_type (size_zero_node));
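/* Editorial sketch: for V.4 the record laid out above amounts to the
   familiar SVR4 PowerPC va_list, roughly

     typedef struct __va_list_tag
     {
       unsigned char gpr;        // index of next GPR to use, 0..8
       unsigned char fpr;        // index of next FPR to use, 0..8
       unsigned short reserved;  // the named padding
       void *overflow_arg_area;  // arguments that spilled to memory
       void *reg_save_area;      // block filled by setup_incoming_varargs
     } __va_list_tag;

   exposed to the user as a one-element array of this record.  */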
12734 /* Implement va_start. */
12736 static void
12737 rs6000_va_start (tree valist, rtx nextarg)
12739 HOST_WIDE_INT words, n_gpr, n_fpr;
12740 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12741 tree gpr, fpr, ovf, sav, t;
12743 /* Only SVR4 needs something special. */
12744 if (DEFAULT_ABI != ABI_V4)
12746 std_expand_builtin_va_start (valist, nextarg);
12747 return;
12750 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12751 f_fpr = DECL_CHAIN (f_gpr);
12752 f_res = DECL_CHAIN (f_fpr);
12753 f_ovf = DECL_CHAIN (f_res);
12754 f_sav = DECL_CHAIN (f_ovf);
12756 valist = build_simple_mem_ref (valist);
12757 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12758 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12759 f_fpr, NULL_TREE);
12760 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12761 f_ovf, NULL_TREE);
12762 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12763 f_sav, NULL_TREE);
12765 /* Count number of gp and fp argument registers used. */
12766 words = crtl->args.info.words;
12767 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12768 GP_ARG_NUM_REG);
12769 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12770 FP_ARG_NUM_REG);
12772 if (TARGET_DEBUG_ARG)
12773 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12774 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12775 words, n_gpr, n_fpr);
12777 if (cfun->va_list_gpr_size)
12779 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12780 build_int_cst (NULL_TREE, n_gpr));
12781 TREE_SIDE_EFFECTS (t) = 1;
12782 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12785 if (cfun->va_list_fpr_size)
12787 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12788 build_int_cst (NULL_TREE, n_fpr));
12789 TREE_SIDE_EFFECTS (t) = 1;
12790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12792 #ifdef HAVE_AS_GNU_ATTRIBUTE
12793 if (call_ABI_of_interest (cfun->decl))
12794 rs6000_passes_float = true;
12795 #endif
12798 /* Find the overflow area. */
12799 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12800 if (words != 0)
12801 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12802 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12803 TREE_SIDE_EFFECTS (t) = 1;
12804 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12806 /* If there were no va_arg invocations, don't set up the register
12807 save area. */
12808 if (!cfun->va_list_gpr_size
12809 && !cfun->va_list_fpr_size
12810 && n_gpr < GP_ARG_NUM_REG
12811 && n_fpr < FP_ARG_V4_MAX_REG)
12812 return;
12814 /* Find the register save area. */
12815 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12816 if (cfun->machine->varargs_save_offset)
12817 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12818 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12819 TREE_SIDE_EFFECTS (t) = 1;
12820 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
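/* A minimal run-time sketch of the trees emitted above (WORDS, N_GPR and
   N_FPR are the compile-time counts computed above; __arg_ptr and __frame
   are stand-ins for the incoming-argument pointer and the frame base plus
   varargs_save_offset):

     ap->gpr = N_GPR;                 // GP arg registers already consumed
     ap->fpr = N_FPR;                 // FP arg registers already consumed
     ap->overflow_arg_area = __arg_ptr + WORDS * MIN_UNITS_PER_WORD;
     ap->reg_save_area = __frame;     // skipped if no va_arg needs regs  */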
12823 /* Implement va_arg. */
12825 static tree
12826 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12827 gimple_seq *post_p)
12829 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12830 tree gpr, fpr, ovf, sav, reg, t, u;
12831 int size, rsize, n_reg, sav_ofs, sav_scale;
12832 tree lab_false, lab_over, addr;
12833 int align;
12834 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12835 int regalign = 0;
12836 gimple *stmt;
12838 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12840 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12841 return build_va_arg_indirect_ref (t);
12844 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12845 earlier version of gcc, with the property that it always applied alignment
12846 adjustments to the va-args (even for zero-sized types). The cheapest way
12847 to deal with this is to replicate the effect of the part of
12848 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12849 of relevance.
12850 We don't need to check for pass-by-reference because of the test above.
12851 We can return a simplified answer, since we know there's no offset to add. */
12853 if (((TARGET_MACHO
12854 && rs6000_darwin64_abi)
12855 || DEFAULT_ABI == ABI_ELFv2
12856 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12857 && integer_zerop (TYPE_SIZE (type)))
12859 unsigned HOST_WIDE_INT align, boundary;
12860 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12861 align = PARM_BOUNDARY / BITS_PER_UNIT;
12862 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12863 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12864 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12865 boundary /= BITS_PER_UNIT;
12866 if (boundary > align)
12868 tree t;
12869 /* This updates arg ptr by the amount that would be necessary
12870 to align the zero-sized (but not zero-alignment) item. */
12871 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12872 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12873 gimplify_and_add (t, pre_p);
12875 t = fold_convert (sizetype, valist_tmp);
12876 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12877 fold_convert (TREE_TYPE (valist),
12878 fold_build2 (BIT_AND_EXPR, sizetype, t,
12879 size_int (-boundary))));
12880 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12881 gimplify_and_add (t, pre_p);
12883 /* Since it is zero-sized there's no increment for the item itself. */
12884 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12885 return build_va_arg_indirect_ref (valist_tmp);
12888 if (DEFAULT_ABI != ABI_V4)
12890 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12892 tree elem_type = TREE_TYPE (type);
12893 machine_mode elem_mode = TYPE_MODE (elem_type);
12894 int elem_size = GET_MODE_SIZE (elem_mode);
12896 if (elem_size < UNITS_PER_WORD)
12898 tree real_part, imag_part;
12899 gimple_seq post = NULL;
12901 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12902 &post);
12903 /* Copy the value into a temporary, lest the formal temporary
12904 be reused out from under us. */
12905 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12906 gimple_seq_add_seq (pre_p, post);
12908 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12909 post_p);
12911 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12915 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12918 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12919 f_fpr = DECL_CHAIN (f_gpr);
12920 f_res = DECL_CHAIN (f_fpr);
12921 f_ovf = DECL_CHAIN (f_res);
12922 f_sav = DECL_CHAIN (f_ovf);
12924 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12925 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12926 f_fpr, NULL_TREE);
12927 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12928 f_ovf, NULL_TREE);
12929 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12930 f_sav, NULL_TREE);
12932 size = int_size_in_bytes (type);
12933 rsize = (size + 3) / 4;
12934 align = 1;
12936 machine_mode mode = TYPE_MODE (type);
12937 if (abi_v4_pass_in_fpr (mode))
12939 /* FP args go in FP registers, if present. */
12940 reg = fpr;
12941 n_reg = (size + 7) / 8;
12942 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12943 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12944 if (mode != SFmode && mode != SDmode)
12945 align = 8;
12947 else
12949 /* Otherwise into GP registers. */
12950 reg = gpr;
12951 n_reg = rsize;
12952 sav_ofs = 0;
12953 sav_scale = 4;
12954 if (n_reg == 2)
12955 align = 8;
12958 /* Pull the value out of the saved registers.... */
12960 lab_over = NULL;
12961 addr = create_tmp_var (ptr_type_node, "addr");
12963 /* AltiVec vectors never go in registers when -mabi=altivec. */
12964 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12965 align = 16;
12966 else
12968 lab_false = create_artificial_label (input_location);
12969 lab_over = create_artificial_label (input_location);
12971 /* Long long and SPE vectors are aligned in the registers.
12972 As is any other 2-gpr item, such as complex int, due to a
12973 historical mistake. */
12974 u = reg;
12975 if (n_reg == 2 && reg == gpr)
12977 regalign = 1;
12978 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12979 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12980 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12981 unshare_expr (reg), u);
12983 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12984 reg number is 0 for f1, so we want to make it odd. */
12985 else if (reg == fpr && mode == TDmode)
12987 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12988 build_int_cst (TREE_TYPE (reg), 1));
12989 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12992 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12993 t = build2 (GE_EXPR, boolean_type_node, u, t);
12994 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12995 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12996 gimplify_and_add (t, pre_p);
12998 t = sav;
12999 if (sav_ofs)
13000 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13002 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13003 build_int_cst (TREE_TYPE (reg), n_reg));
13004 u = fold_convert (sizetype, u);
13005 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13006 t = fold_build_pointer_plus (t, u);
13008 /* _Decimal32 varargs are located in the second word of the 64-bit
13009 FP register for 32-bit binaries. */
13010 if (TARGET_32BIT
13011 && TARGET_HARD_FLOAT && TARGET_FPRS
13012 && mode == SDmode)
13013 t = fold_build_pointer_plus_hwi (t, size);
13015 gimplify_assign (addr, t, pre_p);
13017 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13019 stmt = gimple_build_label (lab_false);
13020 gimple_seq_add_stmt (pre_p, stmt);
13022 if ((n_reg == 2 && !regalign) || n_reg > 2)
13024 /* Ensure that we don't find any more args in regs.
13025 Alignment has taken care of the special cases. */
13026 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13030 /* ... otherwise out of the overflow area. */
13032 /* Care for on-stack alignment if needed. */
13033 t = ovf;
13034 if (align != 1)
13036 t = fold_build_pointer_plus_hwi (t, align - 1);
13037 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13038 build_int_cst (TREE_TYPE (t), -align));
13040 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13042 gimplify_assign (unshare_expr (addr), t, pre_p);
13044 t = fold_build_pointer_plus_hwi (t, size);
13045 gimplify_assign (unshare_expr (ovf), t, pre_p);
13047 if (lab_over)
13049 stmt = gimple_build_label (lab_over);
13050 gimple_seq_add_stmt (pre_p, stmt);
13053 if (STRICT_ALIGNMENT
13054 && (TYPE_ALIGN (type)
13055 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13057 /* The value (of type complex double, for example) may not be
13058 aligned in memory in the saved registers, so copy via a
13059 temporary. (This is the same code as used for SPARC.) */
13060 tree tmp = create_tmp_var (type, "va_arg_tmp");
13061 tree dest_addr = build_fold_addr_expr (tmp);
13063 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13064 3, dest_addr, addr, size_int (rsize * 4));
13066 gimplify_and_add (copy, pre_p);
13067 addr = dest_addr;
13070 addr = fold_convert (ptrtype, addr);
13071 return build_va_arg_indirect_ref (addr);
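/* In outline, the gimple built above implements this logic for a V4
   va_arg (a sketch; REG is ap->gpr or ap->fpr, and N_REG, SAV_OFS,
   SAV_SCALE and ALIGN are the constants selected above):

     if (REG + N_REG <= 8)                  // still fits in registers
       {
         addr = ap->reg_save_area + SAV_OFS + REG * SAV_SCALE;
         REG += N_REG;
       }
     else                                   // take it from the stack
       {
         REG = 8;                           // no more register args
         addr = round_up (ap->overflow_arg_area, ALIGN);
         ap->overflow_arg_area = addr + size;
       }
     result = *(TYPE *) addr;  */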
13074 /* Builtins. */
13076 static void
13077 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13079 tree t;
13080 unsigned classify = rs6000_builtin_info[(int)code].attr;
13081 const char *attr_string = "";
13083 gcc_assert (name != NULL);
13084 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
13086 if (rs6000_builtin_decls[(int)code])
13087 fatal_error (input_location,
13088 "internal error: builtin function %s already processed", name);
13090 rs6000_builtin_decls[(int)code] = t =
13091 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13093 /* Set any special attributes. */
13094 if ((classify & RS6000_BTC_CONST) != 0)
13096 /* const function, function only depends on the inputs. */
13097 TREE_READONLY (t) = 1;
13098 TREE_NOTHROW (t) = 1;
13099 attr_string = ", const";
13101 else if ((classify & RS6000_BTC_PURE) != 0)
13103 /* pure function, function can read global memory, but does not set any
13104 external state. */
13105 DECL_PURE_P (t) = 1;
13106 TREE_NOTHROW (t) = 1;
13107 attr_string = ", pure";
13109 else if ((classify & RS6000_BTC_FP) != 0)
13111 /* Function is a math function. If rounding mode is on, then treat the
13112 function as not reading global memory, but it can have arbitrary side
13113 effects. If it is off, then assume the function is a const function.
13114 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13115 builtin-attrs.def that is used for the math functions. */
13116 TREE_NOTHROW (t) = 1;
13117 if (flag_rounding_math)
13119 DECL_PURE_P (t) = 1;
13120 DECL_IS_NOVOPS (t) = 1;
13121 attr_string = ", fp, pure";
13123 else
13125 TREE_READONLY (t) = 1;
13126 attr_string = ", fp, const";
13129 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13130 gcc_unreachable ();
13132 if (TARGET_DEBUG_BUILTIN)
13133 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13134 (int)code, name, attr_string);
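/* A typical call, for illustration (the real calls are expanded from
   rs6000-builtin.def via the tables that follow; the ftype node name
   here is only indicative):

     def_builtin ("__builtin_altivec_vaddubm",
                  v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);  */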
13137 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13139 #undef RS6000_BUILTIN_0
13140 #undef RS6000_BUILTIN_1
13141 #undef RS6000_BUILTIN_2
13142 #undef RS6000_BUILTIN_3
13143 #undef RS6000_BUILTIN_A
13144 #undef RS6000_BUILTIN_D
13145 #undef RS6000_BUILTIN_E
13146 #undef RS6000_BUILTIN_H
13147 #undef RS6000_BUILTIN_P
13148 #undef RS6000_BUILTIN_Q
13149 #undef RS6000_BUILTIN_S
13150 #undef RS6000_BUILTIN_X
13152 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13153 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13154 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13155 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13156 { MASK, ICODE, NAME, ENUM },
13158 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13159 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13160 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13161 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13162 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13163 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13164 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13165 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13167 static const struct builtin_description bdesc_3arg[] =
13169 #include "rs6000-builtin.def"
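/* The construction above is a classic X-macro: rs6000-builtin.def invokes
   one RS6000_BUILTIN_* macro per builtin, and each table redefines exactly
   one of them to emit an initializer while all the others expand to
   nothing.  A minimal self-contained sketch of the same technique:

     #define DEF_LIST  DEF (FOO, "foo") DEF (BAR, "bar")

     #define DEF(ENUM, NAME) ENUM,
     enum codes { DEF_LIST CODE_COUNT };
     #undef DEF

     #define DEF(ENUM, NAME) NAME,
     static const char *names[] = { DEF_LIST };
     #undef DEF  */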
13172 /* DST operations: void foo (void *, const int, const char). */
13174 #undef RS6000_BUILTIN_0
13175 #undef RS6000_BUILTIN_1
13176 #undef RS6000_BUILTIN_2
13177 #undef RS6000_BUILTIN_3
13178 #undef RS6000_BUILTIN_A
13179 #undef RS6000_BUILTIN_D
13180 #undef RS6000_BUILTIN_E
13181 #undef RS6000_BUILTIN_H
13182 #undef RS6000_BUILTIN_P
13183 #undef RS6000_BUILTIN_Q
13184 #undef RS6000_BUILTIN_S
13185 #undef RS6000_BUILTIN_X
13187 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13188 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13189 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13190 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13191 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13192 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13193 { MASK, ICODE, NAME, ENUM },
13195 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13196 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13197 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13198 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13199 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13200 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13202 static const struct builtin_description bdesc_dst[] =
13204 #include "rs6000-builtin.def"
13207 /* Simple binary operations: VECc = foo (VECa, VECb). */
13209 #undef RS6000_BUILTIN_0
13210 #undef RS6000_BUILTIN_1
13211 #undef RS6000_BUILTIN_2
13212 #undef RS6000_BUILTIN_3
13213 #undef RS6000_BUILTIN_A
13214 #undef RS6000_BUILTIN_D
13215 #undef RS6000_BUILTIN_E
13216 #undef RS6000_BUILTIN_H
13217 #undef RS6000_BUILTIN_P
13218 #undef RS6000_BUILTIN_Q
13219 #undef RS6000_BUILTIN_S
13220 #undef RS6000_BUILTIN_X
13222 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13223 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13224 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13225 { MASK, ICODE, NAME, ENUM },
13227 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13228 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13229 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13230 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13231 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13232 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13233 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13234 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13235 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13237 static const struct builtin_description bdesc_2arg[] =
13239 #include "rs6000-builtin.def"
13242 #undef RS6000_BUILTIN_0
13243 #undef RS6000_BUILTIN_1
13244 #undef RS6000_BUILTIN_2
13245 #undef RS6000_BUILTIN_3
13246 #undef RS6000_BUILTIN_A
13247 #undef RS6000_BUILTIN_D
13248 #undef RS6000_BUILTIN_E
13249 #undef RS6000_BUILTIN_H
13250 #undef RS6000_BUILTIN_P
13251 #undef RS6000_BUILTIN_Q
13252 #undef RS6000_BUILTIN_S
13253 #undef RS6000_BUILTIN_X
13255 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13256 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13257 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13258 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13259 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13260 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13261 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13262 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13263 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13264 { MASK, ICODE, NAME, ENUM },
13266 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13267 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13268 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13270 /* AltiVec predicates. */
13272 static const struct builtin_description bdesc_altivec_preds[] =
13274 #include "rs6000-builtin.def"
13277 /* SPE predicates. */
13278 #undef RS6000_BUILTIN_0
13279 #undef RS6000_BUILTIN_1
13280 #undef RS6000_BUILTIN_2
13281 #undef RS6000_BUILTIN_3
13282 #undef RS6000_BUILTIN_A
13283 #undef RS6000_BUILTIN_D
13284 #undef RS6000_BUILTIN_E
13285 #undef RS6000_BUILTIN_H
13286 #undef RS6000_BUILTIN_P
13287 #undef RS6000_BUILTIN_Q
13288 #undef RS6000_BUILTIN_S
13289 #undef RS6000_BUILTIN_X
13291 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13292 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13293 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13294 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13295 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13296 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13297 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13298 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13299 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13300 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13301 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13302 { MASK, ICODE, NAME, ENUM },
13304 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13306 static const struct builtin_description bdesc_spe_predicates[] =
13308 #include "rs6000-builtin.def"
13311 /* SPE evsel predicates. */
13312 #undef RS6000_BUILTIN_0
13313 #undef RS6000_BUILTIN_1
13314 #undef RS6000_BUILTIN_2
13315 #undef RS6000_BUILTIN_3
13316 #undef RS6000_BUILTIN_A
13317 #undef RS6000_BUILTIN_D
13318 #undef RS6000_BUILTIN_E
13319 #undef RS6000_BUILTIN_H
13320 #undef RS6000_BUILTIN_P
13321 #undef RS6000_BUILTIN_Q
13322 #undef RS6000_BUILTIN_S
13323 #undef RS6000_BUILTIN_X
13325 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13326 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13327 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13328 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13329 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13330 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13331 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13332 { MASK, ICODE, NAME, ENUM },
13334 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13335 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13336 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13337 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13338 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13340 static const struct builtin_description bdesc_spe_evsel[] =
13342 #include "rs6000-builtin.def"
13345 /* PAIRED predicates. */
13346 #undef RS6000_BUILTIN_0
13347 #undef RS6000_BUILTIN_1
13348 #undef RS6000_BUILTIN_2
13349 #undef RS6000_BUILTIN_3
13350 #undef RS6000_BUILTIN_A
13351 #undef RS6000_BUILTIN_D
13352 #undef RS6000_BUILTIN_E
13353 #undef RS6000_BUILTIN_H
13354 #undef RS6000_BUILTIN_P
13355 #undef RS6000_BUILTIN_Q
13356 #undef RS6000_BUILTIN_S
13357 #undef RS6000_BUILTIN_X
13359 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13360 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13361 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13362 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13363 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13364 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13365 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13366 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13367 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13368 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13369 { MASK, ICODE, NAME, ENUM },
13371 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13372 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13374 static const struct builtin_description bdesc_paired_preds[] =
13376 #include "rs6000-builtin.def"
13379 /* ABS* operations. */
13381 #undef RS6000_BUILTIN_0
13382 #undef RS6000_BUILTIN_1
13383 #undef RS6000_BUILTIN_2
13384 #undef RS6000_BUILTIN_3
13385 #undef RS6000_BUILTIN_A
13386 #undef RS6000_BUILTIN_D
13387 #undef RS6000_BUILTIN_E
13388 #undef RS6000_BUILTIN_H
13389 #undef RS6000_BUILTIN_P
13390 #undef RS6000_BUILTIN_Q
13391 #undef RS6000_BUILTIN_S
13392 #undef RS6000_BUILTIN_X
13394 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13395 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13396 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13397 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13398 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13399 { MASK, ICODE, NAME, ENUM },
13401 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13402 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13403 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13404 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13405 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13406 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13407 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13409 static const struct builtin_description bdesc_abs[] =
13411 #include "rs6000-builtin.def"
13414 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13415 foo (VECa). */
13417 #undef RS6000_BUILTIN_0
13418 #undef RS6000_BUILTIN_1
13419 #undef RS6000_BUILTIN_2
13420 #undef RS6000_BUILTIN_3
13421 #undef RS6000_BUILTIN_A
13422 #undef RS6000_BUILTIN_D
13423 #undef RS6000_BUILTIN_E
13424 #undef RS6000_BUILTIN_H
13425 #undef RS6000_BUILTIN_P
13426 #undef RS6000_BUILTIN_Q
13427 #undef RS6000_BUILTIN_S
13428 #undef RS6000_BUILTIN_X
13430 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13431 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13432 { MASK, ICODE, NAME, ENUM },
13434 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13435 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13436 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13437 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13438 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13439 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13440 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13441 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13442 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13443 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13445 static const struct builtin_description bdesc_1arg[] =
13447 #include "rs6000-builtin.def"
13450 /* Simple no-argument operations: result = __builtin_darn_32 (). */
13452 #undef RS6000_BUILTIN_0
13453 #undef RS6000_BUILTIN_1
13454 #undef RS6000_BUILTIN_2
13455 #undef RS6000_BUILTIN_3
13456 #undef RS6000_BUILTIN_A
13457 #undef RS6000_BUILTIN_D
13458 #undef RS6000_BUILTIN_E
13459 #undef RS6000_BUILTIN_H
13460 #undef RS6000_BUILTIN_P
13461 #undef RS6000_BUILTIN_Q
13462 #undef RS6000_BUILTIN_S
13463 #undef RS6000_BUILTIN_X
13465 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13466 { MASK, ICODE, NAME, ENUM },
13468 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13469 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13470 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13471 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13472 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13473 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13474 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13475 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13476 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13477 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13478 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13480 static const struct builtin_description bdesc_0arg[] =
13482 #include "rs6000-builtin.def"
13485 /* HTM builtins. */
13486 #undef RS6000_BUILTIN_0
13487 #undef RS6000_BUILTIN_1
13488 #undef RS6000_BUILTIN_2
13489 #undef RS6000_BUILTIN_3
13490 #undef RS6000_BUILTIN_A
13491 #undef RS6000_BUILTIN_D
13492 #undef RS6000_BUILTIN_E
13493 #undef RS6000_BUILTIN_H
13494 #undef RS6000_BUILTIN_P
13495 #undef RS6000_BUILTIN_Q
13496 #undef RS6000_BUILTIN_S
13497 #undef RS6000_BUILTIN_X
13499 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13500 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13501 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13502 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13503 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13504 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13505 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13506 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13507 { MASK, ICODE, NAME, ENUM },
13509 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13510 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13511 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13512 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13514 static const struct builtin_description bdesc_htm[] =
13516 #include "rs6000-builtin.def"
13519 #undef RS6000_BUILTIN_0
13520 #undef RS6000_BUILTIN_1
13521 #undef RS6000_BUILTIN_2
13522 #undef RS6000_BUILTIN_3
13523 #undef RS6000_BUILTIN_A
13524 #undef RS6000_BUILTIN_D
13525 #undef RS6000_BUILTIN_E
13526 #undef RS6000_BUILTIN_H
13527 #undef RS6000_BUILTIN_P
13528 #undef RS6000_BUILTIN_Q
13529 #undef RS6000_BUILTIN_S
13530 #undef RS6000_BUILTIN_X
13531 /* Return true if a builtin function is overloaded. */
13532 bool
13533 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13535 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13538 /* Expand an expression EXP that calls a builtin without arguments. */
13539 static rtx
13540 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13542 rtx pat;
13543 machine_mode tmode = insn_data[icode].operand[0].mode;
13545 if (icode == CODE_FOR_nothing)
13546 /* Builtin not supported on this processor. */
13547 return 0;
13549 if (target == 0
13550 || GET_MODE (target) != tmode
13551 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13552 target = gen_reg_rtx (tmode);
13554 pat = GEN_FCN (icode) (target);
13555 if (! pat)
13556 return 0;
13557 emit_insn (pat);
13559 return target;
13563 static rtx
13564 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13566 rtx pat;
13567 tree arg0 = CALL_EXPR_ARG (exp, 0);
13568 tree arg1 = CALL_EXPR_ARG (exp, 1);
13569 rtx op0 = expand_normal (arg0);
13570 rtx op1 = expand_normal (arg1);
13571 machine_mode mode0 = insn_data[icode].operand[0].mode;
13572 machine_mode mode1 = insn_data[icode].operand[1].mode;
13574 if (icode == CODE_FOR_nothing)
13575 /* Builtin not supported on this processor. */
13576 return 0;
13578 /* If we got invalid arguments bail out before generating bad rtl. */
13579 if (arg0 == error_mark_node || arg1 == error_mark_node)
13580 return const0_rtx;
13582 if (GET_CODE (op0) != CONST_INT
13583 || INTVAL (op0) > 255
13584 || INTVAL (op0) < 0)
13586 error ("argument 1 must be an 8-bit field value");
13587 return const0_rtx;
13590 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13591 op0 = copy_to_mode_reg (mode0, op0);
13593 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13594 op1 = copy_to_mode_reg (mode1, op1);
13596 pat = GEN_FCN (icode) (op0, op1);
13597 if (! pat)
13598 return const0_rtx;
13599 emit_insn (pat);
13601 return NULL_RTX;
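/* A valid use, for illustration: argument 1 must be a literal FM field
   mask in 0..255, and argument 2 is the value moved into the FPSCR:

     __builtin_mtfsf (0xff, d);   // d: a double holding the new FPSCR image  */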
13604 static rtx
13605 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13607 rtx pat;
13608 tree arg0 = CALL_EXPR_ARG (exp, 0);
13609 rtx op0 = expand_normal (arg0);
13610 machine_mode tmode = insn_data[icode].operand[0].mode;
13611 machine_mode mode0 = insn_data[icode].operand[1].mode;
13613 if (icode == CODE_FOR_nothing)
13614 /* Builtin not supported on this processor. */
13615 return 0;
13617 /* If we got invalid arguments bail out before generating bad rtl. */
13618 if (arg0 == error_mark_node)
13619 return const0_rtx;
13621 if (icode == CODE_FOR_altivec_vspltisb
13622 || icode == CODE_FOR_altivec_vspltish
13623 || icode == CODE_FOR_altivec_vspltisw
13624 || icode == CODE_FOR_spe_evsplatfi
13625 || icode == CODE_FOR_spe_evsplati)
13627 /* Only allow 5-bit *signed* literals. */
13628 if (GET_CODE (op0) != CONST_INT
13629 || INTVAL (op0) > 15
13630 || INTVAL (op0) < -16)
13632 error ("argument 1 must be a 5-bit signed literal");
13633 return const0_rtx;
13637 if (target == 0
13638 || GET_MODE (target) != tmode
13639 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13640 target = gen_reg_rtx (tmode);
13642 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13643 op0 = copy_to_mode_reg (mode0, op0);
13645 pat = GEN_FCN (icode) (target, op0);
13646 if (! pat)
13647 return 0;
13648 emit_insn (pat);
13650 return target;
13653 static rtx
13654 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13656 rtx pat, scratch1, scratch2;
13657 tree arg0 = CALL_EXPR_ARG (exp, 0);
13658 rtx op0 = expand_normal (arg0);
13659 machine_mode tmode = insn_data[icode].operand[0].mode;
13660 machine_mode mode0 = insn_data[icode].operand[1].mode;
13662 /* If we have invalid arguments, bail out before generating bad rtl. */
13663 if (arg0 == error_mark_node)
13664 return const0_rtx;
13666 if (target == 0
13667 || GET_MODE (target) != tmode
13668 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13669 target = gen_reg_rtx (tmode);
13671 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13672 op0 = copy_to_mode_reg (mode0, op0);
13674 scratch1 = gen_reg_rtx (mode0);
13675 scratch2 = gen_reg_rtx (mode0);
13677 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13678 if (! pat)
13679 return 0;
13680 emit_insn (pat);
13682 return target;
13685 static rtx
13686 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13688 rtx pat;
13689 tree arg0 = CALL_EXPR_ARG (exp, 0);
13690 tree arg1 = CALL_EXPR_ARG (exp, 1);
13691 rtx op0 = expand_normal (arg0);
13692 rtx op1 = expand_normal (arg1);
13693 machine_mode tmode = insn_data[icode].operand[0].mode;
13694 machine_mode mode0 = insn_data[icode].operand[1].mode;
13695 machine_mode mode1 = insn_data[icode].operand[2].mode;
13697 if (icode == CODE_FOR_nothing)
13698 /* Builtin not supported on this processor. */
13699 return 0;
13701 /* If we got invalid arguments bail out before generating bad rtl. */
13702 if (arg0 == error_mark_node || arg1 == error_mark_node)
13703 return const0_rtx;
13705 if (icode == CODE_FOR_altivec_vcfux
13706 || icode == CODE_FOR_altivec_vcfsx
13707 || icode == CODE_FOR_altivec_vctsxs
13708 || icode == CODE_FOR_altivec_vctuxs
13709 || icode == CODE_FOR_altivec_vspltb
13710 || icode == CODE_FOR_altivec_vsplth
13711 || icode == CODE_FOR_altivec_vspltw
13712 || icode == CODE_FOR_spe_evaddiw
13713 || icode == CODE_FOR_spe_evldd
13714 || icode == CODE_FOR_spe_evldh
13715 || icode == CODE_FOR_spe_evldw
13716 || icode == CODE_FOR_spe_evlhhesplat
13717 || icode == CODE_FOR_spe_evlhhossplat
13718 || icode == CODE_FOR_spe_evlhhousplat
13719 || icode == CODE_FOR_spe_evlwhe
13720 || icode == CODE_FOR_spe_evlwhos
13721 || icode == CODE_FOR_spe_evlwhou
13722 || icode == CODE_FOR_spe_evlwhsplat
13723 || icode == CODE_FOR_spe_evlwwsplat
13724 || icode == CODE_FOR_spe_evrlwi
13725 || icode == CODE_FOR_spe_evslwi
13726 || icode == CODE_FOR_spe_evsrwis
13727 || icode == CODE_FOR_spe_evsubifw
13728 || icode == CODE_FOR_spe_evsrwiu)
13730 /* Only allow 5-bit unsigned literals. */
13731 STRIP_NOPS (arg1);
13732 if (TREE_CODE (arg1) != INTEGER_CST
13733 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13735 error ("argument 2 must be a 5-bit unsigned literal");
13736 return const0_rtx;
13739 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13740 || icode == CODE_FOR_dfptstsfi_lt_dd
13741 || icode == CODE_FOR_dfptstsfi_gt_dd
13742 || icode == CODE_FOR_dfptstsfi_unordered_dd
13743 || icode == CODE_FOR_dfptstsfi_eq_td
13744 || icode == CODE_FOR_dfptstsfi_lt_td
13745 || icode == CODE_FOR_dfptstsfi_gt_td
13746 || icode == CODE_FOR_dfptstsfi_unordered_td)
13748 /* Only allow 6-bit unsigned literals. */
13749 STRIP_NOPS (arg0);
13750 if (TREE_CODE (arg0) != INTEGER_CST
13751 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13753 error ("argument 1 must be a 6-bit unsigned literal");
13754 return CONST0_RTX (tmode);
13758 if (target == 0
13759 || GET_MODE (target) != tmode
13760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13761 target = gen_reg_rtx (tmode);
13763 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13764 op0 = copy_to_mode_reg (mode0, op0);
13765 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13766 op1 = copy_to_mode_reg (mode1, op1);
13768 pat = GEN_FCN (icode) (target, op0, op1);
13769 if (! pat)
13770 return 0;
13771 emit_insn (pat);
13773 return target;
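/* For instance, the vspltw path above demands a literal element selector;
   a variable index is rejected before any RTL is generated (sketch):

     w = __builtin_altivec_vspltw (v, 2);   // OK: 2 is a 5-bit literal
     w = __builtin_altivec_vspltw (v, n);   // error: argument 2 must be
                                            //   a 5-bit unsigned literal  */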
13776 static rtx
13777 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13779 rtx pat, scratch;
13780 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13781 tree arg0 = CALL_EXPR_ARG (exp, 1);
13782 tree arg1 = CALL_EXPR_ARG (exp, 2);
13783 rtx op0 = expand_normal (arg0);
13784 rtx op1 = expand_normal (arg1);
13785 machine_mode tmode = SImode;
13786 machine_mode mode0 = insn_data[icode].operand[1].mode;
13787 machine_mode mode1 = insn_data[icode].operand[2].mode;
13788 int cr6_form_int;
13790 if (TREE_CODE (cr6_form) != INTEGER_CST)
13792 error ("argument 1 of __builtin_altivec_predicate must be a constant");
13793 return const0_rtx;
13795 else
13796 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13798 gcc_assert (mode0 == mode1);
13800 /* If we have invalid arguments, bail out before generating bad rtl. */
13801 if (arg0 == error_mark_node || arg1 == error_mark_node)
13802 return const0_rtx;
13804 if (target == 0
13805 || GET_MODE (target) != tmode
13806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13807 target = gen_reg_rtx (tmode);
13809 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13810 op0 = copy_to_mode_reg (mode0, op0);
13811 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13812 op1 = copy_to_mode_reg (mode1, op1);
13814 scratch = gen_reg_rtx (mode0);
13816 pat = GEN_FCN (icode) (scratch, op0, op1);
13817 if (! pat)
13818 return 0;
13819 emit_insn (pat);
13821 /* The vec_any* and vec_all* predicates use the same opcodes for two
13822 different operations, but the bits in CR6 will be different
13823 depending on what information we want. So we have to play tricks
13824 with CR6 to get the right bits out.
13826 If you think this is disgusting, look at the specs for the
13827 AltiVec predicates. */
13829 switch (cr6_form_int)
13831 case 0:
13832 emit_insn (gen_cr6_test_for_zero (target));
13833 break;
13834 case 1:
13835 emit_insn (gen_cr6_test_for_zero_reverse (target));
13836 break;
13837 case 2:
13838 emit_insn (gen_cr6_test_for_lt (target));
13839 break;
13840 case 3:
13841 emit_insn (gen_cr6_test_for_lt_reverse (target));
13842 break;
13843 default:
13844 error ("argument 1 of __builtin_altivec_predicate is out of range");
13845 break;
13848 return target;
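/* To make the CR6 games above concrete: a dot-form AltiVec compare sets
   CR6.LT when the element-wise predicate held everywhere and CR6.EQ when
   it held nowhere, so altivec.h passes cr6_form values along these lines
   (a sketch of the mapping; see altivec.h for the real definitions):

     vec_all_eq (a, b)  ->  cr6_form 2:   CR6.LT    (all matched)
     vec_all_ne (a, b)  ->  cr6_form 0:   CR6.EQ    (none matched)
     vec_any_eq (a, b)  ->  cr6_form 1:  !CR6.EQ    (some matched)
     vec_any_ne (a, b)  ->  cr6_form 3:  !CR6.LT    (some differed)  */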
13851 static rtx
13852 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
13854 rtx pat, addr;
13855 tree arg0 = CALL_EXPR_ARG (exp, 0);
13856 tree arg1 = CALL_EXPR_ARG (exp, 1);
13857 machine_mode tmode = insn_data[icode].operand[0].mode;
13858 machine_mode mode0 = Pmode;
13859 machine_mode mode1 = Pmode;
13860 rtx op0 = expand_normal (arg0);
13861 rtx op1 = expand_normal (arg1);
13863 if (icode == CODE_FOR_nothing)
13864 /* Builtin not supported on this processor. */
13865 return 0;
13867 /* If we got invalid arguments bail out before generating bad rtl. */
13868 if (arg0 == error_mark_node || arg1 == error_mark_node)
13869 return const0_rtx;
13871 if (target == 0
13872 || GET_MODE (target) != tmode
13873 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13874 target = gen_reg_rtx (tmode);
13876 op1 = copy_to_mode_reg (mode1, op1);
13878 if (op0 == const0_rtx)
13880 addr = gen_rtx_MEM (tmode, op1);
13882 else
13884 op0 = copy_to_mode_reg (mode0, op0);
13885 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
13888 pat = GEN_FCN (icode) (target, addr);
13890 if (! pat)
13891 return 0;
13892 emit_insn (pat);
13894 return target;
13897 /* Return a constant vector for use as a little-endian permute control vector
13898 to reverse the order of elements of the given vector mode. */
13899 static rtx
13900 swap_selector_for_mode (machine_mode mode)
13902 /* These are little endian vectors, so their elements are reversed
13903 from what you would normally expect for a permute control vector. */
13904 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13905 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13906 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13907 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
13908 unsigned int *swaparray, i;
13909 rtx perm[16];
13911 switch (mode)
13913 case V2DFmode:
13914 case V2DImode:
13915 swaparray = swap2;
13916 break;
13917 case V4SFmode:
13918 case V4SImode:
13919 swaparray = swap4;
13920 break;
13921 case V8HImode:
13922 swaparray = swap8;
13923 break;
13924 case V16QImode:
13925 swaparray = swap16;
13926 break;
13927 default:
13928 gcc_unreachable ();
13931 for (i = 0; i < 16; ++i)
13932 perm[i] = GEN_INT (swaparray[i]);
13934 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
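/* For example, for V4SImode the selector built above encodes the element
   order 3,2,1,0: fed to vperm with both inputs equal to the same vector,
   it reverses the order of the four 32-bit elements.  (As the comment
   notes, the byte indices are stored reversed because the control vector
   itself is a little-endian vector here.)  */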
13937 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
13938 with -maltivec=be specified. Issue the load followed by an element-
13939 reversing permute. */
13940 void
13941 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13943 rtx tmp = gen_reg_rtx (mode);
13944 rtx load = gen_rtx_SET (tmp, op1);
13945 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13946 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
13947 rtx sel = swap_selector_for_mode (mode);
13948 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
13950 gcc_assert (REG_P (op0));
13951 emit_insn (par);
13952 emit_insn (gen_rtx_SET (op0, vperm));
13955 /* Generate code for a "stvxl" built-in for a little endian target with
13956 -maltivec=be specified. Issue the store preceded by an element-reversing
13957 permute. */
13958 void
13959 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13961 rtx tmp = gen_reg_rtx (mode);
13962 rtx store = gen_rtx_SET (op0, tmp);
13963 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
13964 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
13965 rtx sel = swap_selector_for_mode (mode);
13966 rtx vperm;
13968 gcc_assert (REG_P (op1));
13969 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13970 emit_insn (gen_rtx_SET (tmp, vperm));
13971 emit_insn (par);
13974 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
13975 specified. Issue the store preceded by an element-reversing permute. */
13976 void
13977 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
13979 machine_mode inner_mode = GET_MODE_INNER (mode);
13980 rtx tmp = gen_reg_rtx (mode);
13981 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
13982 rtx sel = swap_selector_for_mode (mode);
13983 rtx vperm;
13985 gcc_assert (REG_P (op1));
13986 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
13987 emit_insn (gen_rtx_SET (tmp, vperm));
13988 emit_insn (gen_rtx_SET (op0, stvx));
13991 static rtx
13992 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13994 rtx pat, addr;
13995 tree arg0 = CALL_EXPR_ARG (exp, 0);
13996 tree arg1 = CALL_EXPR_ARG (exp, 1);
13997 machine_mode tmode = insn_data[icode].operand[0].mode;
13998 machine_mode mode0 = Pmode;
13999 machine_mode mode1 = Pmode;
14000 rtx op0 = expand_normal (arg0);
14001 rtx op1 = expand_normal (arg1);
14003 if (icode == CODE_FOR_nothing)
14004 /* Builtin not supported on this processor. */
14005 return 0;
14007 /* If we got invalid arguments bail out before generating bad rtl. */
14008 if (arg0 == error_mark_node || arg1 == error_mark_node)
14009 return const0_rtx;
14011 if (target == 0
14012 || GET_MODE (target) != tmode
14013 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14014 target = gen_reg_rtx (tmode);
14016 op1 = copy_to_mode_reg (mode1, op1);
14018 /* For LVX, express the RTL accurately by ANDing the address with -16.
14019 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14020 so the raw address is fine. */
14021 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14022 || icode == CODE_FOR_altivec_lvx_v2di_2op
14023 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14024 || icode == CODE_FOR_altivec_lvx_v4si_2op
14025 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14026 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14028 rtx rawaddr;
14029 if (op0 == const0_rtx)
14030 rawaddr = op1;
14031 else
14033 op0 = copy_to_mode_reg (mode0, op0);
14034 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14036 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14037 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14039 /* For -maltivec=be, emit the load and follow it up with a
14040 permute to swap the elements. */
14041 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14043 rtx temp = gen_reg_rtx (tmode);
14044 emit_insn (gen_rtx_SET (temp, addr));
14046 rtx sel = swap_selector_for_mode (tmode);
14047 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14048 UNSPEC_VPERM);
14049 emit_insn (gen_rtx_SET (target, vperm));
14051 else
14052 emit_insn (gen_rtx_SET (target, addr));
14054 else
14056 if (op0 == const0_rtx)
14057 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14058 else
14060 op0 = copy_to_mode_reg (mode0, op0);
14061 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14062 gen_rtx_PLUS (Pmode, op1, op0));
14065 pat = GEN_FCN (icode) (target, addr);
14066 if (! pat)
14067 return 0;
14068 emit_insn (pat);
14071 return target;
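/* The AND with -16 above encodes lvx's hardware behavior in the RTL: the
   low four effective-address bits are ignored, so the load always hits
   the containing 16-byte-aligned block (sketch):

     ea   = op0 ? op1 + op0 : op1;
     addr = ea & ~(uintptr_t) 15;   // what lvx actually loads from  */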
14074 static rtx
14075 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14077 tree arg0 = CALL_EXPR_ARG (exp, 0);
14078 tree arg1 = CALL_EXPR_ARG (exp, 1);
14079 tree arg2 = CALL_EXPR_ARG (exp, 2);
14080 rtx op0 = expand_normal (arg0);
14081 rtx op1 = expand_normal (arg1);
14082 rtx op2 = expand_normal (arg2);
14083 rtx pat;
14084 machine_mode mode0 = insn_data[icode].operand[0].mode;
14085 machine_mode mode1 = insn_data[icode].operand[1].mode;
14086 machine_mode mode2 = insn_data[icode].operand[2].mode;
14088 /* Invalid arguments. Bail before doing anything stoopid! */
14089 if (arg0 == error_mark_node
14090 || arg1 == error_mark_node
14091 || arg2 == error_mark_node)
14092 return const0_rtx;
14094 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14095 op0 = copy_to_mode_reg (mode2, op0);
14096 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14097 op1 = copy_to_mode_reg (mode0, op1);
14098 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14099 op2 = copy_to_mode_reg (mode1, op2);
14101 pat = GEN_FCN (icode) (op1, op2, op0);
14102 if (pat)
14103 emit_insn (pat);
14104 return NULL_RTX;
14107 static rtx
14108 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14110 tree arg0 = CALL_EXPR_ARG (exp, 0);
14111 tree arg1 = CALL_EXPR_ARG (exp, 1);
14112 tree arg2 = CALL_EXPR_ARG (exp, 2);
14113 rtx op0 = expand_normal (arg0);
14114 rtx op1 = expand_normal (arg1);
14115 rtx op2 = expand_normal (arg2);
14116 rtx pat, addr;
14117 machine_mode tmode = insn_data[icode].operand[0].mode;
14118 machine_mode mode1 = Pmode;
14119 machine_mode mode2 = Pmode;
14121 /* Invalid arguments. Bail before doing anything stoopid! */
14122 if (arg0 == error_mark_node
14123 || arg1 == error_mark_node
14124 || arg2 == error_mark_node)
14125 return const0_rtx;
14127 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14128 op0 = copy_to_mode_reg (tmode, op0);
14130 op2 = copy_to_mode_reg (mode2, op2);
14132 if (op1 == const0_rtx)
14134 addr = gen_rtx_MEM (tmode, op2);
14136 else
14138 op1 = copy_to_mode_reg (mode1, op1);
14139 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14142 pat = GEN_FCN (icode) (addr, op0);
14143 if (pat)
14144 emit_insn (pat);
14145 return NULL_RTX;
14148 static rtx
14149 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14151 tree arg0 = CALL_EXPR_ARG (exp, 0);
14152 tree arg1 = CALL_EXPR_ARG (exp, 1);
14153 tree arg2 = CALL_EXPR_ARG (exp, 2);
14154 rtx op0 = expand_normal (arg0);
14155 rtx op1 = expand_normal (arg1);
14156 rtx op2 = expand_normal (arg2);
14157 rtx pat, addr, rawaddr;
14158 machine_mode tmode = insn_data[icode].operand[0].mode;
14159 machine_mode smode = insn_data[icode].operand[1].mode;
14160 machine_mode mode1 = Pmode;
14161 machine_mode mode2 = Pmode;
14163 /* Invalid arguments. Bail before doing anything stoopid! */
14164 if (arg0 == error_mark_node
14165 || arg1 == error_mark_node
14166 || arg2 == error_mark_node)
14167 return const0_rtx;
14169 op2 = copy_to_mode_reg (mode2, op2);
14171 /* For STVX, express the RTL accurately by ANDing the address with -16.
14172 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14173 so the raw address is fine. */
14174 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14175 || icode == CODE_FOR_altivec_stvx_v2di_2op
14176 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14177 || icode == CODE_FOR_altivec_stvx_v4si_2op
14178 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14179 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14181 if (op1 == const0_rtx)
14182 rawaddr = op2;
14183 else
14185 op1 = copy_to_mode_reg (mode1, op1);
14186 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14189 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14190 addr = gen_rtx_MEM (tmode, addr);
14192 op0 = copy_to_mode_reg (tmode, op0);
14194 /* For -maltivec=be, emit a permute to swap the elements, followed
14195 by the store. */
14196 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14198 rtx temp = gen_reg_rtx (tmode);
14199 rtx sel = swap_selector_for_mode (tmode);
14200 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14201 UNSPEC_VPERM);
14202 emit_insn (gen_rtx_SET (temp, vperm));
14203 emit_insn (gen_rtx_SET (addr, temp));
14205 else
14206 emit_insn (gen_rtx_SET (addr, op0));
14208 else
14210 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14211 op0 = copy_to_mode_reg (smode, op0);
14213 if (op1 == const0_rtx)
14214 addr = gen_rtx_MEM (tmode, op2);
14215 else
14217 op1 = copy_to_mode_reg (mode1, op1);
14218 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14221 pat = GEN_FCN (icode) (addr, op0);
14222 if (pat)
14223 emit_insn (pat);
14226 return NULL_RTX;
14229 /* Return the appropriate SPR number associated with the given builtin. */
14230 static inline HOST_WIDE_INT
14231 htm_spr_num (enum rs6000_builtins code)
14233 if (code == HTM_BUILTIN_GET_TFHAR
14234 || code == HTM_BUILTIN_SET_TFHAR)
14235 return TFHAR_SPR;
14236 else if (code == HTM_BUILTIN_GET_TFIAR
14237 || code == HTM_BUILTIN_SET_TFIAR)
14238 return TFIAR_SPR;
14239 else if (code == HTM_BUILTIN_GET_TEXASR
14240 || code == HTM_BUILTIN_SET_TEXASR)
14241 return TEXASR_SPR;
14242 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14243 || code == HTM_BUILTIN_SET_TEXASRU);
14244 return TEXASRU_SPR;
14247 /* Return the appropriate SPR regno associated with the given builtin. */
14248 static inline HOST_WIDE_INT
14249 htm_spr_regno (enum rs6000_builtins code)
14251 if (code == HTM_BUILTIN_GET_TFHAR
14252 || code == HTM_BUILTIN_SET_TFHAR)
14253 return TFHAR_REGNO;
14254 else if (code == HTM_BUILTIN_GET_TFIAR
14255 || code == HTM_BUILTIN_SET_TFIAR)
14256 return TFIAR_REGNO;
14257 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14258 || code == HTM_BUILTIN_SET_TEXASR
14259 || code == HTM_BUILTIN_GET_TEXASRU
14260 || code == HTM_BUILTIN_SET_TEXASRU);
14261 return TEXASR_REGNO;
14264 /* Return the correct ICODE value depending on whether we are
14265 setting or reading the HTM SPRs. */
14266 static inline enum insn_code
14267 rs6000_htm_spr_icode (bool nonvoid)
14269 if (nonvoid)
14270 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14271 else
14272 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14275 /* Expand the HTM builtin in EXP and store the result in TARGET.
14276 Store true in *EXPANDEDP if we found a builtin to expand. */
14277 static rtx
14278 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14280 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14281 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14282 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14283 const struct builtin_description *d;
14284 size_t i;
14286 *expandedp = true;
14288 if (!TARGET_POWERPC64
14289 && (fcode == HTM_BUILTIN_TABORTDC
14290 || fcode == HTM_BUILTIN_TABORTDCI))
14292 size_t uns_fcode = (size_t)fcode;
14293 const char *name = rs6000_builtin_info[uns_fcode].name;
14294 error ("builtin %s is only valid in 64-bit mode", name);
14295 return const0_rtx;
14298 /* Expand the HTM builtins. */
14299 d = bdesc_htm;
14300 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14301 if (d->code == fcode)
14303 rtx op[MAX_HTM_OPERANDS], pat;
14304 int nopnds = 0;
14305 tree arg;
14306 call_expr_arg_iterator iter;
14307 unsigned attr = rs6000_builtin_info[fcode].attr;
14308 enum insn_code icode = d->icode;
14309 const struct insn_operand_data *insn_op;
14310 bool uses_spr = (attr & RS6000_BTC_SPR);
14311 rtx cr = NULL_RTX;
14313 if (uses_spr)
14314 icode = rs6000_htm_spr_icode (nonvoid);
14315 insn_op = &insn_data[icode].operand[0];
14317 if (nonvoid)
14319 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14320 if (!target
14321 || GET_MODE (target) != tmode
14322 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14323 target = gen_reg_rtx (tmode);
14324 if (uses_spr)
14325 op[nopnds++] = target;
14328 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14330 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14331 return const0_rtx;
14333 insn_op = &insn_data[icode].operand[nopnds];
14335 op[nopnds] = expand_normal (arg);
14337 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14339 if (!strcmp (insn_op->constraint, "n"))
14341 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14342 if (!CONST_INT_P (op[nopnds]))
14343 error ("argument %d must be an unsigned literal", arg_num);
14344 else
14345 error ("argument %d is an unsigned literal that is "
14346 "out of range", arg_num);
14347 return const0_rtx;
14349 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14352 nopnds++;
14355 /* Handle the builtins for extended mnemonics. These accept
14356 no arguments, but map to builtins that take arguments. */
14357 switch (fcode)
14359 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14360 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14361 op[nopnds++] = GEN_INT (1);
14362 if (flag_checking)
14363 attr |= RS6000_BTC_UNARY;
14364 break;
14365 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14366 op[nopnds++] = GEN_INT (0);
14367 if (flag_checking)
14368 attr |= RS6000_BTC_UNARY;
14369 break;
14370 default:
14371 break;
14374 /* If this builtin accesses SPRs, then pass in the appropriate
14375 SPR number and SPR regno as the last two operands. */
14376 if (uses_spr)
14378 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14379 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14380 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14382 /* If this builtin accesses a CR, then pass in a scratch
14383 CR as the last operand. */
14384 else if (attr & RS6000_BTC_CR)
14385 { cr = gen_reg_rtx (CCmode);
14386 op[nopnds++] = cr;
14389 if (flag_checking)
14391 int expected_nopnds = 0;
14392 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14393 expected_nopnds = 1;
14394 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14395 expected_nopnds = 2;
14396 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14397 expected_nopnds = 3;
14398 if (!(attr & RS6000_BTC_VOID))
14399 expected_nopnds += 1;
14400 if (uses_spr)
14401 expected_nopnds += 2;
14403 gcc_assert (nopnds == expected_nopnds
14404 && nopnds <= MAX_HTM_OPERANDS);
14407 switch (nopnds)
14409 case 1:
14410 pat = GEN_FCN (icode) (op[0]);
14411 break;
14412 case 2:
14413 pat = GEN_FCN (icode) (op[0], op[1]);
14414 break;
14415 case 3:
14416 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14417 break;
14418 case 4:
14419 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14420 break;
14421 default:
14422 gcc_unreachable ();
14424 if (!pat)
14425 return NULL_RTX;
14426 emit_insn (pat);
14428 if (attr & RS6000_BTC_CR)
14430 if (fcode == HTM_BUILTIN_TBEGIN)
14432 /* Emit code to set TARGET to true or false depending on
14433 whether the tbegin. instruction succeeded or failed
14434 to start a transaction. We do this by placing the 1's
14435 complement of CR's EQ bit into TARGET. */
14436 rtx scratch = gen_reg_rtx (SImode);
14437 emit_insn (gen_rtx_SET (scratch,
14438 gen_rtx_EQ (SImode, cr,
14439 const0_rtx)));
14440 emit_insn (gen_rtx_SET (target,
14441 gen_rtx_XOR (SImode, scratch,
14442 GEN_INT (1))));
14444 else
14446 /* Emit code to copy the 4-bit condition register field
14447 CR into the least significant end of register TARGET. */
14448 rtx scratch1 = gen_reg_rtx (SImode);
14449 rtx scratch2 = gen_reg_rtx (SImode);
14450 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14451 emit_insn (gen_movcc (subreg, cr));
14452 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14453 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14457 if (nonvoid)
14458 return target;
14459 return const0_rtx;
14462 *expandedp = false;
14463 return NULL_RTX;
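/* Seen from user code, the CR handling for tbegin above behaves like
   (a sketch):

     if (__builtin_tbegin (0))     // 1: transaction started (CR0.EQ == 0)
       ...transactional body...
     else
       ...failure handler...       // 0: tbegin. failed (CR0.EQ == 1)

   i.e. TARGET receives the complement of CR0's EQ bit, as noted above.  */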
14466 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14468 static rtx
14469 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14470 rtx target)
14472 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14473 if (fcode == RS6000_BUILTIN_CPU_INIT)
14474 return const0_rtx;
14476 if (target == 0 || GET_MODE (target) != SImode)
14477 target = gen_reg_rtx (SImode);
14479 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14480 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14481 if (TREE_CODE (arg) != STRING_CST)
14483 error ("builtin %s only accepts a string argument",
14484 rs6000_builtin_info[(size_t) fcode].name);
14485 return const0_rtx;
14488 if (fcode == RS6000_BUILTIN_CPU_IS)
14490 const char *cpu = TREE_STRING_POINTER (arg);
14491 rtx cpuid = NULL_RTX;
14492 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14493 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14495 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14496 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14497 break;
14499 if (cpuid == NULL_RTX)
14501 /* Invalid CPU argument. */
14502 error ("cpu %s is an invalid argument to builtin %s",
14503 cpu, rs6000_builtin_info[(size_t) fcode].name);
14504 return const0_rtx;
14507 rtx platform = gen_reg_rtx (SImode);
14508 rtx tcbmem = gen_const_mem (SImode,
14509 gen_rtx_PLUS (Pmode,
14510 gen_rtx_REG (Pmode, TLS_REGNUM),
14511 GEN_INT (TCB_PLATFORM_OFFSET)));
14512 emit_move_insn (platform, tcbmem);
14513 emit_insn (gen_eqsi3 (target, platform, cpuid));
14515 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14517 const char *hwcap = TREE_STRING_POINTER (arg);
14518 rtx mask = NULL_RTX;
14519 int hwcap_offset;
14520 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14521 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14523 mask = GEN_INT (cpu_supports_info[i].mask);
14524 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14525 break;
14527 if (mask == NULL_RTX)
14529 /* Invalid HWCAP argument. */
14530 error ("hwcap %s is an invalid argument to builtin %s",
14531 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14532 return const0_rtx;
14535 rtx tcb_hwcap = gen_reg_rtx (SImode);
14536 rtx tcbmem = gen_const_mem (SImode,
14537 gen_rtx_PLUS (Pmode,
14538 gen_rtx_REG (Pmode, TLS_REGNUM),
14539 GEN_INT (hwcap_offset)));
14540 emit_move_insn (tcb_hwcap, tcbmem);
14541 rtx scratch1 = gen_reg_rtx (SImode);
14542 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14543 rtx scratch2 = gen_reg_rtx (SImode);
14544 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14545 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14548 /* Record that we have expanded a CPU builtin, so that we can later
14549 emit a reference to the special symbol exported by LIBC to ensure we
14550 do not link against an old LIBC that doesn't support this feature. */
14551 cpu_builtin_p = true;
14553 #else
14554 /* For old LIBCs, always return FALSE. */
14555 emit_move_insn (target, GEN_INT (0));
14556 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14558 return target;
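/* Illustrative usage (an assumption, not part of the original source):
   with a LIBC that exposes the platform and hwcap words in the TCB,
   the expansions above turn each of these calls into a TCB load plus a
   compare or mask test:

     if (__builtin_cpu_is ("power9"))
       ...
     if (__builtin_cpu_supports ("vsx"))
       ...
 */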
14561 static rtx
14562 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14564 rtx pat;
14565 tree arg0 = CALL_EXPR_ARG (exp, 0);
14566 tree arg1 = CALL_EXPR_ARG (exp, 1);
14567 tree arg2 = CALL_EXPR_ARG (exp, 2);
14568 rtx op0 = expand_normal (arg0);
14569 rtx op1 = expand_normal (arg1);
14570 rtx op2 = expand_normal (arg2);
14571 machine_mode tmode = insn_data[icode].operand[0].mode;
14572 machine_mode mode0 = insn_data[icode].operand[1].mode;
14573 machine_mode mode1 = insn_data[icode].operand[2].mode;
14574 machine_mode mode2 = insn_data[icode].operand[3].mode;
14576 if (icode == CODE_FOR_nothing)
14577 /* Builtin not supported on this processor. */
14578 return 0;
14580 /* If we got invalid arguments bail out before generating bad rtl. */
14581 if (arg0 == error_mark_node
14582 || arg1 == error_mark_node
14583 || arg2 == error_mark_node)
14584 return const0_rtx;
14586 /* Check and prepare arguments depending on the instruction code.
14588 Note that a switch statement instead of the sequence of tests
14589 would be incorrect as many of the CODE_FOR values could be
14590 CODE_FOR_nothing and that would yield multiple alternatives
14591 with identical values. We'd never reach here at runtime in
14592 this case. */
14593 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14594 || icode == CODE_FOR_altivec_vsldoi_v4si
14595 || icode == CODE_FOR_altivec_vsldoi_v8hi
14596 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14598 /* Only allow 4-bit unsigned literals. */
14599 STRIP_NOPS (arg2);
14600 if (TREE_CODE (arg2) != INTEGER_CST
14601 || TREE_INT_CST_LOW (arg2) & ~0xf)
14603 error ("argument 3 must be a 4-bit unsigned literal");
14604 return const0_rtx;
14607 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14608 || icode == CODE_FOR_vsx_xxpermdi_v2di
14609 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14610 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14611 || icode == CODE_FOR_vsx_xxsldwi_v4si
14612 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14613 || icode == CODE_FOR_vsx_xxsldwi_v2di
14614 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14616 /* Only allow 2-bit unsigned literals. */
14617 STRIP_NOPS (arg2);
14618 if (TREE_CODE (arg2) != INTEGER_CST
14619 || TREE_INT_CST_LOW (arg2) & ~0x3)
14621 error ("argument 3 must be a 2-bit unsigned literal");
14622 return const0_rtx;
14625 else if (icode == CODE_FOR_vsx_set_v2df
14626 || icode == CODE_FOR_vsx_set_v2di
14627 || icode == CODE_FOR_bcdadd
14628 || icode == CODE_FOR_bcdadd_lt
14629 || icode == CODE_FOR_bcdadd_eq
14630 || icode == CODE_FOR_bcdadd_gt
14631 || icode == CODE_FOR_bcdsub
14632 || icode == CODE_FOR_bcdsub_lt
14633 || icode == CODE_FOR_bcdsub_eq
14634 || icode == CODE_FOR_bcdsub_gt)
14636 /* Only allow 1-bit unsigned literals. */
14637 STRIP_NOPS (arg2);
14638 if (TREE_CODE (arg2) != INTEGER_CST
14639 || TREE_INT_CST_LOW (arg2) & ~0x1)
14641 error ("argument 3 must be a 1-bit unsigned literal");
14642 return const0_rtx;
14645 else if (icode == CODE_FOR_dfp_ddedpd_dd
14646 || icode == CODE_FOR_dfp_ddedpd_td)
14648 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14649 STRIP_NOPS (arg0);
14650 if (TREE_CODE (arg0) != INTEGER_CST
14651 || TREE_INT_CST_LOW (arg0) & ~0x3)
14653 error ("argument 1 must be 0 or 2");
14654 return const0_rtx;
14657 else if (icode == CODE_FOR_dfp_denbcd_dd
14658 || icode == CODE_FOR_dfp_denbcd_td)
14660 /* Only allow 1-bit unsigned literals. */
14661 STRIP_NOPS (arg0);
14662 if (TREE_CODE (arg0) != INTEGER_CST
14663 || TREE_INT_CST_LOW (arg0) & ~0x1)
14665 error ("argument 1 must be a 1-bit unsigned literal");
14666 return const0_rtx;
14669 else if (icode == CODE_FOR_dfp_dscli_dd
14670 || icode == CODE_FOR_dfp_dscli_td
14671 || icode == CODE_FOR_dfp_dscri_dd
14672 || icode == CODE_FOR_dfp_dscri_td)
14674 /* Only allow 6-bit unsigned literals. */
14675 STRIP_NOPS (arg1);
14676 if (TREE_CODE (arg1) != INTEGER_CST
14677 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14679 error ("argument 2 must be a 6-bit unsigned literal");
14680 return const0_rtx;
14683 else if (icode == CODE_FOR_crypto_vshasigmaw
14684 || icode == CODE_FOR_crypto_vshasigmad)
14686 /* Check that the 2nd and 3rd arguments are integer constants in
14687 range, and prepare the arguments. */
14688 STRIP_NOPS (arg1);
14689 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
14691 error ("argument 2 must be 0 or 1");
14692 return const0_rtx;
14695 STRIP_NOPS (arg2);
14696 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
14698 error ("argument 3 must be in the range 0..15");
14699 return const0_rtx;
14703 if (target == 0
14704 || GET_MODE (target) != tmode
14705 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14706 target = gen_reg_rtx (tmode);
14708 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14709 op0 = copy_to_mode_reg (mode0, op0);
14710 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14711 op1 = copy_to_mode_reg (mode1, op1);
14712 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14713 op2 = copy_to_mode_reg (mode2, op2);
14715 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
14716 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
14717 else
14718 pat = GEN_FCN (icode) (target, op0, op1, op2);
14719 if (! pat)
14720 return 0;
14721 emit_insn (pat);
14723 return target;
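/* Illustrative usage (an assumption, not part of the original source):
   the literal checks above are what reject non-constant operands in
   intrinsics such as vec_sld, whose third argument must be a 4-bit
   unsigned literal:

     vector int r = vec_sld (a, b, 3);    accepted: 3 is in 0..15
     vector int s = vec_sld (a, b, n);    rejected unless n folds to
                                          a constant in that range
 */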
14726 /* Expand the lvx builtins. */
14727 static rtx
14728 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
14730 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14731 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14732 tree arg0;
14733 machine_mode tmode, mode0;
14734 rtx pat, op0;
14735 enum insn_code icode;
14737 switch (fcode)
14739 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
14740 icode = CODE_FOR_vector_altivec_load_v16qi;
14741 break;
14742 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
14743 icode = CODE_FOR_vector_altivec_load_v8hi;
14744 break;
14745 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
14746 icode = CODE_FOR_vector_altivec_load_v4si;
14747 break;
14748 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
14749 icode = CODE_FOR_vector_altivec_load_v4sf;
14750 break;
14751 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
14752 icode = CODE_FOR_vector_altivec_load_v2df;
14753 break;
14754 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
14755 icode = CODE_FOR_vector_altivec_load_v2di;
14756 break;
14757 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
14758 icode = CODE_FOR_vector_altivec_load_v1ti;
14759 break;
14760 default:
14761 *expandedp = false;
14762 return NULL_RTX;
14765 *expandedp = true;
14767 arg0 = CALL_EXPR_ARG (exp, 0);
14768 op0 = expand_normal (arg0);
14769 tmode = insn_data[icode].operand[0].mode;
14770 mode0 = insn_data[icode].operand[1].mode;
14772 if (target == 0
14773 || GET_MODE (target) != tmode
14774 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14775 target = gen_reg_rtx (tmode);
14777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14778 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14780 pat = GEN_FCN (icode) (target, op0);
14781 if (! pat)
14782 return 0;
14783 emit_insn (pat);
14784 return target;
14787 /* Expand the stvx builtins. */
14788 static rtx
14789 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14790 bool *expandedp)
14792 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14793 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14794 tree arg0, arg1;
14795 machine_mode mode0, mode1;
14796 rtx pat, op0, op1;
14797 enum insn_code icode;
14799 switch (fcode)
14801 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
14802 icode = CODE_FOR_vector_altivec_store_v16qi;
14803 break;
14804 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
14805 icode = CODE_FOR_vector_altivec_store_v8hi;
14806 break;
14807 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
14808 icode = CODE_FOR_vector_altivec_store_v4si;
14809 break;
14810 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
14811 icode = CODE_FOR_vector_altivec_store_v4sf;
14812 break;
14813 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
14814 icode = CODE_FOR_vector_altivec_store_v2df;
14815 break;
14816 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
14817 icode = CODE_FOR_vector_altivec_store_v2di;
14818 break;
14819 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
14820 icode = CODE_FOR_vector_altivec_store_v1ti;
14821 break;
14822 default:
14823 *expandedp = false;
14824 return NULL_RTX;
14827 arg0 = CALL_EXPR_ARG (exp, 0);
14828 arg1 = CALL_EXPR_ARG (exp, 1);
14829 op0 = expand_normal (arg0);
14830 op1 = expand_normal (arg1);
14831 mode0 = insn_data[icode].operand[0].mode;
14832 mode1 = insn_data[icode].operand[1].mode;
14834 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14835 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14836 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14837 op1 = copy_to_mode_reg (mode1, op1);
14839 pat = GEN_FCN (icode) (op0, op1);
14840 if (pat)
14841 emit_insn (pat);
14843 *expandedp = true;
14844 return NULL_RTX;
14847 /* Expand the dst builtins. */
14848 static rtx
14849 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14850 bool *expandedp)
14852 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14853 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14854 tree arg0, arg1, arg2;
14855 machine_mode mode0, mode1;
14856 rtx pat, op0, op1, op2;
14857 const struct builtin_description *d;
14858 size_t i;
14860 *expandedp = false;
14862 /* Handle DST variants. */
14863 d = bdesc_dst;
14864 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14865 if (d->code == fcode)
14867 arg0 = CALL_EXPR_ARG (exp, 0);
14868 arg1 = CALL_EXPR_ARG (exp, 1);
14869 arg2 = CALL_EXPR_ARG (exp, 2);
14870 op0 = expand_normal (arg0);
14871 op1 = expand_normal (arg1);
14872 op2 = expand_normal (arg2);
14873 mode0 = insn_data[d->icode].operand[0].mode;
14874 mode1 = insn_data[d->icode].operand[1].mode;
14876 /* Invalid arguments, bail out before generating bad rtl. */
14877 if (arg0 == error_mark_node
14878 || arg1 == error_mark_node
14879 || arg2 == error_mark_node)
14880 return const0_rtx;
14882 *expandedp = true;
14883 STRIP_NOPS (arg2);
14884 if (TREE_CODE (arg2) != INTEGER_CST
14885 || TREE_INT_CST_LOW (arg2) & ~0x3)
14887 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14888 return const0_rtx;
14891 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14892 op0 = copy_to_mode_reg (Pmode, op0);
14893 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14894 op1 = copy_to_mode_reg (mode1, op1);
14896 pat = GEN_FCN (d->icode) (op0, op1, op2);
14897 if (pat != 0)
14898 emit_insn (pat);
14900 return NULL_RTX;
14903 return NULL_RTX;
14906 /* Expand vec_init builtin. */
14907 static rtx
14908 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14910 machine_mode tmode = TYPE_MODE (type);
14911 machine_mode inner_mode = GET_MODE_INNER (tmode);
14912 int i, n_elt = GET_MODE_NUNITS (tmode);
14914 gcc_assert (VECTOR_MODE_P (tmode));
14915 gcc_assert (n_elt == call_expr_nargs (exp));
14917 if (!target || !register_operand (target, tmode))
14918 target = gen_reg_rtx (tmode);
14920 /* If we have a vector consisting of a single element, such as V1TImode, do
14921 the initialization directly. */
14922 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14924 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14925 emit_move_insn (target, gen_lowpart (tmode, x));
14927 else
14929 rtvec v = rtvec_alloc (n_elt);
14931 for (i = 0; i < n_elt; ++i)
14933 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14934 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14937 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14940 return target;
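/* Illustrative note (an assumption, not part of the original source):
   conceptually this expander implements element-wise construction such
   as

     vector int v = (vector int) { a, b, c, d };

   lowering each element to inner_mode and handing the PARALLEL to
   rs6000_expand_vector_init.  */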
14943 /* Return the integer constant in ARG. Constrain it to be in the range
14944 of the subparts of VEC_TYPE; issue an error if not. */
14946 static int
14947 get_element_number (tree vec_type, tree arg)
14949 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14951 if (!tree_fits_uhwi_p (arg)
14952 || (elt = tree_to_uhwi (arg), elt > max))
14954 error ("selector must be an integer constant in the range 0..%wi", max);
14955 return 0;
14958 return elt;
14961 /* Expand vec_set builtin. */
14962 static rtx
14963 altivec_expand_vec_set_builtin (tree exp)
14965 machine_mode tmode, mode1;
14966 tree arg0, arg1, arg2;
14967 int elt;
14968 rtx op0, op1;
14970 arg0 = CALL_EXPR_ARG (exp, 0);
14971 arg1 = CALL_EXPR_ARG (exp, 1);
14972 arg2 = CALL_EXPR_ARG (exp, 2);
14974 tmode = TYPE_MODE (TREE_TYPE (arg0));
14975 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14976 gcc_assert (VECTOR_MODE_P (tmode));
14978 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14979 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14980 elt = get_element_number (TREE_TYPE (arg0), arg2);
14982 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14983 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14985 op0 = force_reg (tmode, op0);
14986 op1 = force_reg (mode1, op1);
14988 rs6000_expand_vector_set (op0, op1, elt);
14990 return op0;
14993 /* Expand vec_ext builtin. */
14994 static rtx
14995 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14997 machine_mode tmode, mode0;
14998 tree arg0, arg1;
14999 rtx op0;
15000 rtx op1;
15002 arg0 = CALL_EXPR_ARG (exp, 0);
15003 arg1 = CALL_EXPR_ARG (exp, 1);
15005 op0 = expand_normal (arg0);
15006 op1 = expand_normal (arg1);
15008 /* Call get_element_number to validate arg1 if it is a constant. */
15009 if (TREE_CODE (arg1) == INTEGER_CST)
15010 (void) get_element_number (TREE_TYPE (arg0), arg1);
15012 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15013 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15014 gcc_assert (VECTOR_MODE_P (mode0));
15016 op0 = force_reg (mode0, op0);
15018 if (optimize || !target || !register_operand (target, tmode))
15019 target = gen_reg_rtx (tmode);
15021 rs6000_expand_vector_extract (target, op0, op1);
15023 return target;
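/* Illustrative usage (an assumption, not part of the original source):
   a constant selector is range-checked by get_element_number, as in

     int x = vec_extract (v, 2);    selector must be 0..nunits-1

   while a variable selector is left for rs6000_expand_vector_extract
   to handle at run time.  */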
15026 /* Expand the builtin in EXP and store the result in TARGET. Store
15027 true in *EXPANDEDP if we found a builtin to expand. */
15028 static rtx
15029 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15031 const struct builtin_description *d;
15032 size_t i;
15033 enum insn_code icode;
15034 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15035 tree arg0;
15036 rtx op0, pat;
15037 machine_mode tmode, mode0;
15038 enum rs6000_builtins fcode
15039 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15041 if (rs6000_overloaded_builtin_p (fcode))
15043 *expandedp = true;
15044 error ("unresolved overload for Altivec builtin %qF", fndecl);
15046 /* Given it is invalid, just generate a normal call. */
15047 return expand_call (exp, target, false);
15050 target = altivec_expand_ld_builtin (exp, target, expandedp);
15051 if (*expandedp)
15052 return target;
15054 target = altivec_expand_st_builtin (exp, target, expandedp);
15055 if (*expandedp)
15056 return target;
15058 target = altivec_expand_dst_builtin (exp, target, expandedp);
15059 if (*expandedp)
15060 return target;
15062 *expandedp = true;
15064 switch (fcode)
15066 case ALTIVEC_BUILTIN_STVX_V2DF:
15067 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15068 case ALTIVEC_BUILTIN_STVX_V2DI:
15069 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15070 case ALTIVEC_BUILTIN_STVX_V4SF:
15071 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15072 case ALTIVEC_BUILTIN_STVX:
15073 case ALTIVEC_BUILTIN_STVX_V4SI:
15074 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15075 case ALTIVEC_BUILTIN_STVX_V8HI:
15076 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15077 case ALTIVEC_BUILTIN_STVX_V16QI:
15078 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15079 case ALTIVEC_BUILTIN_STVEBX:
15080 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15081 case ALTIVEC_BUILTIN_STVEHX:
15082 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15083 case ALTIVEC_BUILTIN_STVEWX:
15084 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15085 case ALTIVEC_BUILTIN_STVXL_V2DF:
15086 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15087 case ALTIVEC_BUILTIN_STVXL_V2DI:
15088 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15089 case ALTIVEC_BUILTIN_STVXL_V4SF:
15090 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15091 case ALTIVEC_BUILTIN_STVXL:
15092 case ALTIVEC_BUILTIN_STVXL_V4SI:
15093 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15094 case ALTIVEC_BUILTIN_STVXL_V8HI:
15095 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15096 case ALTIVEC_BUILTIN_STVXL_V16QI:
15097 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15099 case ALTIVEC_BUILTIN_STVLX:
15100 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15101 case ALTIVEC_BUILTIN_STVLXL:
15102 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15103 case ALTIVEC_BUILTIN_STVRX:
15104 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15105 case ALTIVEC_BUILTIN_STVRXL:
15106 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15108 case VSX_BUILTIN_STXVD2X_V1TI:
15109 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15110 case VSX_BUILTIN_STXVD2X_V2DF:
15111 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15112 case VSX_BUILTIN_STXVD2X_V2DI:
15113 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15114 case VSX_BUILTIN_STXVW4X_V4SF:
15115 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15116 case VSX_BUILTIN_STXVW4X_V4SI:
15117 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15118 case VSX_BUILTIN_STXVW4X_V8HI:
15119 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15120 case VSX_BUILTIN_STXVW4X_V16QI:
15121 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15123 /* For the following on big endian, it's ok to use any appropriate
15124 unaligned-supporting store, so use a generic expander. For
15125 little-endian, the exact element-reversing instruction must
15126 be used. */
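/* Illustrative note (an assumption, not part of the original source):
   these ST_ELEMREV expansions are the ones behind vec_xst-style
   unaligned stores, e.g. vec_xst (v, 0, p), so that on little-endian
   the elements land in memory in array order rather than as a raw
   byte-reversed image.  */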
15127 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15129 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15130 : CODE_FOR_vsx_st_elemrev_v2df);
15131 return altivec_expand_stv_builtin (code, exp);
15133 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15135 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15136 : CODE_FOR_vsx_st_elemrev_v2di);
15137 return altivec_expand_stv_builtin (code, exp);
15139 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15141 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15142 : CODE_FOR_vsx_st_elemrev_v4sf);
15143 return altivec_expand_stv_builtin (code, exp);
15145 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15147 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15148 : CODE_FOR_vsx_st_elemrev_v4si);
15149 return altivec_expand_stv_builtin (code, exp);
15151 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15153 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15154 : CODE_FOR_vsx_st_elemrev_v8hi);
15155 return altivec_expand_stv_builtin (code, exp);
15157 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15159 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15160 : CODE_FOR_vsx_st_elemrev_v16qi);
15161 return altivec_expand_stv_builtin (code, exp);
15164 case ALTIVEC_BUILTIN_MFVSCR:
15165 icode = CODE_FOR_altivec_mfvscr;
15166 tmode = insn_data[icode].operand[0].mode;
15168 if (target == 0
15169 || GET_MODE (target) != tmode
15170 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15171 target = gen_reg_rtx (tmode);
15173 pat = GEN_FCN (icode) (target);
15174 if (! pat)
15175 return 0;
15176 emit_insn (pat);
15177 return target;
15179 case ALTIVEC_BUILTIN_MTVSCR:
15180 icode = CODE_FOR_altivec_mtvscr;
15181 arg0 = CALL_EXPR_ARG (exp, 0);
15182 op0 = expand_normal (arg0);
15183 mode0 = insn_data[icode].operand[0].mode;
15185 /* If we got invalid arguments bail out before generating bad rtl. */
15186 if (arg0 == error_mark_node)
15187 return const0_rtx;
15189 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15190 op0 = copy_to_mode_reg (mode0, op0);
15192 pat = GEN_FCN (icode) (op0);
15193 if (pat)
15194 emit_insn (pat);
15195 return NULL_RTX;
15197 case ALTIVEC_BUILTIN_DSSALL:
15198 emit_insn (gen_altivec_dssall ());
15199 return NULL_RTX;
15201 case ALTIVEC_BUILTIN_DSS:
15202 icode = CODE_FOR_altivec_dss;
15203 arg0 = CALL_EXPR_ARG (exp, 0);
15204 STRIP_NOPS (arg0);
15205 op0 = expand_normal (arg0);
15206 mode0 = insn_data[icode].operand[0].mode;
15208 /* If we got invalid arguments bail out before generating bad rtl. */
15209 if (arg0 == error_mark_node)
15210 return const0_rtx;
15212 if (TREE_CODE (arg0) != INTEGER_CST
15213 || TREE_INT_CST_LOW (arg0) & ~0x3)
15215 error ("argument to dss must be a 2-bit unsigned literal");
15216 return const0_rtx;
15219 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15220 op0 = copy_to_mode_reg (mode0, op0);
15222 emit_insn (gen_altivec_dss (op0));
15223 return NULL_RTX;
15225 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15226 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15227 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15228 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15229 case VSX_BUILTIN_VEC_INIT_V2DF:
15230 case VSX_BUILTIN_VEC_INIT_V2DI:
15231 case VSX_BUILTIN_VEC_INIT_V1TI:
15232 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15234 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15235 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15236 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15237 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15238 case VSX_BUILTIN_VEC_SET_V2DF:
15239 case VSX_BUILTIN_VEC_SET_V2DI:
15240 case VSX_BUILTIN_VEC_SET_V1TI:
15241 return altivec_expand_vec_set_builtin (exp);
15243 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15244 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15245 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15246 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15247 case VSX_BUILTIN_VEC_EXT_V2DF:
15248 case VSX_BUILTIN_VEC_EXT_V2DI:
15249 case VSX_BUILTIN_VEC_EXT_V1TI:
15250 return altivec_expand_vec_ext_builtin (exp, target);
15252 default:
15253 break;
15254 /* Fall through to the table-driven expanders below. */
15257 /* Expand abs* operations. */
15258 d = bdesc_abs;
15259 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15260 if (d->code == fcode)
15261 return altivec_expand_abs_builtin (d->icode, exp, target);
15263 /* Expand the AltiVec predicates. */
15264 d = bdesc_altivec_preds;
15265 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15266 if (d->code == fcode)
15267 return altivec_expand_predicate_builtin (d->icode, exp, target);
15269 /* LV* are funky. We initialized them differently. */
15270 switch (fcode)
15272 case ALTIVEC_BUILTIN_LVSL:
15273 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15274 exp, target, false);
15275 case ALTIVEC_BUILTIN_LVSR:
15276 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15277 exp, target, false);
15278 case ALTIVEC_BUILTIN_LVEBX:
15279 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15280 exp, target, false);
15281 case ALTIVEC_BUILTIN_LVEHX:
15282 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15283 exp, target, false);
15284 case ALTIVEC_BUILTIN_LVEWX:
15285 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15286 exp, target, false);
15287 case ALTIVEC_BUILTIN_LVXL_V2DF:
15288 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15289 exp, target, false);
15290 case ALTIVEC_BUILTIN_LVXL_V2DI:
15291 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15292 exp, target, false);
15293 case ALTIVEC_BUILTIN_LVXL_V4SF:
15294 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15295 exp, target, false);
15296 case ALTIVEC_BUILTIN_LVXL:
15297 case ALTIVEC_BUILTIN_LVXL_V4SI:
15298 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15299 exp, target, false);
15300 case ALTIVEC_BUILTIN_LVXL_V8HI:
15301 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15302 exp, target, false);
15303 case ALTIVEC_BUILTIN_LVXL_V16QI:
15304 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15305 exp, target, false);
15306 case ALTIVEC_BUILTIN_LVX_V2DF:
15307 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15308 exp, target, false);
15309 case ALTIVEC_BUILTIN_LVX_V2DI:
15310 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15311 exp, target, false);
15312 case ALTIVEC_BUILTIN_LVX_V4SF:
15313 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15314 exp, target, false);
15315 case ALTIVEC_BUILTIN_LVX:
15316 case ALTIVEC_BUILTIN_LVX_V4SI:
15317 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15318 exp, target, false);
15319 case ALTIVEC_BUILTIN_LVX_V8HI:
15320 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15321 exp, target, false);
15322 case ALTIVEC_BUILTIN_LVX_V16QI:
15323 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15324 exp, target, false);
15325 case ALTIVEC_BUILTIN_LVLX:
15326 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15327 exp, target, true);
15328 case ALTIVEC_BUILTIN_LVLXL:
15329 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15330 exp, target, true);
15331 case ALTIVEC_BUILTIN_LVRX:
15332 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15333 exp, target, true);
15334 case ALTIVEC_BUILTIN_LVRXL:
15335 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15336 exp, target, true);
15337 case VSX_BUILTIN_LXVD2X_V1TI:
15338 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15339 exp, target, false);
15340 case VSX_BUILTIN_LXVD2X_V2DF:
15341 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15342 exp, target, false);
15343 case VSX_BUILTIN_LXVD2X_V2DI:
15344 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15345 exp, target, false);
15346 case VSX_BUILTIN_LXVW4X_V4SF:
15347 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15348 exp, target, false);
15349 case VSX_BUILTIN_LXVW4X_V4SI:
15350 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15351 exp, target, false);
15352 case VSX_BUILTIN_LXVW4X_V8HI:
15353 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15354 exp, target, false);
15355 case VSX_BUILTIN_LXVW4X_V16QI:
15356 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15357 exp, target, false);
15358 /* For the following on big endian, it's ok to use any appropriate
15359 unaligned-supporting load, so use a generic expander. For
15360 little-endian, the exact element-reversing instruction must
15361 be used. */
15362 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15364 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15365 : CODE_FOR_vsx_ld_elemrev_v2df);
15366 return altivec_expand_lv_builtin (code, exp, target, false);
15368 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15370 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15371 : CODE_FOR_vsx_ld_elemrev_v2di);
15372 return altivec_expand_lv_builtin (code, exp, target, false);
15374 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15376 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15377 : CODE_FOR_vsx_ld_elemrev_v4sf);
15378 return altivec_expand_lv_builtin (code, exp, target, false);
15380 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15382 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15383 : CODE_FOR_vsx_ld_elemrev_v4si);
15384 return altivec_expand_lv_builtin (code, exp, target, false);
15386 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15388 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15389 : CODE_FOR_vsx_ld_elemrev_v8hi);
15390 return altivec_expand_lv_builtin (code, exp, target, false);
15392 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15394 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15395 : CODE_FOR_vsx_ld_elemrev_v16qi);
15396 return altivec_expand_lv_builtin (code, exp, target, false);
15398 break;
15399 default:
15400 break;
15401 /* Fall through to the failure return below. */
15404 *expandedp = false;
15405 return NULL_RTX;
15408 /* Expand the builtin in EXP and store the result in TARGET. Store
15409 true in *EXPANDEDP if we found a builtin to expand. */
15410 static rtx
15411 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15413 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15414 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15415 const struct builtin_description *d;
15416 size_t i;
15418 *expandedp = true;
15420 switch (fcode)
15422 case PAIRED_BUILTIN_STX:
15423 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15424 case PAIRED_BUILTIN_LX:
15425 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15426 default:
15427 break;
15428 /* Fall through to the predicate expanders below. */
15431 /* Expand the paired predicates. */
15432 d = bdesc_paired_preds;
15433 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15434 if (d->code == fcode)
15435 return paired_expand_predicate_builtin (d->icode, exp, target);
15437 *expandedp = false;
15438 return NULL_RTX;
15441 /* Binops that need to be initialized manually, but can be expanded
15442 automagically by rs6000_expand_binop_builtin. */
15443 static const struct builtin_description bdesc_2arg_spe[] =
15445 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15446 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15447 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15448 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15449 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15450 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15451 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15452 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15453 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15454 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15455 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15456 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15457 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15458 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15459 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15460 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15461 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15462 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15463 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15464 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15465 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15466 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15469 /* Expand the builtin in EXP and store the result in TARGET. Store
15470 true in *EXPANDEDP if we found a builtin to expand.
15472 This expands the SPE builtins that are not simple unary and binary
15473 operations. */
15474 static rtx
15475 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15477 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15478 tree arg1, arg0;
15479 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15480 enum insn_code icode;
15481 machine_mode tmode, mode0;
15482 rtx pat, op0;
15483 const struct builtin_description *d;
15484 size_t i;
15486 *expandedp = true;
15488 /* Syntax check for a 5-bit unsigned immediate. */
15489 switch (fcode)
15491 case SPE_BUILTIN_EVSTDD:
15492 case SPE_BUILTIN_EVSTDH:
15493 case SPE_BUILTIN_EVSTDW:
15494 case SPE_BUILTIN_EVSTWHE:
15495 case SPE_BUILTIN_EVSTWHO:
15496 case SPE_BUILTIN_EVSTWWE:
15497 case SPE_BUILTIN_EVSTWWO:
15498 arg1 = CALL_EXPR_ARG (exp, 2);
15499 if (TREE_CODE (arg1) != INTEGER_CST
15500 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15502 error ("argument 2 must be a 5-bit unsigned literal");
15503 return const0_rtx;
15505 break;
15506 default:
15507 break;
15510 /* The evsplat*i instructions are not quite generic. */
15511 switch (fcode)
15513 case SPE_BUILTIN_EVSPLATFI:
15514 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15515 exp, target);
15516 case SPE_BUILTIN_EVSPLATI:
15517 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15518 exp, target);
15519 default:
15520 break;
15523 d = bdesc_2arg_spe;
15524 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15525 if (d->code == fcode)
15526 return rs6000_expand_binop_builtin (d->icode, exp, target);
15528 d = bdesc_spe_predicates;
15529 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15530 if (d->code == fcode)
15531 return spe_expand_predicate_builtin (d->icode, exp, target);
15533 d = bdesc_spe_evsel;
15534 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15535 if (d->code == fcode)
15536 return spe_expand_evsel_builtin (d->icode, exp, target);
15538 switch (fcode)
15540 case SPE_BUILTIN_EVSTDDX:
15541 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
15542 case SPE_BUILTIN_EVSTDHX:
15543 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
15544 case SPE_BUILTIN_EVSTDWX:
15545 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
15546 case SPE_BUILTIN_EVSTWHEX:
15547 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
15548 case SPE_BUILTIN_EVSTWHOX:
15549 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
15550 case SPE_BUILTIN_EVSTWWEX:
15551 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
15552 case SPE_BUILTIN_EVSTWWOX:
15553 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
15554 case SPE_BUILTIN_EVSTDD:
15555 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
15556 case SPE_BUILTIN_EVSTDH:
15557 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
15558 case SPE_BUILTIN_EVSTDW:
15559 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
15560 case SPE_BUILTIN_EVSTWHE:
15561 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
15562 case SPE_BUILTIN_EVSTWHO:
15563 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
15564 case SPE_BUILTIN_EVSTWWE:
15565 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
15566 case SPE_BUILTIN_EVSTWWO:
15567 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
15568 case SPE_BUILTIN_MFSPEFSCR:
15569 icode = CODE_FOR_spe_mfspefscr;
15570 tmode = insn_data[icode].operand[0].mode;
15572 if (target == 0
15573 || GET_MODE (target) != tmode
15574 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15575 target = gen_reg_rtx (tmode);
15577 pat = GEN_FCN (icode) (target);
15578 if (! pat)
15579 return 0;
15580 emit_insn (pat);
15581 return target;
15582 case SPE_BUILTIN_MTSPEFSCR:
15583 icode = CODE_FOR_spe_mtspefscr;
15584 arg0 = CALL_EXPR_ARG (exp, 0);
15585 op0 = expand_normal (arg0);
15586 mode0 = insn_data[icode].operand[0].mode;
15588 if (arg0 == error_mark_node)
15589 return const0_rtx;
15591 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15592 op0 = copy_to_mode_reg (mode0, op0);
15594 pat = GEN_FCN (icode) (op0);
15595 if (pat)
15596 emit_insn (pat);
15597 return NULL_RTX;
15598 default:
15599 break;
15602 *expandedp = false;
15603 return NULL_RTX;
15606 static rtx
15607 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15609 rtx pat, scratch, tmp;
15610 tree form = CALL_EXPR_ARG (exp, 0);
15611 tree arg0 = CALL_EXPR_ARG (exp, 1);
15612 tree arg1 = CALL_EXPR_ARG (exp, 2);
15613 rtx op0 = expand_normal (arg0);
15614 rtx op1 = expand_normal (arg1);
15615 machine_mode mode0 = insn_data[icode].operand[1].mode;
15616 machine_mode mode1 = insn_data[icode].operand[2].mode;
15617 int form_int;
15618 enum rtx_code code;
15620 if (TREE_CODE (form) != INTEGER_CST)
15622 error ("argument 1 of __builtin_paired_predicate must be a constant");
15623 return const0_rtx;
15625 else
15626 form_int = TREE_INT_CST_LOW (form);
15628 gcc_assert (mode0 == mode1);
15630 if (arg0 == error_mark_node || arg1 == error_mark_node)
15631 return const0_rtx;
15633 if (target == 0
15634 || GET_MODE (target) != SImode
15635 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15636 target = gen_reg_rtx (SImode);
15637 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15638 op0 = copy_to_mode_reg (mode0, op0);
15639 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15640 op1 = copy_to_mode_reg (mode1, op1);
15642 scratch = gen_reg_rtx (CCFPmode);
15644 pat = GEN_FCN (icode) (scratch, op0, op1);
15645 if (!pat)
15646 return const0_rtx;
15648 emit_insn (pat);
15650 switch (form_int)
15652 /* LT bit. */
15653 case 0:
15654 code = LT;
15655 break;
15656 /* GT bit. */
15657 case 1:
15658 code = GT;
15659 break;
15660 /* EQ bit. */
15661 case 2:
15662 code = EQ;
15663 break;
15664 /* UN bit. */
15665 case 3:
15666 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15667 return target;
15668 default:
15669 error ("argument 1 of __builtin_paired_predicate is out of range");
15670 return const0_rtx;
15673 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15674 emit_move_insn (target, tmp);
15675 return target;
15678 static rtx
15679 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15681 rtx pat, scratch, tmp;
15682 tree form = CALL_EXPR_ARG (exp, 0);
15683 tree arg0 = CALL_EXPR_ARG (exp, 1);
15684 tree arg1 = CALL_EXPR_ARG (exp, 2);
15685 rtx op0 = expand_normal (arg0);
15686 rtx op1 = expand_normal (arg1);
15687 machine_mode mode0 = insn_data[icode].operand[1].mode;
15688 machine_mode mode1 = insn_data[icode].operand[2].mode;
15689 int form_int;
15690 enum rtx_code code;
15692 if (TREE_CODE (form) != INTEGER_CST)
15694 error ("argument 1 of __builtin_spe_predicate must be a constant");
15695 return const0_rtx;
15697 else
15698 form_int = TREE_INT_CST_LOW (form);
15700 gcc_assert (mode0 == mode1);
15702 if (arg0 == error_mark_node || arg1 == error_mark_node)
15703 return const0_rtx;
15705 if (target == 0
15706 || GET_MODE (target) != SImode
15707 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
15708 target = gen_reg_rtx (SImode);
15710 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15711 op0 = copy_to_mode_reg (mode0, op0);
15712 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15713 op1 = copy_to_mode_reg (mode1, op1);
15715 scratch = gen_reg_rtx (CCmode);
15717 pat = GEN_FCN (icode) (scratch, op0, op1);
15718 if (! pat)
15719 return const0_rtx;
15720 emit_insn (pat);
15722 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
15723 _lower_. We use one compare, but look in different bits of the
15724 CR for each variant.
15726 There are 2 elements in each SPE simd type (upper/lower). The CR
15727 bits are set as follows:
15729 BIT 0 | BIT 1 | BIT 2 | BIT 3
15730 U | L | (U | L) | (U & L)
15732 So, for an "all" relationship, BIT 3 would be set.
15733 For an "any" relationship, BIT 2 would be set. Etc.
15735 Following traditional nomenclature, these bits map to:
15737 BIT 0 | BIT 1 | BIT 2 | BIT 3
15738 LT | GT | EQ | OV
15740 Later, we will generate rtl to look in the OV/EQ/LT/GT bits (for the
all/any/upper/lower variants, respectively).
15743 switch (form_int)
15745 /* All variant. OV bit. */
15746 case 0:
15747 /* We need to get to the OV bit, which is the ORDERED bit. We
15748 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
15749 that's ugly and will make validate_condition_mode die.
15750 So let's just use another pattern. */
15751 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15752 return target;
15753 /* Any variant. EQ bit. */
15754 case 1:
15755 code = EQ;
15756 break;
15757 /* Upper variant. LT bit. */
15758 case 2:
15759 code = LT;
15760 break;
15761 /* Lower variant. GT bit. */
15762 case 3:
15763 code = GT;
15764 break;
15765 default:
15766 error ("argument 1 of __builtin_spe_predicate is out of range");
15767 return const0_rtx;
15770 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15771 emit_move_insn (target, tmp);
15773 return target;
15776 /* The evsel builtins look like this:
15778 e = __builtin_spe_evsel_OP (a, b, c, d);
15780 and work like this:
15782 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
15783 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
15786 static rtx
15787 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
15789 rtx pat, scratch;
15790 tree arg0 = CALL_EXPR_ARG (exp, 0);
15791 tree arg1 = CALL_EXPR_ARG (exp, 1);
15792 tree arg2 = CALL_EXPR_ARG (exp, 2);
15793 tree arg3 = CALL_EXPR_ARG (exp, 3);
15794 rtx op0 = expand_normal (arg0);
15795 rtx op1 = expand_normal (arg1);
15796 rtx op2 = expand_normal (arg2);
15797 rtx op3 = expand_normal (arg3);
15798 machine_mode mode0 = insn_data[icode].operand[1].mode;
15799 machine_mode mode1 = insn_data[icode].operand[2].mode;
15801 gcc_assert (mode0 == mode1);
15803 if (arg0 == error_mark_node || arg1 == error_mark_node
15804 || arg2 == error_mark_node || arg3 == error_mark_node)
15805 return const0_rtx;
15807 if (target == 0
15808 || GET_MODE (target) != mode0
15809 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
15810 target = gen_reg_rtx (mode0);
15812 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15813 op0 = copy_to_mode_reg (mode0, op0);
15814 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15815 op1 = copy_to_mode_reg (mode0, op1);
15816 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15817 op2 = copy_to_mode_reg (mode0, op2);
15818 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
15819 op3 = copy_to_mode_reg (mode0, op3);
15821 /* Generate the compare. */
15822 scratch = gen_reg_rtx (CCmode);
15823 pat = GEN_FCN (icode) (scratch, op0, op1);
15824 if (! pat)
15825 return const0_rtx;
15826 emit_insn (pat);
15828 if (mode0 == V2SImode)
15829 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
15830 else
15831 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
15833 return target;
15836 /* Raise an error message for a builtin function that is called without the
15837 appropriate target options being set. */
15839 static void
15840 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15842 size_t uns_fncode = (size_t)fncode;
15843 const char *name = rs6000_builtin_info[uns_fncode].name;
15844 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15846 gcc_assert (name != NULL);
15847 if ((fnmask & RS6000_BTM_CELL) != 0)
15848 error ("Builtin function %s is only valid for the cell processor", name);
15849 else if ((fnmask & RS6000_BTM_VSX) != 0)
15850 error ("Builtin function %s requires the -mvsx option", name);
15851 else if ((fnmask & RS6000_BTM_HTM) != 0)
15852 error ("Builtin function %s requires the -mhtm option", name);
15853 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15854 error ("Builtin function %s requires the -maltivec option", name);
15855 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
15856 error ("Builtin function %s requires the -mpaired option", name);
15857 else if ((fnmask & RS6000_BTM_SPE) != 0)
15858 error ("Builtin function %s requires the -mspe option", name);
15859 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15860 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15861 error ("Builtin function %s requires the -mhard-dfp and"
15862 " -mpower8-vector options", name);
15863 else if ((fnmask & RS6000_BTM_DFP) != 0)
15864 error ("Builtin function %s requires the -mhard-dfp option", name);
15865 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15866 error ("Builtin function %s requires the -mpower8-vector option", name);
15867 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15868 error ("Builtin function %s requires the -mcpu=power9 option", name);
15869 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15870 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15871 error ("Builtin function %s requires the -mcpu=power9 and"
15872 " -m64 options", name);
15873 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15874 error ("Builtin function %s requires the -mcpu=power9 option", name);
15875 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15876 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
15877 error ("Builtin function %s requires the -mhard-float and"
15878 " -mlong-double-128 options", name);
15879 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15880 error ("Builtin function %s requires the -mhard-float option", name);
15881 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15882 error ("Builtin function %s requires the -mfloat128 option", name);
15883 else
15884 error ("Builtin function %s is not supported with the current options",
15885 name);
15888 /* Target hook for early folding of built-ins, shamelessly stolen
15889 from ia64.c. */
15891 static tree
15892 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
15893 tree *args, bool ignore ATTRIBUTE_UNUSED)
15895 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
15897 enum rs6000_builtins fn_code
15898 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15899 switch (fn_code)
15901 case RS6000_BUILTIN_NANQ:
15902 case RS6000_BUILTIN_NANSQ:
15904 tree type = TREE_TYPE (TREE_TYPE (fndecl));
15905 const char *str = c_getstr (*args);
15906 int quiet = fn_code == RS6000_BUILTIN_NANQ;
15907 REAL_VALUE_TYPE real;
15909 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
15910 return build_real (type, real);
15911 return NULL_TREE;
15913 case RS6000_BUILTIN_INFQ:
15914 case RS6000_BUILTIN_HUGE_VALQ:
15916 tree type = TREE_TYPE (TREE_TYPE (fndecl));
15917 REAL_VALUE_TYPE inf;
15918 real_inf (&inf);
15919 return build_real (type, inf);
15921 default:
15922 break;
15925 #ifdef SUBTARGET_FOLD_BUILTIN
15926 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15927 #else
15928 return NULL_TREE;
15929 #endif
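/* Illustrative effect (an assumption, not part of the original source):
   the folding above lets constant calls vanish at compile time, e.g.

     __float128 q = __builtin_nanq ("");    folded to a quiet NaN
     __float128 i = __builtin_infq ();      folded to +infinity
 */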
15932 /* Expand an expression EXP that calls a built-in function,
15933 with result going to TARGET if that's convenient
15934 (and in mode MODE if that's convenient).
15935 SUBTARGET may be used as the target for computing one of EXP's operands.
15936 IGNORE is nonzero if the value is to be ignored. */
15938 static rtx
15939 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15940 machine_mode mode ATTRIBUTE_UNUSED,
15941 int ignore ATTRIBUTE_UNUSED)
15943 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15944 enum rs6000_builtins fcode
15945 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15946 size_t uns_fcode = (size_t)fcode;
15947 const struct builtin_description *d;
15948 size_t i;
15949 rtx ret;
15950 bool success;
15951 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15952 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15954 if (TARGET_DEBUG_BUILTIN)
15956 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15957 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15958 const char *name2 = ((icode != CODE_FOR_nothing)
15959 ? get_insn_name ((int)icode)
15960 : "nothing");
15961 const char *name3;
15963 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15965 default: name3 = "unknown"; break;
15966 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15967 case RS6000_BTC_UNARY: name3 = "unary"; break;
15968 case RS6000_BTC_BINARY: name3 = "binary"; break;
15969 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15970 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15971 case RS6000_BTC_ABS: name3 = "abs"; break;
15972 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
15973 case RS6000_BTC_DST: name3 = "dst"; break;
15977 fprintf (stderr,
15978 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15979 (name1) ? name1 : "---", fcode,
15980 (name2) ? name2 : "---", (int)icode,
15981 name3,
15982 func_valid_p ? "" : ", not valid");
15985 if (!func_valid_p)
15987 rs6000_invalid_builtin (fcode);
15989 /* Given it is invalid, just generate a normal call. */
15990 return expand_call (exp, target, ignore);
15993 switch (fcode)
15995 case RS6000_BUILTIN_RECIP:
15996 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15998 case RS6000_BUILTIN_RECIPF:
15999 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16001 case RS6000_BUILTIN_RSQRTF:
16002 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16004 case RS6000_BUILTIN_RSQRT:
16005 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16007 case POWER7_BUILTIN_BPERMD:
16008 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16009 ? CODE_FOR_bpermd_di
16010 : CODE_FOR_bpermd_si), exp, target);
16012 case RS6000_BUILTIN_GET_TB:
16013 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16014 target);
16016 case RS6000_BUILTIN_MFTB:
16017 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16018 ? CODE_FOR_rs6000_mftb_di
16019 : CODE_FOR_rs6000_mftb_si),
16020 target);
16022 case RS6000_BUILTIN_MFFS:
16023 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16025 case RS6000_BUILTIN_MTFSF:
16026 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16028 case RS6000_BUILTIN_CPU_INIT:
16029 case RS6000_BUILTIN_CPU_IS:
16030 case RS6000_BUILTIN_CPU_SUPPORTS:
16031 return cpu_expand_builtin (fcode, exp, target);
16033 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16034 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16036 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16037 : (int) CODE_FOR_altivec_lvsl_direct);
16038 machine_mode tmode = insn_data[icode].operand[0].mode;
16039 machine_mode mode = insn_data[icode].operand[1].mode;
16040 tree arg;
16041 rtx op, addr, pat;
16043 gcc_assert (TARGET_ALTIVEC);
16045 arg = CALL_EXPR_ARG (exp, 0);
16046 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16047 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16048 addr = memory_address (mode, op);
16049 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16050 op = addr;
16051 else
16053 /* For the load case we need to negate the address. */
16054 op = gen_reg_rtx (GET_MODE (addr));
16055 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16057 op = gen_rtx_MEM (mode, op);
16059 if (target == 0
16060 || GET_MODE (target) != tmode
16061 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16062 target = gen_reg_rtx (tmode);
16064 pat = GEN_FCN (icode) (target, op);
16065 if (!pat)
16066 return 0;
16067 emit_insn (pat);
16069 return target;
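/* Illustrative note (an assumption, not part of the original source):
   these mask builtins are emitted by the auto-vectorizer for the
   realignment scheme: the lvsl/lvsr result computed above feeds a
   vec_perm that merges the two aligned loads covering a misaligned
   access.  */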
16072 case ALTIVEC_BUILTIN_VCFUX:
16073 case ALTIVEC_BUILTIN_VCFSX:
16074 case ALTIVEC_BUILTIN_VCTUXS:
16075 case ALTIVEC_BUILTIN_VCTSXS:
16076 /* FIXME: There's got to be a nicer way to handle this case than
16077 constructing a new CALL_EXPR. */
16078 if (call_expr_nargs (exp) == 1)
16080 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16081 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16083 break;
16085 default:
16086 break;
16089 if (TARGET_ALTIVEC)
16091 ret = altivec_expand_builtin (exp, target, &success);
16093 if (success)
16094 return ret;
16096 if (TARGET_SPE)
16098 ret = spe_expand_builtin (exp, target, &success);
16100 if (success)
16101 return ret;
16103 if (TARGET_PAIRED_FLOAT)
16105 ret = paired_expand_builtin (exp, target, &success);
16107 if (success)
16108 return ret;
16110 if (TARGET_HTM)
16112 ret = htm_expand_builtin (exp, target, &success);
16114 if (success)
16115 return ret;
16118 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16119 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16120 gcc_assert (attr == RS6000_BTC_UNARY
16121 || attr == RS6000_BTC_BINARY
16122 || attr == RS6000_BTC_TERNARY
16123 || attr == RS6000_BTC_SPECIAL);
16125 /* Handle simple unary operations. */
16126 d = bdesc_1arg;
16127 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16128 if (d->code == fcode)
16129 return rs6000_expand_unop_builtin (d->icode, exp, target);
16131 /* Handle simple binary operations. */
16132 d = bdesc_2arg;
16133 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16134 if (d->code == fcode)
16135 return rs6000_expand_binop_builtin (d->icode, exp, target);
16137 /* Handle simple ternary operations. */
16138 d = bdesc_3arg;
16139 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16140 if (d->code == fcode)
16141 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16143 /* Handle simple no-argument operations. */
16144 d = bdesc_0arg;
16145 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16146 if (d->code == fcode)
16147 return rs6000_expand_zeroop_builtin (d->icode, target);
16149 gcc_unreachable ();
16152 static void
16153 rs6000_init_builtins (void)
16155 tree tdecl;
16156 tree ftype;
16157 machine_mode mode;
16159 if (TARGET_DEBUG_BUILTIN)
16160 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16161 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16162 (TARGET_SPE) ? ", spe" : "",
16163 (TARGET_ALTIVEC) ? ", altivec" : "",
16164 (TARGET_VSX) ? ", vsx" : "");
16166 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16167 V2SF_type_node = build_vector_type (float_type_node, 2);
16168 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16169 V2DF_type_node = build_vector_type (double_type_node, 2);
16170 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16171 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16172 V4SF_type_node = build_vector_type (float_type_node, 4);
16173 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16174 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16176 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16177 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16178 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16179 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16181 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16182 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16183 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16184 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16186 const_str_type_node
16187 = build_pointer_type (build_qualified_type (char_type_node,
16188 TYPE_QUAL_CONST));
16190 /* We use V1TI mode as a special container to hold __int128_t items that
16191 must live in VSX registers. */
16192 if (intTI_type_node)
16194 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16195 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
16198 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16199 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16200 'vector unsigned short'. */
16202 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16203 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16204 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16205 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16206 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
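/* User-level consequence of the distinct copies above (sketch, assumes
   -maltivec): each 'vector bool' element type is its own type node, not
   an alias, so C++ can overload on it separately from the unsigned type
   it was copied from, even though both share a machine mode.  */
#if 0
void f (__vector unsigned char);  /* one overload */
void f (__vector __bool char);    /* a distinct overload, same mode */
#endif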
16208 long_integer_type_internal_node = long_integer_type_node;
16209 long_unsigned_type_internal_node = long_unsigned_type_node;
16210 long_long_integer_type_internal_node = long_long_integer_type_node;
16211 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16212 intQI_type_internal_node = intQI_type_node;
16213 uintQI_type_internal_node = unsigned_intQI_type_node;
16214 intHI_type_internal_node = intHI_type_node;
16215 uintHI_type_internal_node = unsigned_intHI_type_node;
16216 intSI_type_internal_node = intSI_type_node;
16217 uintSI_type_internal_node = unsigned_intSI_type_node;
16218 intDI_type_internal_node = intDI_type_node;
16219 uintDI_type_internal_node = unsigned_intDI_type_node;
16220 intTI_type_internal_node = intTI_type_node;
16221 uintTI_type_internal_node = unsigned_intTI_type_node;
16222 float_type_internal_node = float_type_node;
16223 double_type_internal_node = double_type_node;
16224 long_double_type_internal_node = long_double_type_node;
16225 dfloat64_type_internal_node = dfloat64_type_node;
16226 dfloat128_type_internal_node = dfloat128_type_node;
16227 void_type_internal_node = void_type_node;
16229 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16230 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16231 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16232 format that uses a pair of doubles, depending on the switches and
16233 defaults. */
16234 if (TARGET_FLOAT128)
16236 ibm128_float_type_node = make_node (REAL_TYPE);
16237 TYPE_PRECISION (ibm128_float_type_node) = 128;
16238 layout_type (ibm128_float_type_node);
16239 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16241 ieee128_float_type_node = make_node (REAL_TYPE);
16242 TYPE_PRECISION (ieee128_float_type_node) = 128;
16243 layout_type (ieee128_float_type_node);
16244 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
16246 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16247 "__float128");
16249 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16250 "__ibm128");
16252 else
16254 /* All types must be nonzero, or self-test barfs during bootstrap. */
16255 ieee128_float_type_node = long_double_type_node;
16256 ibm128_float_type_node = long_double_type_node;
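/* User-level sketch of the two formats registered above (assumes a
   target where TARGET_FLOAT128 holds): both names stay usable no matter
   which format 'long double' selects; the 'q' literal suffix is assumed
   to be enabled alongside __float128.  */
#if 0
__float128 kf = 1.0q;  /* IEEE binary128, KFmode */
__ibm128 ibm = 1.0;    /* IBM double-double, IFmode */
#endif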
16259 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16260 tree type node. */
16261 builtin_mode_to_type[QImode][0] = integer_type_node;
16262 builtin_mode_to_type[HImode][0] = integer_type_node;
16263 builtin_mode_to_type[SImode][0] = intSI_type_node;
16264 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16265 builtin_mode_to_type[DImode][0] = intDI_type_node;
16266 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16267 builtin_mode_to_type[TImode][0] = intTI_type_node;
16268 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16269 builtin_mode_to_type[SFmode][0] = float_type_node;
16270 builtin_mode_to_type[DFmode][0] = double_type_node;
16271 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16272 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16273 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16274 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16275 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16276 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16277 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16278 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16279 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16280 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16281 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16282 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16283 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16284 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16285 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16286 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16287 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16288 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16289 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16290 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
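/* Sketch of how the table above is consumed: builtin_function_type ()
   indexes it by machine mode and signedness, falling back to the signed
   entry when no unsigned variant exists.  */
#if 0
tree t_s = builtin_mode_to_type[V4SImode][0];  /* V4SI_type_node */
tree t_u = builtin_mode_to_type[V4SImode][1];  /* unsigned_V4SI_type_node */
#endif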
16292 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16293 TYPE_NAME (bool_char_type_node) = tdecl;
16295 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16296 TYPE_NAME (bool_short_type_node) = tdecl;
16298 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16299 TYPE_NAME (bool_int_type_node) = tdecl;
16301 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16302 TYPE_NAME (pixel_type_node) = tdecl;
16304 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16305 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16306 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16307 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16308 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16310 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16311 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16313 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16314 TYPE_NAME (V16QI_type_node) = tdecl;
16316 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
16317 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16319 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16320 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16322 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16323 TYPE_NAME (V8HI_type_node) = tdecl;
16325 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16326 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16328 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16329 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16331 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16332 TYPE_NAME (V4SI_type_node) = tdecl;
16334 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16335 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16337 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16338 TYPE_NAME (V4SF_type_node) = tdecl;
16340 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16341 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16343 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16344 TYPE_NAME (V2DF_type_node) = tdecl;
16346 if (TARGET_POWERPC64)
16348 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16349 TYPE_NAME (V2DI_type_node) = tdecl;
16351 tdecl = add_builtin_type ("__vector unsigned long",
16352 unsigned_V2DI_type_node);
16353 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16355 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16356 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16358 else
16360 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16361 TYPE_NAME (V2DI_type_node) = tdecl;
16363 tdecl = add_builtin_type ("__vector unsigned long long",
16364 unsigned_V2DI_type_node);
16365 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16367 tdecl = add_builtin_type ("__vector __bool long long",
16368 bool_V2DI_type_node);
16369 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16372 if (V1TI_type_node)
16374 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16375 TYPE_NAME (V1TI_type_node) = tdecl;
16377 tdecl = add_builtin_type ("__vector unsigned __int128",
16378 unsigned_V1TI_type_node);
16379 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16382 /* Paired and SPE builtins are only available if the compiler was built
16383 with the appropriate options, so create them only when the
16384 corresponding option is enabled. Create Altivec and VSX builtins on
16385 machines with at least the general purpose extensions (970 and newer)
16386 to allow the use of the target attribute. */
16387 if (TARGET_PAIRED_FLOAT)
16388 paired_init_builtins ();
16389 if (TARGET_SPE)
16390 spe_init_builtins ();
16391 if (TARGET_EXTRA_BUILTINS)
16392 altivec_init_builtins ();
16393 if (TARGET_HTM)
16394 htm_init_builtins ();
16396 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16397 rs6000_common_init_builtins ();
16399 ftype = build_function_type_list (ieee128_float_type_node,
16400 const_str_type_node, NULL_TREE);
16401 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16402 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16404 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16405 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16406 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16408 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16409 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16410 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16412 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16413 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16414 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16416 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16417 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16418 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16420 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16421 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16422 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16424 mode = (TARGET_64BIT) ? DImode : SImode;
16425 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16426 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16427 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16429 ftype = build_function_type_list (unsigned_intDI_type_node,
16430 NULL_TREE);
16431 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16433 if (TARGET_64BIT)
16434 ftype = build_function_type_list (unsigned_intDI_type_node,
16435 NULL_TREE);
16436 else
16437 ftype = build_function_type_list (unsigned_intSI_type_node,
16438 NULL_TREE);
16439 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
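/* User-level sketch of the timebase builtins just defined:
   __builtin_ppc_get_timebase always returns the full 64-bit time base,
   while __builtin_ppc_mftb reads a single register and so follows the
   word size chosen above.  */
#if 0
unsigned long long tb = __builtin_ppc_get_timebase ();
unsigned long lo = __builtin_ppc_mftb ();  /* 32 bits under -m32 */
#endif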
16441 ftype = build_function_type_list (double_type_node, NULL_TREE);
16442 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16444 ftype = build_function_type_list (void_type_node,
16445 intSI_type_node, double_type_node,
16446 NULL_TREE);
16447 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16449 ftype = build_function_type_list (void_type_node, NULL_TREE);
16450 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16452 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16453 NULL_TREE);
16454 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16455 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
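/* User-level sketch of the CPU query builtins just defined: the string
   arguments name platforms/features resolved at run time, and
   __builtin_cpu_init must run before either query.  The functions
   called in the branches are hypothetical.  */
#if 0
__builtin_cpu_init ();
if (__builtin_cpu_is ("power9"))
  use_power9_path ();                    /* hypothetical */
else if (__builtin_cpu_supports ("vsx"))
  use_vsx_path ();                       /* hypothetical */
#endif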
16457 #if TARGET_XCOFF
16458 /* AIX libm provides clog as __clog. */
16459 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16460 set_user_assembler_name (tdecl, "__clog");
16461 #endif
16463 #ifdef SUBTARGET_INIT_BUILTINS
16464 SUBTARGET_INIT_BUILTINS;
16465 #endif
16468 /* Returns the rs6000 builtin decl for CODE. */
16470 static tree
16471 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16473 HOST_WIDE_INT fnmask;
16475 if (code >= RS6000_BUILTIN_COUNT)
16476 return error_mark_node;
16478 fnmask = rs6000_builtin_info[code].mask;
16479 if ((fnmask & rs6000_builtin_mask) != fnmask)
16481 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16482 return error_mark_node;
16485 return rs6000_builtin_decls[code];
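/* Worked example of the mask test above (hypothetical bit values): a
   builtin is usable only when every feature bit it needs is enabled.  */
#if 0
HOST_WIDE_INT fnmask_ex = 0x3;   /* needs, say, Altivec | VSX */
HOST_WIDE_INT enabled_ex = 0x1;  /* only Altivec enabled */
/* (0x3 & 0x1) == 0x1 != 0x3, so the builtin is reported invalid.  */
#endif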
16488 static void
16489 spe_init_builtins (void)
16491 tree puint_type_node = build_pointer_type (unsigned_type_node);
16492 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16493 const struct builtin_description *d;
16494 size_t i;
16496 tree v2si_ftype_4_v2si
16497 = build_function_type_list (opaque_V2SI_type_node,
16498 opaque_V2SI_type_node,
16499 opaque_V2SI_type_node,
16500 opaque_V2SI_type_node,
16501 opaque_V2SI_type_node,
16502 NULL_TREE);
16504 tree v2sf_ftype_4_v2sf
16505 = build_function_type_list (opaque_V2SF_type_node,
16506 opaque_V2SF_type_node,
16507 opaque_V2SF_type_node,
16508 opaque_V2SF_type_node,
16509 opaque_V2SF_type_node,
16510 NULL_TREE);
16512 tree int_ftype_int_v2si_v2si
16513 = build_function_type_list (integer_type_node,
16514 integer_type_node,
16515 opaque_V2SI_type_node,
16516 opaque_V2SI_type_node,
16517 NULL_TREE);
16519 tree int_ftype_int_v2sf_v2sf
16520 = build_function_type_list (integer_type_node,
16521 integer_type_node,
16522 opaque_V2SF_type_node,
16523 opaque_V2SF_type_node,
16524 NULL_TREE);
16526 tree void_ftype_v2si_puint_int
16527 = build_function_type_list (void_type_node,
16528 opaque_V2SI_type_node,
16529 puint_type_node,
16530 integer_type_node,
16531 NULL_TREE);
16533 tree void_ftype_v2si_puint_char
16534 = build_function_type_list (void_type_node,
16535 opaque_V2SI_type_node,
16536 puint_type_node,
16537 char_type_node,
16538 NULL_TREE);
16540 tree void_ftype_v2si_pv2si_int
16541 = build_function_type_list (void_type_node,
16542 opaque_V2SI_type_node,
16543 opaque_p_V2SI_type_node,
16544 integer_type_node,
16545 NULL_TREE);
16547 tree void_ftype_v2si_pv2si_char
16548 = build_function_type_list (void_type_node,
16549 opaque_V2SI_type_node,
16550 opaque_p_V2SI_type_node,
16551 char_type_node,
16552 NULL_TREE);
16554 tree void_ftype_int
16555 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16557 tree int_ftype_void
16558 = build_function_type_list (integer_type_node, NULL_TREE);
16560 tree v2si_ftype_pv2si_int
16561 = build_function_type_list (opaque_V2SI_type_node,
16562 opaque_p_V2SI_type_node,
16563 integer_type_node,
16564 NULL_TREE);
16566 tree v2si_ftype_puint_int
16567 = build_function_type_list (opaque_V2SI_type_node,
16568 puint_type_node,
16569 integer_type_node,
16570 NULL_TREE);
16572 tree v2si_ftype_pushort_int
16573 = build_function_type_list (opaque_V2SI_type_node,
16574 pushort_type_node,
16575 integer_type_node,
16576 NULL_TREE);
16578 tree v2si_ftype_signed_char
16579 = build_function_type_list (opaque_V2SI_type_node,
16580 signed_char_type_node,
16581 NULL_TREE);
16583 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
16585 /* Initialize irregular SPE builtins. */
16587 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
16588 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
16589 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
16590 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
16591 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
16592 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
16593 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
16594 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
16595 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
16596 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
16597 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
16598 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
16599 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
16600 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
16601 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
16602 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
16603 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
16604 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
16606 /* Loads. */
16607 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
16608 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
16609 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
16610 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
16611 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
16612 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
16613 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
16614 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
16615 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
16616 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
16617 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
16618 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
16619 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
16620 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
16621 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
16622 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
16623 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
16624 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
16625 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
16626 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
16627 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
16628 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
16630 /* Predicates. */
16631 d = bdesc_spe_predicates;
16632 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
16634 tree type;
16636 switch (insn_data[d->icode].operand[1].mode)
16638 case V2SImode:
16639 type = int_ftype_int_v2si_v2si;
16640 break;
16641 case V2SFmode:
16642 type = int_ftype_int_v2sf_v2sf;
16643 break;
16644 default:
16645 gcc_unreachable ();
16648 def_builtin (d->name, type, d->code);
16651 /* Evsel predicates. */
16652 d = bdesc_spe_evsel;
16653 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
16655 tree type;
16657 switch (insn_data[d->icode].operand[1].mode)
16659 case V2SImode:
16660 type = v2si_ftype_4_v2si;
16661 break;
16662 case V2SFmode:
16663 type = v2sf_ftype_4_v2sf;
16664 break;
16665 default:
16666 gcc_unreachable ();
16669 def_builtin (d->name, type, d->code);
16673 static void
16674 paired_init_builtins (void)
16676 const struct builtin_description *d;
16677 size_t i;
16679 tree int_ftype_int_v2sf_v2sf
16680 = build_function_type_list (integer_type_node,
16681 integer_type_node,
16682 V2SF_type_node,
16683 V2SF_type_node,
16684 NULL_TREE);
16685 tree pcfloat_type_node =
16686 build_pointer_type (build_qualified_type
16687 (float_type_node, TYPE_QUAL_CONST));
16689 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
16690 long_integer_type_node,
16691 pcfloat_type_node,
16692 NULL_TREE);
16693 tree void_ftype_v2sf_long_pcfloat =
16694 build_function_type_list (void_type_node,
16695 V2SF_type_node,
16696 long_integer_type_node,
16697 pcfloat_type_node,
16698 NULL_TREE);
16701 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
16702 PAIRED_BUILTIN_LX);
16705 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
16706 PAIRED_BUILTIN_STX);
16708 /* Predicates. */
16709 d = bdesc_paired_preds;
16710 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
16712 tree type;
16714 if (TARGET_DEBUG_BUILTIN)
16715 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
16716 (int)i, get_insn_name (d->icode), (int)d->icode,
16717 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
16719 switch (insn_data[d->icode].operand[1].mode)
16721 case V2SFmode:
16722 type = int_ftype_int_v2sf_v2sf;
16723 break;
16724 default:
16725 gcc_unreachable ();
16728 def_builtin (d->name, type, d->code);
16732 static void
16733 altivec_init_builtins (void)
16735 const struct builtin_description *d;
16736 size_t i;
16737 tree ftype;
16738 tree decl;
16740 tree pvoid_type_node = build_pointer_type (void_type_node);
16742 tree pcvoid_type_node
16743 = build_pointer_type (build_qualified_type (void_type_node,
16744 TYPE_QUAL_CONST));
16746 tree int_ftype_opaque
16747 = build_function_type_list (integer_type_node,
16748 opaque_V4SI_type_node, NULL_TREE);
16749 tree opaque_ftype_opaque
16750 = build_function_type_list (integer_type_node, NULL_TREE);
16751 tree opaque_ftype_opaque_int
16752 = build_function_type_list (opaque_V4SI_type_node,
16753 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16754 tree opaque_ftype_opaque_opaque_int
16755 = build_function_type_list (opaque_V4SI_type_node,
16756 opaque_V4SI_type_node, opaque_V4SI_type_node,
16757 integer_type_node, NULL_TREE);
16758 tree opaque_ftype_opaque_opaque_opaque
16759 = build_function_type_list (opaque_V4SI_type_node,
16760 opaque_V4SI_type_node, opaque_V4SI_type_node,
16761 opaque_V4SI_type_node, NULL_TREE);
16762 tree opaque_ftype_opaque_opaque
16763 = build_function_type_list (opaque_V4SI_type_node,
16764 opaque_V4SI_type_node, opaque_V4SI_type_node,
16765 NULL_TREE);
16766 tree int_ftype_int_opaque_opaque
16767 = build_function_type_list (integer_type_node,
16768 integer_type_node, opaque_V4SI_type_node,
16769 opaque_V4SI_type_node, NULL_TREE);
16770 tree int_ftype_int_v4si_v4si
16771 = build_function_type_list (integer_type_node,
16772 integer_type_node, V4SI_type_node,
16773 V4SI_type_node, NULL_TREE);
16774 tree int_ftype_int_v2di_v2di
16775 = build_function_type_list (integer_type_node,
16776 integer_type_node, V2DI_type_node,
16777 V2DI_type_node, NULL_TREE);
16778 tree void_ftype_v4si
16779 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16780 tree v8hi_ftype_void
16781 = build_function_type_list (V8HI_type_node, NULL_TREE);
16782 tree void_ftype_void
16783 = build_function_type_list (void_type_node, NULL_TREE);
16784 tree void_ftype_int
16785 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16787 tree opaque_ftype_long_pcvoid
16788 = build_function_type_list (opaque_V4SI_type_node,
16789 long_integer_type_node, pcvoid_type_node,
16790 NULL_TREE);
16791 tree v16qi_ftype_long_pcvoid
16792 = build_function_type_list (V16QI_type_node,
16793 long_integer_type_node, pcvoid_type_node,
16794 NULL_TREE);
16795 tree v8hi_ftype_long_pcvoid
16796 = build_function_type_list (V8HI_type_node,
16797 long_integer_type_node, pcvoid_type_node,
16798 NULL_TREE);
16799 tree v4si_ftype_long_pcvoid
16800 = build_function_type_list (V4SI_type_node,
16801 long_integer_type_node, pcvoid_type_node,
16802 NULL_TREE);
16803 tree v4sf_ftype_long_pcvoid
16804 = build_function_type_list (V4SF_type_node,
16805 long_integer_type_node, pcvoid_type_node,
16806 NULL_TREE);
16807 tree v2df_ftype_long_pcvoid
16808 = build_function_type_list (V2DF_type_node,
16809 long_integer_type_node, pcvoid_type_node,
16810 NULL_TREE);
16811 tree v2di_ftype_long_pcvoid
16812 = build_function_type_list (V2DI_type_node,
16813 long_integer_type_node, pcvoid_type_node,
16814 NULL_TREE);
16816 tree void_ftype_opaque_long_pvoid
16817 = build_function_type_list (void_type_node,
16818 opaque_V4SI_type_node, long_integer_type_node,
16819 pvoid_type_node, NULL_TREE);
16820 tree void_ftype_v4si_long_pvoid
16821 = build_function_type_list (void_type_node,
16822 V4SI_type_node, long_integer_type_node,
16823 pvoid_type_node, NULL_TREE);
16824 tree void_ftype_v16qi_long_pvoid
16825 = build_function_type_list (void_type_node,
16826 V16QI_type_node, long_integer_type_node,
16827 pvoid_type_node, NULL_TREE);
16828 tree void_ftype_v8hi_long_pvoid
16829 = build_function_type_list (void_type_node,
16830 V8HI_type_node, long_integer_type_node,
16831 pvoid_type_node, NULL_TREE);
16832 tree void_ftype_v4sf_long_pvoid
16833 = build_function_type_list (void_type_node,
16834 V4SF_type_node, long_integer_type_node,
16835 pvoid_type_node, NULL_TREE);
16836 tree void_ftype_v2df_long_pvoid
16837 = build_function_type_list (void_type_node,
16838 V2DF_type_node, long_integer_type_node,
16839 pvoid_type_node, NULL_TREE);
16840 tree void_ftype_v2di_long_pvoid
16841 = build_function_type_list (void_type_node,
16842 V2DI_type_node, long_integer_type_node,
16843 pvoid_type_node, NULL_TREE);
16844 tree int_ftype_int_v8hi_v8hi
16845 = build_function_type_list (integer_type_node,
16846 integer_type_node, V8HI_type_node,
16847 V8HI_type_node, NULL_TREE);
16848 tree int_ftype_int_v16qi_v16qi
16849 = build_function_type_list (integer_type_node,
16850 integer_type_node, V16QI_type_node,
16851 V16QI_type_node, NULL_TREE);
16852 tree int_ftype_int_v4sf_v4sf
16853 = build_function_type_list (integer_type_node,
16854 integer_type_node, V4SF_type_node,
16855 V4SF_type_node, NULL_TREE);
16856 tree int_ftype_int_v2df_v2df
16857 = build_function_type_list (integer_type_node,
16858 integer_type_node, V2DF_type_node,
16859 V2DF_type_node, NULL_TREE);
16860 tree v2di_ftype_v2di
16861 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16862 tree v4si_ftype_v4si
16863 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16864 tree v8hi_ftype_v8hi
16865 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16866 tree v16qi_ftype_v16qi
16867 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16868 tree v4sf_ftype_v4sf
16869 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16870 tree v2df_ftype_v2df
16871 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16872 tree void_ftype_pcvoid_int_int
16873 = build_function_type_list (void_type_node,
16874 pcvoid_type_node, integer_type_node,
16875 integer_type_node, NULL_TREE);
16877 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16878 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16879 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16880 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16881 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16882 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16883 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16884 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16885 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16886 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16887 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16888 ALTIVEC_BUILTIN_LVXL_V2DF);
16889 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16890 ALTIVEC_BUILTIN_LVXL_V2DI);
16891 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16892 ALTIVEC_BUILTIN_LVXL_V4SF);
16893 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16894 ALTIVEC_BUILTIN_LVXL_V4SI);
16895 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16896 ALTIVEC_BUILTIN_LVXL_V8HI);
16897 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16898 ALTIVEC_BUILTIN_LVXL_V16QI);
16899 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16900 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16901 ALTIVEC_BUILTIN_LVX_V2DF);
16902 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16903 ALTIVEC_BUILTIN_LVX_V2DI);
16904 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16905 ALTIVEC_BUILTIN_LVX_V4SF);
16906 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16907 ALTIVEC_BUILTIN_LVX_V4SI);
16908 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16909 ALTIVEC_BUILTIN_LVX_V8HI);
16910 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16911 ALTIVEC_BUILTIN_LVX_V16QI);
16912 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16913 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16914 ALTIVEC_BUILTIN_STVX_V2DF);
16915 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16916 ALTIVEC_BUILTIN_STVX_V2DI);
16917 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16918 ALTIVEC_BUILTIN_STVX_V4SF);
16919 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16920 ALTIVEC_BUILTIN_STVX_V4SI);
16921 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16922 ALTIVEC_BUILTIN_STVX_V8HI);
16923 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16924 ALTIVEC_BUILTIN_STVX_V16QI);
16925 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16926 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16927 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16928 ALTIVEC_BUILTIN_STVXL_V2DF);
16929 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16930 ALTIVEC_BUILTIN_STVXL_V2DI);
16931 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16932 ALTIVEC_BUILTIN_STVXL_V4SF);
16933 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16934 ALTIVEC_BUILTIN_STVXL_V4SI);
16935 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16936 ALTIVEC_BUILTIN_STVXL_V8HI);
16937 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16938 ALTIVEC_BUILTIN_STVXL_V16QI);
16939 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16940 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16941 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16942 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16943 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16944 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16945 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16946 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16947 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16948 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16949 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16950 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16951 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16952 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16953 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16954 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16956 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16957 VSX_BUILTIN_LXVD2X_V2DF);
16958 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16959 VSX_BUILTIN_LXVD2X_V2DI);
16960 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16961 VSX_BUILTIN_LXVW4X_V4SF);
16962 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16963 VSX_BUILTIN_LXVW4X_V4SI);
16964 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16965 VSX_BUILTIN_LXVW4X_V8HI);
16966 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16967 VSX_BUILTIN_LXVW4X_V16QI);
16968 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16969 VSX_BUILTIN_STXVD2X_V2DF);
16970 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16971 VSX_BUILTIN_STXVD2X_V2DI);
16972 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16973 VSX_BUILTIN_STXVW4X_V4SF);
16974 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16975 VSX_BUILTIN_STXVW4X_V4SI);
16976 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16977 VSX_BUILTIN_STXVW4X_V8HI);
16978 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16979 VSX_BUILTIN_STXVW4X_V16QI);
16981 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16982 VSX_BUILTIN_LD_ELEMREV_V2DF);
16983 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16984 VSX_BUILTIN_LD_ELEMREV_V2DI);
16985 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16986 VSX_BUILTIN_LD_ELEMREV_V4SF);
16987 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16988 VSX_BUILTIN_LD_ELEMREV_V4SI);
16989 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16990 VSX_BUILTIN_ST_ELEMREV_V2DF);
16991 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16992 VSX_BUILTIN_ST_ELEMREV_V2DI);
16993 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16994 VSX_BUILTIN_ST_ELEMREV_V4SF);
16995 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16996 VSX_BUILTIN_ST_ELEMREV_V4SI);
16998 if (TARGET_P9_VECTOR)
17000 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17001 VSX_BUILTIN_LD_ELEMREV_V8HI);
17002 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17003 VSX_BUILTIN_LD_ELEMREV_V16QI);
17004 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17005 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17006 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17007 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17010 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17011 VSX_BUILTIN_VEC_LD);
17012 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17013 VSX_BUILTIN_VEC_ST);
17014 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17015 VSX_BUILTIN_VEC_XL);
17016 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17017 VSX_BUILTIN_VEC_XST);
17019 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17020 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17021 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17023 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17024 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17025 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17026 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17027 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17028 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17029 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17030 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17031 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17032 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17033 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17034 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17036 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17037 ALTIVEC_BUILTIN_VEC_ADDE);
17038 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17039 ALTIVEC_BUILTIN_VEC_ADDEC);
17040 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17041 ALTIVEC_BUILTIN_VEC_CMPNE);
17042 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17043 ALTIVEC_BUILTIN_VEC_MUL);
17045 /* Cell builtins. */
17046 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17047 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17048 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17049 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17051 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17052 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17053 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17054 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17056 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17057 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17058 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17059 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17061 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17062 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17063 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17064 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17066 /* Add the DST variants. */
17067 d = bdesc_dst;
17068 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17069 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17071 /* Initialize the predicates. */
17072 d = bdesc_altivec_preds;
17073 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17075 machine_mode mode1;
17076 tree type;
17078 if (rs6000_overloaded_builtin_p (d->code))
17079 mode1 = VOIDmode;
17080 else
17081 mode1 = insn_data[d->icode].operand[1].mode;
17083 switch (mode1)
17085 case VOIDmode:
17086 type = int_ftype_int_opaque_opaque;
17087 break;
17088 case V2DImode:
17089 type = int_ftype_int_v2di_v2di;
17090 break;
17091 case V4SImode:
17092 type = int_ftype_int_v4si_v4si;
17093 break;
17094 case V8HImode:
17095 type = int_ftype_int_v8hi_v8hi;
17096 break;
17097 case V16QImode:
17098 type = int_ftype_int_v16qi_v16qi;
17099 break;
17100 case V4SFmode:
17101 type = int_ftype_int_v4sf_v4sf;
17102 break;
17103 case V2DFmode:
17104 type = int_ftype_int_v2df_v2df;
17105 break;
17106 default:
17107 gcc_unreachable ();
17110 def_builtin (d->name, type, d->code);
17113 /* Initialize the abs* operators. */
17114 d = bdesc_abs;
17115 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17117 machine_mode mode0;
17118 tree type;
17120 mode0 = insn_data[d->icode].operand[0].mode;
17122 switch (mode0)
17124 case V2DImode:
17125 type = v2di_ftype_v2di;
17126 break;
17127 case V4SImode:
17128 type = v4si_ftype_v4si;
17129 break;
17130 case V8HImode:
17131 type = v8hi_ftype_v8hi;
17132 break;
17133 case V16QImode:
17134 type = v16qi_ftype_v16qi;
17135 break;
17136 case V4SFmode:
17137 type = v4sf_ftype_v4sf;
17138 break;
17139 case V2DFmode:
17140 type = v2df_ftype_v2df;
17141 break;
17142 default:
17143 gcc_unreachable ();
17146 def_builtin (d->name, type, d->code);
17149 /* Initialize target builtin that implements
17150 targetm.vectorize.builtin_mask_for_load. */
17152 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17153 v16qi_ftype_long_pcvoid,
17154 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17155 BUILT_IN_MD, NULL, NULL_TREE);
17156 TREE_READONLY (decl) = 1;
17157 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17158 altivec_builtin_mask_for_load = decl;
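/* Conceptual sketch of how the decl recorded above is used (assumed
   lowering, not a verbatim GCC sequence): lvsl yields a permute control
   vector from the address's low bits, which realigns one misaligned
   load out of two aligned ones.  */
#if 0
static __vector unsigned char
mask_for_load_model (const void *p)
{
  return __builtin_altivec_lvsl (0, p);
  /* caller: unaligned = vec_perm (load_lo, load_hi, mask) */
}
#endif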
17160 /* Access to the vec_init patterns. */
17161 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17162 integer_type_node, integer_type_node,
17163 integer_type_node, NULL_TREE);
17164 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17166 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17167 short_integer_type_node,
17168 short_integer_type_node,
17169 short_integer_type_node,
17170 short_integer_type_node,
17171 short_integer_type_node,
17172 short_integer_type_node,
17173 short_integer_type_node, NULL_TREE);
17174 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17176 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17177 char_type_node, char_type_node,
17178 char_type_node, char_type_node,
17179 char_type_node, char_type_node,
17180 char_type_node, char_type_node,
17181 char_type_node, char_type_node,
17182 char_type_node, char_type_node,
17183 char_type_node, char_type_node,
17184 char_type_node, NULL_TREE);
17185 def_builtin ("__builtin_vec_init_v16qi", ftype,
17186 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17188 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17189 float_type_node, float_type_node,
17190 float_type_node, NULL_TREE);
17191 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17193 /* VSX builtins. */
17194 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17195 double_type_node, NULL_TREE);
17196 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17198 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17199 intDI_type_node, NULL_TREE);
17200 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
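/* User-level sketch of the vec_init paths registered above (assumes
   -maltivec/-mvsx): vector constructors with all elements supplied
   lower through these patterns.  */
#if 0
__vector int vi = { 1, 2, 3, 4 };   /* __builtin_vec_init_v4si path */
__vector double vd = { 1.0, 2.0 };  /* __builtin_vec_init_v2df path */
#endif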
17202 /* Access to the vec_set patterns. */
17203 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17204 intSI_type_node,
17205 integer_type_node, NULL_TREE);
17206 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17208 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17209 intHI_type_node,
17210 integer_type_node, NULL_TREE);
17211 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17213 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17214 intQI_type_node,
17215 integer_type_node, NULL_TREE);
17216 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17218 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17219 float_type_node,
17220 integer_type_node, NULL_TREE);
17221 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17223 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17224 double_type_node,
17225 integer_type_node, NULL_TREE);
17226 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17228 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17229 intDI_type_node,
17230 integer_type_node, NULL_TREE);
17231 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17233 /* Access to the vec_extract patterns. */
17234 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17235 integer_type_node, NULL_TREE);
17236 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17238 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17239 integer_type_node, NULL_TREE);
17240 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17242 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17243 integer_type_node, NULL_TREE);
17244 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17246 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17247 integer_type_node, NULL_TREE);
17248 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17250 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17251 integer_type_node, NULL_TREE);
17252 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17254 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17255 integer_type_node, NULL_TREE);
17256 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
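/* Sketch of the set/extract pairs registered above: the trailing
   integer operand is the lane number; an insert returns the updated
   vector and an extract returns the scalar lane.  */
#if 0
static int
lane_demo (__vector int v)
{
  v = __builtin_vec_set_v4si (v, 7, 2);  /* lane 2 := 7 */
  return __builtin_vec_ext_v4si (v, 2);  /* read lane 2 back */
}
#endif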
17259 if (V1TI_type_node)
17261 tree v1ti_ftype_long_pcvoid
17262 = build_function_type_list (V1TI_type_node,
17263 long_integer_type_node, pcvoid_type_node,
17264 NULL_TREE);
17265 tree void_ftype_v1ti_long_pvoid
17266 = build_function_type_list (void_type_node,
17267 V1TI_type_node, long_integer_type_node,
17268 pvoid_type_node, NULL_TREE);
17269 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17270 VSX_BUILTIN_LXVD2X_V1TI);
17271 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17272 VSX_BUILTIN_STXVD2X_V1TI);
17273 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17274 NULL_TREE);
17275 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17276 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17277 intTI_type_node,
17278 integer_type_node, NULL_TREE);
17279 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17280 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17281 integer_type_node, NULL_TREE);
17282 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17287 static void
17288 htm_init_builtins (void)
17290 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17291 const struct builtin_description *d;
17292 size_t i;
17294 d = bdesc_htm;
17295 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17297 tree op[MAX_HTM_OPERANDS], type;
17298 HOST_WIDE_INT mask = d->mask;
17299 unsigned attr = rs6000_builtin_info[d->code].attr;
17300 bool void_func = (attr & RS6000_BTC_VOID);
17301 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17302 int nopnds = 0;
17303 tree gpr_type_node;
17304 tree rettype;
17305 tree argtype;
17307 if (TARGET_32BIT && TARGET_POWERPC64)
17308 gpr_type_node = long_long_unsigned_type_node;
17309 else
17310 gpr_type_node = long_unsigned_type_node;
17312 if (attr & RS6000_BTC_SPR)
17314 rettype = gpr_type_node;
17315 argtype = gpr_type_node;
17317 else if (d->code == HTM_BUILTIN_TABORTDC
17318 || d->code == HTM_BUILTIN_TABORTDCI)
17320 rettype = unsigned_type_node;
17321 argtype = gpr_type_node;
17323 else
17325 rettype = unsigned_type_node;
17326 argtype = unsigned_type_node;
17329 if ((mask & builtin_mask) != mask)
17331 if (TARGET_DEBUG_BUILTIN)
17332 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
17333 continue;
17336 if (d->name == 0)
17338 if (TARGET_DEBUG_BUILTIN)
17339 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17340 (long unsigned) i);
17341 continue;
17344 op[nopnds++] = (void_func) ? void_type_node : rettype;
17346 if (attr_args == RS6000_BTC_UNARY)
17347 op[nopnds++] = argtype;
17348 else if (attr_args == RS6000_BTC_BINARY)
17350 op[nopnds++] = argtype;
17351 op[nopnds++] = argtype;
17353 else if (attr_args == RS6000_BTC_TERNARY)
17355 op[nopnds++] = argtype;
17356 op[nopnds++] = argtype;
17357 op[nopnds++] = argtype;
17360 switch (nopnds)
17362 case 1:
17363 type = build_function_type_list (op[0], NULL_TREE);
17364 break;
17365 case 2:
17366 type = build_function_type_list (op[0], op[1], NULL_TREE);
17367 break;
17368 case 3:
17369 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17370 break;
17371 case 4:
17372 type = build_function_type_list (op[0], op[1], op[2], op[3],
17373 NULL_TREE);
17374 break;
17375 default:
17376 gcc_unreachable ();
17379 def_builtin (d->name, type, d->code);
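/* Sketch of the SPR type choice at the top of the loop above: HTM
   special-purpose registers are full hardware registers, so a 32-bit
   ABI on a 64-bit processor (-m32 -mpowerpc64) still needs a 64-bit C
   type for them.  Typedef names here are hypothetical.  */
#if 0
typedef unsigned long long htm_spr_t_32on64;  /* TARGET_32BIT && TARGET_POWERPC64 */
typedef unsigned long htm_spr_t_default;      /* otherwise word-sized */
#endif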
17383 /* Hash function for builtin functions with up to 3 arguments and a return
17384 type. */
17385 hashval_t
17386 builtin_hasher::hash (builtin_hash_struct *bh)
17388 unsigned ret = 0;
17389 int i;
17391 for (i = 0; i < 4; i++)
17393 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17394 ret = (ret * 2) + bh->uns_p[i];
17397 return ret;
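/* Sketch of the encoding above: each (mode, uns_p) pair is appended as
   two mixed-radix digits, radix MAX_MACHINE_MODE then radix 2; any
   'unsigned' overflow just produces hash collisions, which
   builtin_hasher::equal resolves.  */
#if 0
unsigned h = 0;
for (int i = 0; i < 4; i++)
  h = (h * (unsigned) MAX_MACHINE_MODE + (unsigned) bh->mode[i]) * 2
      + bh->uns_p[i];  /* same value the loop above computes */
#endif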
17400 /* Compare builtin hash entries H1 and H2 for equivalence. */
17401 bool
17402 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17404 return ((p1->mode[0] == p2->mode[0])
17405 && (p1->mode[1] == p2->mode[1])
17406 && (p1->mode[2] == p2->mode[2])
17407 && (p1->mode[3] == p2->mode[3])
17408 && (p1->uns_p[0] == p2->uns_p[0])
17409 && (p1->uns_p[1] == p2->uns_p[1])
17410 && (p1->uns_p[2] == p2->uns_p[2])
17411 && (p1->uns_p[3] == p2->uns_p[3]));
17414 /* Map types for builtin functions with an explicit return type and up to 3
17415 arguments. Functions with fewer than 3 arguments pass VOIDmode as the
17416 type of each unused argument slot. */
17417 static tree
17418 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17419 machine_mode mode_arg1, machine_mode mode_arg2,
17420 enum rs6000_builtins builtin, const char *name)
17422 struct builtin_hash_struct h;
17423 struct builtin_hash_struct *h2;
17424 int num_args = 3;
17425 int i;
17426 tree ret_type = NULL_TREE;
17427 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17429 /* Create builtin_hash_table. */
17430 if (builtin_hash_table == NULL)
17431 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17433 h.type = NULL_TREE;
17434 h.mode[0] = mode_ret;
17435 h.mode[1] = mode_arg0;
17436 h.mode[2] = mode_arg1;
17437 h.mode[3] = mode_arg2;
17438 h.uns_p[0] = 0;
17439 h.uns_p[1] = 0;
17440 h.uns_p[2] = 0;
17441 h.uns_p[3] = 0;
17443 /* If the builtin is a type that produces unsigned results or takes unsigned
17444 arguments, and it is returned as a decl for the vectorizer (such as
17445 widening multiplies, permute), make sure the arguments and return value
17446 are type correct. */
17447 switch (builtin)
17449 /* unsigned 1 argument functions. */
17450 case CRYPTO_BUILTIN_VSBOX:
17451 case P8V_BUILTIN_VGBBD:
17452 case MISC_BUILTIN_CDTBCD:
17453 case MISC_BUILTIN_CBCDTD:
17454 h.uns_p[0] = 1;
17455 h.uns_p[1] = 1;
17456 break;
17458 /* unsigned 2 argument functions. */
17459 case ALTIVEC_BUILTIN_VMULEUB_UNS:
17460 case ALTIVEC_BUILTIN_VMULEUH_UNS:
17461 case ALTIVEC_BUILTIN_VMULOUB_UNS:
17462 case ALTIVEC_BUILTIN_VMULOUH_UNS:
17463 case CRYPTO_BUILTIN_VCIPHER:
17464 case CRYPTO_BUILTIN_VCIPHERLAST:
17465 case CRYPTO_BUILTIN_VNCIPHER:
17466 case CRYPTO_BUILTIN_VNCIPHERLAST:
17467 case CRYPTO_BUILTIN_VPMSUMB:
17468 case CRYPTO_BUILTIN_VPMSUMH:
17469 case CRYPTO_BUILTIN_VPMSUMW:
17470 case CRYPTO_BUILTIN_VPMSUMD:
17471 case CRYPTO_BUILTIN_VPMSUM:
17472 case MISC_BUILTIN_ADDG6S:
17473 case MISC_BUILTIN_DIVWEU:
17474 case MISC_BUILTIN_DIVWEUO:
17475 case MISC_BUILTIN_DIVDEU:
17476 case MISC_BUILTIN_DIVDEUO:
17477 h.uns_p[0] = 1;
17478 h.uns_p[1] = 1;
17479 h.uns_p[2] = 1;
17480 break;
17482 /* unsigned 3 argument functions. */
17483 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17484 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17485 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17486 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17487 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17488 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17489 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17490 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17491 case VSX_BUILTIN_VPERM_16QI_UNS:
17492 case VSX_BUILTIN_VPERM_8HI_UNS:
17493 case VSX_BUILTIN_VPERM_4SI_UNS:
17494 case VSX_BUILTIN_VPERM_2DI_UNS:
17495 case VSX_BUILTIN_XXSEL_16QI_UNS:
17496 case VSX_BUILTIN_XXSEL_8HI_UNS:
17497 case VSX_BUILTIN_XXSEL_4SI_UNS:
17498 case VSX_BUILTIN_XXSEL_2DI_UNS:
17499 case CRYPTO_BUILTIN_VPERMXOR:
17500 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17501 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17502 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17503 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17504 case CRYPTO_BUILTIN_VSHASIGMAW:
17505 case CRYPTO_BUILTIN_VSHASIGMAD:
17506 case CRYPTO_BUILTIN_VSHASIGMA:
17507 h.uns_p[0] = 1;
17508 h.uns_p[1] = 1;
17509 h.uns_p[2] = 1;
17510 h.uns_p[3] = 1;
17511 break;
17513 /* signed permute functions with unsigned char mask. */
17514 case ALTIVEC_BUILTIN_VPERM_16QI:
17515 case ALTIVEC_BUILTIN_VPERM_8HI:
17516 case ALTIVEC_BUILTIN_VPERM_4SI:
17517 case ALTIVEC_BUILTIN_VPERM_4SF:
17518 case ALTIVEC_BUILTIN_VPERM_2DI:
17519 case ALTIVEC_BUILTIN_VPERM_2DF:
17520 case VSX_BUILTIN_VPERM_16QI:
17521 case VSX_BUILTIN_VPERM_8HI:
17522 case VSX_BUILTIN_VPERM_4SI:
17523 case VSX_BUILTIN_VPERM_4SF:
17524 case VSX_BUILTIN_VPERM_2DI:
17525 case VSX_BUILTIN_VPERM_2DF:
17526 h.uns_p[3] = 1;
17527 break;
17529 /* unsigned args, signed return. */
17530 case VSX_BUILTIN_XVCVUXDDP_UNS:
17531 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17532 h.uns_p[1] = 1;
17533 break;
17535 /* signed args, unsigned return. */
17536 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17537 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17538 case MISC_BUILTIN_UNPACK_TD:
17539 case MISC_BUILTIN_UNPACK_V1TI:
17540 h.uns_p[0] = 1;
17541 break;
17543 /* unsigned arguments for 128-bit pack instructions. */
17544 case MISC_BUILTIN_PACK_TD:
17545 case MISC_BUILTIN_PACK_V1TI:
17546 h.uns_p[1] = 1;
17547 h.uns_p[2] = 1;
17548 break;
17550 default:
17551 break;
17554 /* Figure out how many args are present. */
17555 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17556 num_args--;
17558 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17559 if (!ret_type && h.uns_p[0])
17560 ret_type = builtin_mode_to_type[h.mode[0]][0];
17562 if (!ret_type)
17563 fatal_error (input_location,
17564 "internal error: builtin function %s had an unexpected "
17565 "return type %s", name, GET_MODE_NAME (h.mode[0]));
17567 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17568 arg_type[i] = NULL_TREE;
17570 for (i = 0; i < num_args; i++)
17572 int m = (int) h.mode[i+1];
17573 int uns_p = h.uns_p[i+1];
17575 arg_type[i] = builtin_mode_to_type[m][uns_p];
17576 if (!arg_type[i] && uns_p)
17577 arg_type[i] = builtin_mode_to_type[m][0];
17579 if (!arg_type[i])
17580 fatal_error (input_location,
17581 "internal error: builtin function %s, argument %d "
17582 "had unexpected argument type %s", name, i,
17583 GET_MODE_NAME (m));
17586 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17587 if (*found == NULL)
17589 h2 = ggc_alloc<builtin_hash_struct> ();
17590 *h2 = h;
17591 *found = h2;
17593 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17594 arg_type[2], NULL_TREE);
17597 return (*found)->type;
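/* The function above is hash consing: the signature is looked up first
   and the FUNCTION_TYPE is built only on a miss, so every builtin with
   the same modes and signedness shares one type node.  A minimal model
   with hypothetical names: */
#if 0
model_entry **slot = model_table->find_slot (&key, INSERT);
if (*slot == NULL)
  {
    *slot = ggc_alloc<model_entry> ();        /* allocate once */
    (*slot)->type = model_build_type (&key);  /* build type on first use */
  }
return (*slot)->type;                         /* later calls reuse it */
#endif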
17600 static void
17601 rs6000_common_init_builtins (void)
17603 const struct builtin_description *d;
17604 size_t i;
17606 tree opaque_ftype_opaque = NULL_TREE;
17607 tree opaque_ftype_opaque_opaque = NULL_TREE;
17608 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17609 tree v2si_ftype = NULL_TREE;
17610 tree v2si_ftype_qi = NULL_TREE;
17611 tree v2si_ftype_v2si_qi = NULL_TREE;
17612 tree v2si_ftype_int_qi = NULL_TREE;
17613 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17615 if (!TARGET_PAIRED_FLOAT)
17617 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
17618 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
17621 /* Paired and SPE builtins are only available if the compiler was built
17622 with the appropriate options, so create them only when the
17623 corresponding option is enabled. Create Altivec and VSX builtins on
17624 machines with at least the general purpose extensions (970 and newer)
17625 to allow the use of the target attribute. */
17627 if (TARGET_EXTRA_BUILTINS)
17628 builtin_mask |= RS6000_BTM_COMMON;
17630 /* Add the ternary operators. */
17631 d = bdesc_3arg;
17632 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17634 tree type;
17635 HOST_WIDE_INT mask = d->mask;
17637 if ((mask & builtin_mask) != mask)
17639 if (TARGET_DEBUG_BUILTIN)
17640 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17641 continue;
17644 if (rs6000_overloaded_builtin_p (d->code))
17646 if (! (type = opaque_ftype_opaque_opaque_opaque))
17647 type = opaque_ftype_opaque_opaque_opaque
17648 = build_function_type_list (opaque_V4SI_type_node,
17649 opaque_V4SI_type_node,
17650 opaque_V4SI_type_node,
17651 opaque_V4SI_type_node,
17652 NULL_TREE);
17654 else
17656 enum insn_code icode = d->icode;
17657 if (d->name == 0)
17659 if (TARGET_DEBUG_BUILTIN)
17660 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17661 (long unsigned)i);
17663 continue;
17666 if (icode == CODE_FOR_nothing)
17668 if (TARGET_DEBUG_BUILTIN)
17669 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17670 d->name);
17672 continue;
17675 type = builtin_function_type (insn_data[icode].operand[0].mode,
17676 insn_data[icode].operand[1].mode,
17677 insn_data[icode].operand[2].mode,
17678 insn_data[icode].operand[3].mode,
17679 d->code, d->name);
17682 def_builtin (d->name, type, d->code);
17685 /* Add the binary operators. */
17686 d = bdesc_2arg;
17687 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17689 machine_mode mode0, mode1, mode2;
17690 tree type;
17691 HOST_WIDE_INT mask = d->mask;
17693 if ((mask & builtin_mask) != mask)
17695 if (TARGET_DEBUG_BUILTIN)
17696 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17697 continue;
17700 if (rs6000_overloaded_builtin_p (d->code))
17702 if (! (type = opaque_ftype_opaque_opaque))
17703 type = opaque_ftype_opaque_opaque
17704 = build_function_type_list (opaque_V4SI_type_node,
17705 opaque_V4SI_type_node,
17706 opaque_V4SI_type_node,
17707 NULL_TREE);
17709 else
17711 enum insn_code icode = d->icode;
17712 if (d->name == 0)
17714 if (TARGET_DEBUG_BUILTIN)
17715 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17716 (long unsigned)i);
17718 continue;
17721 if (icode == CODE_FOR_nothing)
17723 if (TARGET_DEBUG_BUILTIN)
17724 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17725 d->name);
17727 continue;
17730 mode0 = insn_data[icode].operand[0].mode;
17731 mode1 = insn_data[icode].operand[1].mode;
17732 mode2 = insn_data[icode].operand[2].mode;
17734 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
17736 if (! (type = v2si_ftype_v2si_qi))
17737 type = v2si_ftype_v2si_qi
17738 = build_function_type_list (opaque_V2SI_type_node,
17739 opaque_V2SI_type_node,
17740 char_type_node,
17741 NULL_TREE);
17744 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
17745 && mode2 == QImode)
17747 if (! (type = v2si_ftype_int_qi))
17748 type = v2si_ftype_int_qi
17749 = build_function_type_list (opaque_V2SI_type_node,
17750 integer_type_node,
17751 char_type_node,
17752 NULL_TREE);
17755 else
17756 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17757 d->code, d->name);
17760 def_builtin (d->name, type, d->code);
17763 /* Add the simple unary operators. */
17764 d = bdesc_1arg;
17765 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17767 machine_mode mode0, mode1;
17768 tree type;
17769 HOST_WIDE_INT mask = d->mask;
17771 if ((mask & builtin_mask) != mask)
17773 if (TARGET_DEBUG_BUILTIN)
17774 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17775 continue;
17778 if (rs6000_overloaded_builtin_p (d->code))
17780 if (! (type = opaque_ftype_opaque))
17781 type = opaque_ftype_opaque
17782 = build_function_type_list (opaque_V4SI_type_node,
17783 opaque_V4SI_type_node,
17784 NULL_TREE);
17786 else
17788 enum insn_code icode = d->icode;
17789 if (d->name == 0)
17791 if (TARGET_DEBUG_BUILTIN)
17792 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17793 (long unsigned)i);
17795 continue;
17798 if (icode == CODE_FOR_nothing)
17800 if (TARGET_DEBUG_BUILTIN)
17801 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17802 d->name);
17804 continue;
17807 mode0 = insn_data[icode].operand[0].mode;
17808 mode1 = insn_data[icode].operand[1].mode;
17810 if (mode0 == V2SImode && mode1 == QImode)
17812 if (! (type = v2si_ftype_qi))
17813 type = v2si_ftype_qi
17814 = build_function_type_list (opaque_V2SI_type_node,
17815 char_type_node,
17816 NULL_TREE);
17819 else
17820 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17821 d->code, d->name);
17824 def_builtin (d->name, type, d->code);
17827 /* Add the simple no-argument operators. */
17828 d = bdesc_0arg;
17829 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17831 machine_mode mode0;
17832 tree type;
17833 HOST_WIDE_INT mask = d->mask;
17835 if ((mask & builtin_mask) != mask)
17837 if (TARGET_DEBUG_BUILTIN)
17838 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17839 continue;
17841 if (rs6000_overloaded_builtin_p (d->code))
17843 if (!opaque_ftype_opaque)
17844 opaque_ftype_opaque
17845 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17846 type = opaque_ftype_opaque;
17848 else
17850 enum insn_code icode = d->icode;
17851 if (d->name == 0)
17853 if (TARGET_DEBUG_BUILTIN)
17854 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17855 (long unsigned) i);
17856 continue;
17858 if (icode == CODE_FOR_nothing)
17860 if (TARGET_DEBUG_BUILTIN)
17861 fprintf (stderr,
17862 "rs6000_builtin, skip no-argument %s (no code)\n",
17863 d->name);
17864 continue;
17866 mode0 = insn_data[icode].operand[0].mode;
17867 if (mode0 == V2SImode)
17869 /* code for SPE */
17870 if (! (type = v2si_ftype))
17872 v2si_ftype
17873 = build_function_type_list (opaque_V2SI_type_node,
17874 NULL_TREE);
17875 type = v2si_ftype;
17878 else
17879 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17880 d->code, d->name);
17882 def_builtin (d->name, type, d->code);
17886 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17887 static void
17888 init_float128_ibm (machine_mode mode)
17890 if (!TARGET_XL_COMPAT)
17892 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17893 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17894 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17895 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17897 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
17899 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17900 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17901 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17902 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17903 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17904 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17905 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17907 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17908 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17909 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17910 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17911 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17912 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17913 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17914 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17917 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
17918 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17920 else
17922 set_optab_libfunc (add_optab, mode, "_xlqadd");
17923 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17924 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17925 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17928 /* Add various conversions for IFmode to use the traditional TFmode
17929 names. */
17930 if (mode == IFmode)
17932 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
17933 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
17934 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
17935 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
17936 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
17937 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
17939 if (TARGET_POWERPC64)
17941 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17942 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17943 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17944 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
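/* Illustrative example (added for exposition, not in the original source):
   on a powerpc64 Linux target with the default IBM long double, an addition
   such as
       long double f (long double a, long double b) { return a + b; }
   is lowered to a call to __gcc_qadd by the mappings above, or to _xlqadd
   when -mxl-compat selects the TARGET_XL_COMPAT branch.  */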
17949 /* Set up IEEE 128-bit floating point routines. Use different names if the
17950 arguments can be passed in a vector register. The historical PowerPC
17951 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17952 continue to use that if we aren't using vector registers to pass IEEE
17953 128-bit floating point. */
17955 static void
17956 init_float128_ieee (machine_mode mode)
17958 if (FLOAT128_VECTOR_P (mode))
17960 set_optab_libfunc (add_optab, mode, "__addkf3");
17961 set_optab_libfunc (sub_optab, mode, "__subkf3");
17962 set_optab_libfunc (neg_optab, mode, "__negkf2");
17963 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17964 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17965 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17966 set_optab_libfunc (abs_optab, mode, "__abstkf2");
17968 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17969 set_optab_libfunc (ne_optab, mode, "__nekf2");
17970 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17971 set_optab_libfunc (ge_optab, mode, "__gekf2");
17972 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17973 set_optab_libfunc (le_optab, mode, "__lekf2");
17974 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17976 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17977 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17978 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17979 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17981 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
17982 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17983 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
17985 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
17986 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17987 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
17989 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
17990 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
17991 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
17992 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
17993 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
17994 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
17996 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17997 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17998 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17999 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18001 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18002 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18003 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18004 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18006 if (TARGET_POWERPC64)
18008 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18009 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18010 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18011 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18015 else
18017 set_optab_libfunc (add_optab, mode, "_q_add");
18018 set_optab_libfunc (sub_optab, mode, "_q_sub");
18019 set_optab_libfunc (neg_optab, mode, "_q_neg");
18020 set_optab_libfunc (smul_optab, mode, "_q_mul");
18021 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18022 if (TARGET_PPC_GPOPT)
18023 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18025 set_optab_libfunc (eq_optab, mode, "_q_feq");
18026 set_optab_libfunc (ne_optab, mode, "_q_fne");
18027 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18028 set_optab_libfunc (ge_optab, mode, "_q_fge");
18029 set_optab_libfunc (lt_optab, mode, "_q_flt");
18030 set_optab_libfunc (le_optab, mode, "_q_fle");
18032 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18033 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18034 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18035 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18036 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18037 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18038 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18039 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
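/* Illustrative example (added for exposition, not in the original source):
   with -mfloat128 on a VSX target, a __float128 addition becomes a call to
   __addkf3, following the usual libgcc <op><mode><arity> naming set up in
   the FLOAT128_VECTOR_P branch above; targets that do not pass IEEE 128-bit
   values in vector registers use the historical _q_add name instead.  */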
18043 static void
18044 rs6000_init_libfuncs (void)
18046 /* __float128 support. */
18047 if (TARGET_FLOAT128)
18049 init_float128_ibm (IFmode);
18050 init_float128_ieee (KFmode);
18053 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18054 if (TARGET_LONG_DOUBLE_128)
18056 if (!TARGET_IEEEQUAD)
18057 init_float128_ibm (TFmode);
18059 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18060 else
18061 init_float128_ieee (TFmode);
18066 /* Expand a block clear operation, and return 1 if successful. Return 0
18067 if we should let the compiler generate normal code.
18069 operands[0] is the destination
18070 operands[1] is the length
18071 operands[3] is the alignment */
18073 int
18074 expand_block_clear (rtx operands[])
18076 rtx orig_dest = operands[0];
18077 rtx bytes_rtx = operands[1];
18078 rtx align_rtx = operands[3];
18079 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18080 HOST_WIDE_INT align;
18081 HOST_WIDE_INT bytes;
18082 int offset;
18083 int clear_bytes;
18084 int clear_step;
18086 /* If this is not a fixed size clear, just call memset. */
18087 if (! constp)
18088 return 0;
18090 /* This must be a fixed size alignment */
18091 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18092 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18094 /* Anything to clear? */
18095 bytes = INTVAL (bytes_rtx);
18096 if (bytes <= 0)
18097 return 1;
18099 /* Use the builtin memset after a point, to avoid huge code bloat.
18100 When optimize_size, avoid any significant code bloat; calling
18101 memset is about 4 instructions, so allow for one instruction to
18102 load zero and three to do clearing. */
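/* Illustrative numbers (derived from the tests below, not in the original
   source): with TARGET_ALTIVEC and 128-bit alignment clear_step is 16, so
   clears of up to 48 bytes under -Os, or 128 bytes otherwise, are expanded
   inline; with the default clear_step of 4 those limits drop to 12 and 32
   bytes, and larger clears fall back to the memset libcall.  */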
18103 if (TARGET_ALTIVEC && align >= 128)
18104 clear_step = 16;
18105 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18106 clear_step = 8;
18107 else if (TARGET_SPE && align >= 64)
18108 clear_step = 8;
18109 else
18110 clear_step = 4;
18112 if (optimize_size && bytes > 3 * clear_step)
18113 return 0;
18114 if (! optimize_size && bytes > 8 * clear_step)
18115 return 0;
18117 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18119 machine_mode mode = BLKmode;
18120 rtx dest;
18122 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18124 clear_bytes = 16;
18125 mode = V4SImode;
18127 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18129 clear_bytes = 8;
18130 mode = V2SImode;
18132 else if (bytes >= 8 && TARGET_POWERPC64
18133 && (align >= 64 || !STRICT_ALIGNMENT))
18135 clear_bytes = 8;
18136 mode = DImode;
18137 if (offset == 0 && align < 64)
18139 rtx addr;
18141 /* If the address form is reg+offset with offset not a
18142 multiple of four, reload into reg indirect form here
18143 rather than waiting for reload. This way we get one
18144 reload, not one per store. */
18145 addr = XEXP (orig_dest, 0);
18146 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18147 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18148 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18150 addr = copy_addr_to_reg (addr);
18151 orig_dest = replace_equiv_address (orig_dest, addr);
18155 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18156 { /* move 4 bytes */
18157 clear_bytes = 4;
18158 mode = SImode;
18160 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18161 { /* move 2 bytes */
18162 clear_bytes = 2;
18163 mode = HImode;
18165 else /* move 1 byte at a time */
18167 clear_bytes = 1;
18168 mode = QImode;
18171 dest = adjust_address (orig_dest, mode, offset);
18173 emit_move_insn (dest, CONST0_RTX (mode));
18176 return 1;
18180 /* Expand a block move operation, and return 1 if successful. Return 0
18181 if we should let the compiler generate normal code.
18183 operands[0] is the destination
18184 operands[1] is the source
18185 operands[2] is the length
18186 operands[3] is the alignment */
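/* Illustrative walk-through (added for exposition, not in the original
   source; assumes a 32-bit target with -mno-string): a 10-byte copy with
   32-bit alignment is expanded as two SImode moves followed by one HImode
   move.  Each piece is loaded into a temporary at once, but its store is
   queued in stores[] and flushed in batches of MAX_MOVE_REG, so up to four
   loads can issue before their stores.  */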
18188 #define MAX_MOVE_REG 4
18190 int
18191 expand_block_move (rtx operands[])
18193 rtx orig_dest = operands[0];
18194 rtx orig_src = operands[1];
18195 rtx bytes_rtx = operands[2];
18196 rtx align_rtx = operands[3];
18197 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
18198 int align;
18199 int bytes;
18200 int offset;
18201 int move_bytes;
18202 rtx stores[MAX_MOVE_REG];
18203 int num_reg = 0;
18205 /* If this is not a fixed size move, just call memcpy */
18206 if (! constp)
18207 return 0;
18209 /* This must be a fixed size alignment */
18210 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18211 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18213 /* Anything to move? */
18214 bytes = INTVAL (bytes_rtx);
18215 if (bytes <= 0)
18216 return 1;
18218 if (bytes > rs6000_block_move_inline_limit)
18219 return 0;
18221 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
18223 union {
18224 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
18225 rtx (*mov) (rtx, rtx);
18226 } gen_func;
18227 machine_mode mode = BLKmode;
18228 rtx src, dest;
18230 /* Altivec first, since it will be faster than a string move
18231 when it applies, and usually not significantly larger. */
18232 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
18234 move_bytes = 16;
18235 mode = V4SImode;
18236 gen_func.mov = gen_movv4si;
18238 else if (TARGET_SPE && bytes >= 8 && align >= 64)
18240 move_bytes = 8;
18241 mode = V2SImode;
18242 gen_func.mov = gen_movv2si;
18244 else if (TARGET_STRING
18245 && bytes > 24 /* move up to 32 bytes at a time */
18246 && ! fixed_regs[5]
18247 && ! fixed_regs[6]
18248 && ! fixed_regs[7]
18249 && ! fixed_regs[8]
18250 && ! fixed_regs[9]
18251 && ! fixed_regs[10]
18252 && ! fixed_regs[11]
18253 && ! fixed_regs[12])
18255 move_bytes = (bytes > 32) ? 32 : bytes;
18256 gen_func.movmemsi = gen_movmemsi_8reg;
18258 else if (TARGET_STRING
18259 && bytes > 16 /* move up to 24 bytes at a time */
18260 && ! fixed_regs[5]
18261 && ! fixed_regs[6]
18262 && ! fixed_regs[7]
18263 && ! fixed_regs[8]
18264 && ! fixed_regs[9]
18265 && ! fixed_regs[10])
18267 move_bytes = (bytes > 24) ? 24 : bytes;
18268 gen_func.movmemsi = gen_movmemsi_6reg;
18270 else if (TARGET_STRING
18271 && bytes > 8 /* move up to 16 bytes at a time */
18272 && ! fixed_regs[5]
18273 && ! fixed_regs[6]
18274 && ! fixed_regs[7]
18275 && ! fixed_regs[8])
18277 move_bytes = (bytes > 16) ? 16 : bytes;
18278 gen_func.movmemsi = gen_movmemsi_4reg;
18280 else if (bytes >= 8 && TARGET_POWERPC64
18281 && (align >= 64 || !STRICT_ALIGNMENT))
18283 move_bytes = 8;
18284 mode = DImode;
18285 gen_func.mov = gen_movdi;
18286 if (offset == 0 && align < 64)
18288 rtx addr;
18290 /* If the address form is reg+offset with offset not a
18291 multiple of four, reload into reg indirect form here
18292 rather than waiting for reload. This way we get one
18293 reload, not one per load and/or store. */
18294 addr = XEXP (orig_dest, 0);
18295 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18296 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18297 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18299 addr = copy_addr_to_reg (addr);
18300 orig_dest = replace_equiv_address (orig_dest, addr);
18302 addr = XEXP (orig_src, 0);
18303 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18304 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18305 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18307 addr = copy_addr_to_reg (addr);
18308 orig_src = replace_equiv_address (orig_src, addr);
18312 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
18313 { /* move up to 8 bytes at a time */
18314 move_bytes = (bytes > 8) ? 8 : bytes;
18315 gen_func.movmemsi = gen_movmemsi_2reg;
18317 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18318 { /* move 4 bytes */
18319 move_bytes = 4;
18320 mode = SImode;
18321 gen_func.mov = gen_movsi;
18323 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18324 { /* move 2 bytes */
18325 move_bytes = 2;
18326 mode = HImode;
18327 gen_func.mov = gen_movhi;
18329 else if (TARGET_STRING && bytes > 1)
18330 { /* move up to 4 bytes at a time */
18331 move_bytes = (bytes > 4) ? 4 : bytes;
18332 gen_func.movmemsi = gen_movmemsi_1reg;
18334 else /* move 1 byte at a time */
18336 move_bytes = 1;
18337 mode = QImode;
18338 gen_func.mov = gen_movqi;
18341 src = adjust_address (orig_src, mode, offset);
18342 dest = adjust_address (orig_dest, mode, offset);
18344 if (mode != BLKmode)
18346 rtx tmp_reg = gen_reg_rtx (mode);
18348 emit_insn ((*gen_func.mov) (tmp_reg, src));
18349 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
18352 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
18354 int i;
18355 for (i = 0; i < num_reg; i++)
18356 emit_insn (stores[i]);
18357 num_reg = 0;
18360 if (mode == BLKmode)
18362 /* Move the address into scratch registers. The movmemsi
18363 patterns require zero offset. */
18364 if (!REG_P (XEXP (src, 0)))
18366 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
18367 src = replace_equiv_address (src, src_reg);
18369 set_mem_size (src, move_bytes);
18371 if (!REG_P (XEXP (dest, 0)))
18373 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
18374 dest = replace_equiv_address (dest, dest_reg);
18376 set_mem_size (dest, move_bytes);
18378 emit_insn ((*gen_func.movmemsi) (dest, src,
18379 GEN_INT (move_bytes & 31),
18380 align_rtx));
18384 return 1;
18388 /* Return a string to perform a load_multiple operation.
18389 operands[0] is the vector.
18390 operands[1] is the source address.
18391 operands[2] is the first destination register. */
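/* Illustrative example (added for exposition, not in the original source):
   loading 3 words into r5..r7 with the address held in r6 hits the final
   branch below: r5 and r7 are loaded with individual lwz insns first, and
   r6, which holds the address, is overwritten by the last load,
   "lwz 6,4(6)".  When no destination register overlaps the address, the
   single "lswi %2,%1,%N0" at the end is used instead.  */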
18393 const char *
18394 rs6000_output_load_multiple (rtx operands[3])
18396 /* We have to handle the case where the pseudo used to contain the address
18397 is assigned to one of the output registers. */
18398 int i, j;
18399 int words = XVECLEN (operands[0], 0);
18400 rtx xop[10];
18402 if (XVECLEN (operands[0], 0) == 1)
18403 return "lwz %2,0(%1)";
18405 for (i = 0; i < words; i++)
18406 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
18408 if (i == words-1)
18410 xop[0] = GEN_INT (4 * (words-1));
18411 xop[1] = operands[1];
18412 xop[2] = operands[2];
18413 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
18414 return "";
18416 else if (i == 0)
18418 xop[0] = GEN_INT (4 * (words-1));
18419 xop[1] = operands[1];
18420 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
18421 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
18422 return "";
18424 else
18426 for (j = 0; j < words; j++)
18427 if (j != i)
18429 xop[0] = GEN_INT (j * 4);
18430 xop[1] = operands[1];
18431 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
18432 output_asm_insn ("lwz %2,%0(%1)", xop);
18434 xop[0] = GEN_INT (i * 4);
18435 xop[1] = operands[1];
18436 output_asm_insn ("lwz %1,%0(%1)", xop);
18437 return "";
18441 return "lswi %2,%1,%N0";
18445 /* A validation routine: say whether CODE, a condition code, and MODE
18446 match. The other alternatives either don't make sense or should
18447 never be generated. */
18449 void
18450 validate_condition_mode (enum rtx_code code, machine_mode mode)
18452 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18453 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18454 && GET_MODE_CLASS (mode) == MODE_CC);
18456 /* These don't make sense. */
18457 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18458 || mode != CCUNSmode);
18460 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18461 || mode == CCUNSmode);
18463 gcc_assert (mode == CCFPmode
18464 || (code != ORDERED && code != UNORDERED
18465 && code != UNEQ && code != LTGT
18466 && code != UNGT && code != UNLT
18467 && code != UNGE && code != UNLE));
18469 /* These should never be generated except for
18470 flag_finite_math_only. */
18471 gcc_assert (mode != CCFPmode
18472 || flag_finite_math_only
18473 || (code != LE && code != GE
18474 && code != UNEQ && code != LTGT
18475 && code != UNGT && code != UNLT));
18477 /* These are invalid; the information is not there. */
18478 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
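/* For example (illustrative, not in the original source): an unsigned
   comparison such as GTU must carry CCUNSmode, so (gtu ... CCmode) trips
   the second assertion above, and CCEQmode may only be paired with EQ or
   NE per the final assertion.  */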
18482 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18483 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18484 not zero, store there the bit offset (counted from the right) where
18485 the single stretch of 1 bits begins; and similarly for B, the bit
18486 offset where it ends. */
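/* Worked example (illustrative, not in the original source): for
   MASK = 0x0ff0 in SImode the single run of ones spans bits 4..11, so this
   function stores *E = 4 and *B = 11; rs6000_insn_for_and_mask below then
   encodes that run as "rlwinm %0,%1,0,20,27" (MB = 31-11, ME = 31-4).  */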
18488 bool
18489 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18491 unsigned HOST_WIDE_INT val = INTVAL (mask);
18492 unsigned HOST_WIDE_INT bit;
18493 int nb, ne;
18494 int n = GET_MODE_PRECISION (mode);
18496 if (mode != DImode && mode != SImode)
18497 return false;
18499 if (INTVAL (mask) >= 0)
18501 bit = val & -val;
18502 ne = exact_log2 (bit);
18503 nb = exact_log2 (val + bit);
18505 else if (val + 1 == 0)
18507 nb = n;
18508 ne = 0;
18510 else if (val & 1)
18512 val = ~val;
18513 bit = val & -val;
18514 nb = exact_log2 (bit);
18515 ne = exact_log2 (val + bit);
18517 else
18519 bit = val & -val;
18520 ne = exact_log2 (bit);
18521 if (val + bit == 0)
18522 nb = n;
18523 else
18524 nb = 0;
18527 nb--;
18529 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18530 return false;
18532 if (b)
18533 *b = nb;
18534 if (e)
18535 *e = ne;
18537 return true;
18540 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18541 or rldicr instruction, to implement an AND with it in mode MODE. */
18543 bool
18544 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18546 int nb, ne;
18548 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18549 return false;
18551 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18552 does not wrap. */
18553 if (mode == DImode)
18554 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18556 /* For SImode, rlwinm can do everything. */
18557 if (mode == SImode)
18558 return (nb < 32 && ne < 32);
18560 return false;
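/* Illustrative DImode cases (not in the original source): 0xffffffff
   (nb = 31, ne = 0) is handled by "rldicl %0,%1,0,32", and
   0xffffffff00000000 (nb = 63, ne = 32) by "rldicr %0,%1,0,31"; a wrapping
   mask such as 0xffff00000000ffff (nb = 15, ne = 48) is rejected here,
   since no single rldicl/rldicr/rlwinm can produce it.  */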
18563 /* Return the instruction template for an AND with mask in mode MODE, with
18564 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18566 const char *
18567 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18569 int nb, ne;
18571 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18572 gcc_unreachable ();
18574 if (mode == DImode && ne == 0)
18576 operands[3] = GEN_INT (63 - nb);
18577 if (dot)
18578 return "rldicl. %0,%1,0,%3";
18579 return "rldicl %0,%1,0,%3";
18582 if (mode == DImode && nb == 63)
18584 operands[3] = GEN_INT (63 - ne);
18585 if (dot)
18586 return "rldicr. %0,%1,0,%3";
18587 return "rldicr %0,%1,0,%3";
18590 if (nb < 32 && ne < 32)
18592 operands[3] = GEN_INT (31 - nb);
18593 operands[4] = GEN_INT (31 - ne);
18594 if (dot)
18595 return "rlwinm. %0,%1,0,%3,%4";
18596 return "rlwinm %0,%1,0,%3,%4";
18599 gcc_unreachable ();
18602 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18603 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18604 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18606 bool
18607 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18609 int nb, ne;
18611 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18612 return false;
18614 int n = GET_MODE_PRECISION (mode);
18615 int sh = -1;
18617 if (CONST_INT_P (XEXP (shift, 1)))
18619 sh = INTVAL (XEXP (shift, 1));
18620 if (sh < 0 || sh >= n)
18621 return false;
18624 rtx_code code = GET_CODE (shift);
18626 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18627 if (sh == 0)
18628 code = ROTATE;
18630 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18631 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18632 code = ASHIFT;
18633 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18635 code = LSHIFTRT;
18636 sh = n - sh;
18639 /* DImode rotates need rld*. */
18640 if (mode == DImode && code == ROTATE)
18641 return (nb == 63 || ne == 0 || ne == sh);
18643 /* SImode rotates need rlw*. */
18644 if (mode == SImode && code == ROTATE)
18645 return (nb < 32 && ne < 32 && sh < 32);
18647 /* Wrap-around masks are only okay for rotates. */
18648 if (ne > nb)
18649 return false;
18651 /* Variable shifts are only okay for rotates. */
18652 if (sh < 0)
18653 return false;
18655 /* Don't allow ASHIFT if the mask is wrong for that. */
18656 if (code == ASHIFT && ne < sh)
18657 return false;
18659 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18660 if the mask is wrong for that. */
18661 if (nb < 32 && ne < 32 && sh < 32
18662 && !(code == LSHIFTRT && nb >= 32 - sh))
18663 return true;
18665 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18666 if the mask is wrong for that. */
18667 if (code == LSHIFTRT)
18668 sh = 64 - sh;
18669 if (nb == 63 || ne == 0 || ne == sh)
18670 return !(code == LSHIFTRT && nb >= sh);
18672 return false;
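/* Illustrative example (not in the original source): for SImode
   (ashift X 4) with mask 0xff0 (nb = 11, ne = 4, sh = 4) this returns
   true, and rs6000_insn_for_shift_mask below emits
   "rlwinm %0,%1,4,20,27"; the rotate differs from the shift only in the
   low bits, which the mask clears anyway.  */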
18675 /* Return the instruction template for a shift with mask in mode MODE, with
18676 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18678 const char *
18679 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18681 int nb, ne;
18683 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18684 gcc_unreachable ();
18686 if (mode == DImode && ne == 0)
18688 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18689 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18690 operands[3] = GEN_INT (63 - nb);
18691 if (dot)
18692 return "rld%I2cl. %0,%1,%2,%3";
18693 return "rld%I2cl %0,%1,%2,%3";
18696 if (mode == DImode && nb == 63)
18698 operands[3] = GEN_INT (63 - ne);
18699 if (dot)
18700 return "rld%I2cr. %0,%1,%2,%3";
18701 return "rld%I2cr %0,%1,%2,%3";
18704 if (mode == DImode
18705 && GET_CODE (operands[4]) != LSHIFTRT
18706 && CONST_INT_P (operands[2])
18707 && ne == INTVAL (operands[2]))
18709 operands[3] = GEN_INT (63 - nb);
18710 if (dot)
18711 return "rld%I2c. %0,%1,%2,%3";
18712 return "rld%I2c %0,%1,%2,%3";
18715 if (nb < 32 && ne < 32)
18717 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18718 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18719 operands[3] = GEN_INT (31 - nb);
18720 operands[4] = GEN_INT (31 - ne);
18721 /* This insn can also be a 64-bit rotate with mask that really makes
18722 it just a shift right (with mask); the %h below are to adjust for
18723 that situation (shift count is >= 32 in that case). */
18724 if (dot)
18725 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18726 return "rlw%I2nm %0,%1,%h2,%3,%4";
18729 gcc_unreachable ();
18732 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18733 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18734 ASHIFT, or LSHIFTRT) in mode MODE. */
18736 bool
18737 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18739 int nb, ne;
18741 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18742 return false;
18744 int n = GET_MODE_PRECISION (mode);
18746 int sh = INTVAL (XEXP (shift, 1));
18747 if (sh < 0 || sh >= n)
18748 return false;
18750 rtx_code code = GET_CODE (shift);
18752 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18753 if (sh == 0)
18754 code = ROTATE;
18756 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18757 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18758 code = ASHIFT;
18759 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18761 code = LSHIFTRT;
18762 sh = n - sh;
18765 /* DImode rotates need rldimi. */
18766 if (mode == DImode && code == ROTATE)
18767 return (ne == sh);
18769 /* SImode rotates need rlwimi. */
18770 if (mode == SImode && code == ROTATE)
18771 return (nb < 32 && ne < 32 && sh < 32);
18773 /* Wrap-around masks are only okay for rotates. */
18774 if (ne > nb)
18775 return false;
18777 /* Don't allow ASHIFT if the mask is wrong for that. */
18778 if (code == ASHIFT && ne < sh)
18779 return false;
18781 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18782 if the mask is wrong for that. */
18783 if (nb < 32 && ne < 32 && sh < 32
18784 && !(code == LSHIFTRT && nb >= 32 - sh))
18785 return true;
18787 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18788 if the mask is wrong for that. */
18789 if (code == LSHIFTRT)
18790 sh = 64 - sh;
18791 if (ne == sh)
18792 return !(code == LSHIFTRT && nb >= sh);
18794 return false;
18797 /* Return the instruction template for an insert with mask in mode MODE, with
18798 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18800 const char *
18801 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18803 int nb, ne;
18805 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18806 gcc_unreachable ();
18808 /* Prefer rldimi because rlwimi is cracked. */
18809 if (TARGET_POWERPC64
18810 && (!dot || mode == DImode)
18811 && GET_CODE (operands[4]) != LSHIFTRT
18812 && ne == INTVAL (operands[2]))
18814 operands[3] = GEN_INT (63 - nb);
18815 if (dot)
18816 return "rldimi. %0,%1,%2,%3";
18817 return "rldimi %0,%1,%2,%3";
18820 if (nb < 32 && ne < 32)
18822 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18823 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18824 operands[3] = GEN_INT (31 - nb);
18825 operands[4] = GEN_INT (31 - ne);
18826 if (dot)
18827 return "rlwimi. %0,%1,%2,%3,%4";
18828 return "rlwimi %0,%1,%2,%3,%4";
18831 gcc_unreachable ();
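/* Illustrative example (not in the original source): inserting
   (Y << 8) & 0xff00 into X uses mask 0xff00 (nb = 15, ne = 8) with
   sh = 8; on a 32-bit target this emits "rlwimi %0,%1,8,16,23"
   (MB = 31-15, ME = 31-8), while a 64-bit target prefers the rldimi form
   above because rlwimi is cracked.  */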
18834 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18835 using two machine instructions. */
18837 bool
18838 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18840 /* There are two kinds of AND we can handle with two insns:
18841 1) those we can do with two rl* insns;
18842 2) ori[s];xori[s].
18844 We do not handle that last case yet. */
18846 /* If there is just one stretch of ones, we can do it. */
18847 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18848 return true;
18850 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18851 one insn, we can do the whole thing with two. */
18852 unsigned HOST_WIDE_INT val = INTVAL (c);
18853 unsigned HOST_WIDE_INT bit1 = val & -val;
18854 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18855 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18856 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18857 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
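/* Worked example (illustrative, not in the original source): an AND with
   0xf0f in SImode has two runs of ones, so it is not a single rl* mask.
   Here bit1 = 1, bit2 = 0x10, bit3 = 0x100, giving mask2 = 0xfff (the
   value with its lowest hole filled) and mask1 = 0xffffff0f (a wrapping
   rlwinm mask), so two rlwinm insns suffice and this returns true.  */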
18860 /* Emit a potentially record-form instruction, setting DST from SRC.
18861 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18862 signed comparison of DST with zero. If DOT is 1, the generated RTL
18863 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18864 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18865 a separate COMPARE. */
18867 static void
18868 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18870 if (dot == 0)
18872 emit_move_insn (dst, src);
18873 return;
18876 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18878 emit_move_insn (dst, src);
18879 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18880 return;
18883 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18884 if (dot == 1)
18886 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18887 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18889 else
18891 rtx set = gen_rtx_SET (dst, src);
18892 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
18896 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18897 If EXPAND is true, split rotate-and-mask instructions we generate to
18898 their constituent parts as well (this is used during expand); if DOT
18899 is 1, make the last insn a record-form instruction clobbering the
18900 destination GPR and setting the CC reg (from operands[3]); if 2, set
18901 that GPR as well as the CC reg. */
18903 void
18904 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18906 gcc_assert (!(expand && dot));
18908 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18910 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18911 shift right. This generates better code than doing the masks without
18912 shifts, or shifting first right and then left. */
18913 int nb, ne;
18914 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18916 gcc_assert (mode == DImode);
18918 int shift = 63 - nb;
18919 if (expand)
18921 rtx tmp1 = gen_reg_rtx (DImode);
18922 rtx tmp2 = gen_reg_rtx (DImode);
18923 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18924 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18925 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18927 else
18929 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18930 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18931 emit_move_insn (operands[0], tmp);
18932 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18933 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18935 return;
18938 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18939 that does the rest. */
18940 unsigned HOST_WIDE_INT bit1 = val & -val;
18941 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18942 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18943 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18945 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18946 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18948 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18950 /* Two "no-rotate"-and-mask instructions, for SImode. */
18951 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18953 gcc_assert (mode == SImode);
18955 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18956 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18957 emit_move_insn (reg, tmp);
18958 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18959 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18960 return;
18963 gcc_assert (mode == DImode);
18965 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18966 insns; we have to do the first in SImode, because it wraps. */
18967 if (mask2 <= 0xffffffff
18968 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18970 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18971 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18972 GEN_INT (mask1));
18973 rtx reg_low = gen_lowpart (SImode, reg);
18974 emit_move_insn (reg_low, tmp);
18975 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18976 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18977 return;
18980 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18981 at the top end), rotate back and clear the other hole. */
18982 int right = exact_log2 (bit3);
18983 int left = 64 - right;
18985 /* Rotate the mask too. */
18986 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18988 if (expand)
18990 rtx tmp1 = gen_reg_rtx (DImode);
18991 rtx tmp2 = gen_reg_rtx (DImode);
18992 rtx tmp3 = gen_reg_rtx (DImode);
18993 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18994 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18995 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18996 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18998 else
19000 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19001 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19002 emit_move_insn (operands[0], tmp);
19003 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19004 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19005 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19009 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
19010 for the lfq and stfq insns, provided the registers are hard registers. */
19012 int
19013 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19015 /* We might have been passed a SUBREG. */
19016 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19017 return 0;
19019 /* We might have been passed non-floating-point registers. */
19020 if (!FP_REGNO_P (REGNO (reg1))
19021 || !FP_REGNO_P (REGNO (reg2)))
19022 return 0;
19024 return (REGNO (reg1) == REGNO (reg2) - 1);
19027 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19028 addr1 and addr2 must be in consecutive memory locations
19029 (addr2 == addr1 + 8). */
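/* For example (illustrative, not in the original source): MEM1 = [r3+8]
   and MEM2 = [r3+16] qualify; [r3+8] with [r4+16] (different base
   register) or [r3+8] with [r3+20] (offsets not exactly 8 apart) do
   not.  */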
19031 int
19032 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19034 rtx addr1, addr2;
19035 unsigned int reg1, reg2;
19036 int offset1, offset2;
19038 /* The mems cannot be volatile. */
19039 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19040 return 0;
19042 addr1 = XEXP (mem1, 0);
19043 addr2 = XEXP (mem2, 0);
19045 /* Extract an offset (if used) from the first addr. */
19046 if (GET_CODE (addr1) == PLUS)
19048 /* If not a REG, return zero. */
19049 if (GET_CODE (XEXP (addr1, 0)) != REG)
19050 return 0;
19051 else
19053 reg1 = REGNO (XEXP (addr1, 0));
19054 /* The offset must be constant! */
19055 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19056 return 0;
19057 offset1 = INTVAL (XEXP (addr1, 1));
19060 else if (GET_CODE (addr1) != REG)
19061 return 0;
19062 else
19064 reg1 = REGNO (addr1);
19065 /* This was a simple (mem (reg)) expression. Offset is 0. */
19066 offset1 = 0;
19069 /* And now for the second addr. */
19070 if (GET_CODE (addr2) == PLUS)
19072 /* If not a REG, return zero. */
19073 if (GET_CODE (XEXP (addr2, 0)) != REG)
19074 return 0;
19075 else
19077 reg2 = REGNO (XEXP (addr2, 0));
19078 /* The offset must be constant. */
19079 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19080 return 0;
19081 offset2 = INTVAL (XEXP (addr2, 1));
19084 else if (GET_CODE (addr2) != REG)
19085 return 0;
19086 else
19088 reg2 = REGNO (addr2);
19089 /* This was a simple (mem (reg)) expression. Offset is 0. */
19090 offset2 = 0;
19093 /* Both of these must have the same base register. */
19094 if (reg1 != reg2)
19095 return 0;
19097 /* The offset for the second addr must be 8 more than the first addr. */
19098 if (offset2 != offset1 + 8)
19099 return 0;
19101 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19102 instructions. */
19103 return 1;
19107 rtx
19108 rs6000_secondary_memory_needed_rtx (machine_mode mode)
19110 static bool eliminated = false;
19111 rtx ret;
19113 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
19114 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19115 else
19117 rtx mem = cfun->machine->sdmode_stack_slot;
19118 gcc_assert (mem != NULL_RTX);
19120 if (!eliminated)
19122 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
19123 cfun->machine->sdmode_stack_slot = mem;
19124 eliminated = true;
19126 ret = mem;
19129 if (TARGET_DEBUG_ADDR)
19131 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19132 GET_MODE_NAME (mode));
19133 if (!ret)
19134 fprintf (stderr, "\tNULL_RTX\n");
19135 else
19136 debug_rtx (ret);
19139 return ret;
19142 /* Return the mode to be used for memory when a secondary memory
19143 location is needed. For SDmode values we need to use DDmode; in
19144 all other cases we can use the same mode. */
19145 machine_mode
19146 rs6000_secondary_memory_needed_mode (machine_mode mode)
19148 if (lra_in_progress && mode == SDmode)
19149 return DDmode;
19150 return mode;
19153 static tree
19154 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
19156 /* Don't walk into types. */
19157 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
19159 *walk_subtrees = 0;
19160 return NULL_TREE;
19163 switch (TREE_CODE (*tp))
19165 case VAR_DECL:
19166 case PARM_DECL:
19167 case FIELD_DECL:
19168 case RESULT_DECL:
19169 case SSA_NAME:
19170 case REAL_CST:
19171 case MEM_REF:
19172 case VIEW_CONVERT_EXPR:
19173 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
19174 return *tp;
19175 break;
19176 default:
19177 break;
19180 return NULL_TREE;
19183 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19184 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19185 only work on the traditional altivec registers, note if an altivec register
19186 was chosen. */
19188 static enum rs6000_reg_type
19189 register_to_reg_type (rtx reg, bool *is_altivec)
19191 HOST_WIDE_INT regno;
19192 enum reg_class rclass;
19194 if (GET_CODE (reg) == SUBREG)
19195 reg = SUBREG_REG (reg);
19197 if (!REG_P (reg))
19198 return NO_REG_TYPE;
19200 regno = REGNO (reg);
19201 if (regno >= FIRST_PSEUDO_REGISTER)
19203 if (!lra_in_progress && !reload_in_progress && !reload_completed)
19204 return PSEUDO_REG_TYPE;
19206 regno = true_regnum (reg);
19207 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19208 return PSEUDO_REG_TYPE;
19211 gcc_assert (regno >= 0);
19213 if (is_altivec && ALTIVEC_REGNO_P (regno))
19214 *is_altivec = true;
19216 rclass = rs6000_regno_regclass[regno];
19217 return reg_class_to_reg_type[(int)rclass];
19220 /* Helper function to return the cost of adding a TOC entry address. */
19222 static inline int
19223 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19225 int ret;
19227 if (TARGET_CMODEL != CMODEL_SMALL)
19228 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19230 else
19231 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19233 return ret;
19236 /* Helper function for rs6000_secondary_reload to determine whether the memory
19237 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19238 needs reloading. Return negative if the memory is not handled by the memory
19239 helper functions and to try a different reload method, 0 if no additional
19240 instructions are needed, and positive to give the extra cost for the
19241 memory. */
19243 static int
19244 rs6000_secondary_reload_memory (rtx addr,
19245 enum reg_class rclass,
19246 machine_mode mode)
19248 int extra_cost = 0;
19249 rtx reg, and_arg, plus_arg0, plus_arg1;
19250 addr_mask_type addr_mask;
19251 const char *type = NULL;
19252 const char *fail_msg = NULL;
19254 if (GPR_REG_CLASS_P (rclass))
19255 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19257 else if (rclass == FLOAT_REGS)
19258 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19260 else if (rclass == ALTIVEC_REGS)
19261 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19263 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19264 else if (rclass == VSX_REGS)
19265 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19266 & ~RELOAD_REG_AND_M16);
19268 /* If the register allocator hasn't made up its mind yet on the register
19269 class to use, settle on defaults. */
19270 else if (rclass == NO_REGS)
19272 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19273 & ~RELOAD_REG_AND_M16);
19275 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19276 addr_mask &= ~(RELOAD_REG_INDEXED
19277 | RELOAD_REG_PRE_INCDEC
19278 | RELOAD_REG_PRE_MODIFY);
19281 else
19282 addr_mask = 0;
19284 /* If the register isn't valid in this register class, just return now. */
19285 if ((addr_mask & RELOAD_REG_VALID) == 0)
19287 if (TARGET_DEBUG_ADDR)
19289 fprintf (stderr,
19290 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19291 "not valid in class\n",
19292 GET_MODE_NAME (mode), reg_class_names[rclass]);
19293 debug_rtx (addr);
19296 return -1;
19299 switch (GET_CODE (addr))
19301 /* Does the register class support auto-update forms for this mode? We
19302 don't need a scratch register, since the powerpc only supports
19303 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19304 case PRE_INC:
19305 case PRE_DEC:
19306 reg = XEXP (addr, 0);
19307 if (!base_reg_operand (reg, GET_MODE (reg)))
19309 fail_msg = "no base register #1";
19310 extra_cost = -1;
19313 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19315 extra_cost = 1;
19316 type = "update";
19318 break;
19320 case PRE_MODIFY:
19321 reg = XEXP (addr, 0);
19322 plus_arg1 = XEXP (addr, 1);
19323 if (!base_reg_operand (reg, GET_MODE (reg))
19324 || GET_CODE (plus_arg1) != PLUS
19325 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19327 fail_msg = "bad PRE_MODIFY";
19328 extra_cost = -1;
19331 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19333 extra_cost = 1;
19334 type = "update";
19336 break;
19338 /* Do we need to simulate AND -16 to clear the bottom address bits used
19339 in VMX load/stores? Only allow the AND for vector sizes. */
19340 case AND:
19341 and_arg = XEXP (addr, 0);
19342 if (GET_MODE_SIZE (mode) != 16
19343 || GET_CODE (XEXP (addr, 1)) != CONST_INT
19344 || INTVAL (XEXP (addr, 1)) != -16)
19346 fail_msg = "bad Altivec AND #1";
19347 extra_cost = -1;
19350 if (rclass != ALTIVEC_REGS)
19352 if (legitimate_indirect_address_p (and_arg, false))
19353 extra_cost = 1;
19355 else if (legitimate_indexed_address_p (and_arg, false))
19356 extra_cost = 2;
19358 else
19360 fail_msg = "bad Altivec AND #2";
19361 extra_cost = -1;
19364 type = "and";
19366 break;
19368 /* If this is an indirect address, make sure it is a base register. */
19369 case REG:
19370 case SUBREG:
19371 if (!legitimate_indirect_address_p (addr, false))
19373 extra_cost = 1;
19374 type = "move";
19376 break;
19378 /* If this is an indexed address, make sure the register class can handle
19379 indexed addresses for this mode. */
19380 case PLUS:
19381 plus_arg0 = XEXP (addr, 0);
19382 plus_arg1 = XEXP (addr, 1);
19384 /* (plus (plus (reg) (constant)) (constant)) is generated during
19385 push_reload processing, so handle it now. */
19386 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19388 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19390 extra_cost = 1;
19391 type = "offset";
19395 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19396 push_reload processing, so handle it now. */
19397 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19399 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19401 extra_cost = 1;
19402 type = "indexed #2";
19406 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19408 fail_msg = "no base register #2";
19409 extra_cost = -1;
19412 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19414 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19415 || !legitimate_indexed_address_p (addr, false))
19417 extra_cost = 1;
19418 type = "indexed";
19422 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19423 && CONST_INT_P (plus_arg1))
19425 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19427 extra_cost = 1;
19428 type = "vector d-form offset";
19432 /* Make sure the register class can handle offset addresses. */
19433 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19435 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19437 extra_cost = 1;
19438 type = "offset #2";
19442 else
19444 fail_msg = "bad PLUS";
19445 extra_cost = -1;
19448 break;
19450 case LO_SUM:
19451 /* Quad offsets are restricted and can't handle normal addresses. */
19452 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19454 extra_cost = -1;
19455 type = "vector d-form lo_sum";
19458 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19460 fail_msg = "bad LO_SUM";
19461 extra_cost = -1;
19464 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19466 extra_cost = 1;
19467 type = "lo_sum";
19469 break;
19471 /* Static addresses need to create a TOC entry. */
19472 case CONST:
19473 case SYMBOL_REF:
19474 case LABEL_REF:
19475 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19477 extra_cost = -1;
19478 type = "vector d-form lo_sum #2";
19481 else
19483 type = "address";
19484 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19486 break;
19488 /* TOC references look like offsettable memory. */
19489 case UNSPEC:
19490 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19492 fail_msg = "bad UNSPEC";
19493 extra_cost = -1;
19496 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19498 extra_cost = -1;
19499 type = "vector d-form lo_sum #3";
19502 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19504 extra_cost = 1;
19505 type = "toc reference";
19507 break;
19509 default:
19511 fail_msg = "bad address";
19512 extra_cost = -1;
19516 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19518 if (extra_cost < 0)
19519 fprintf (stderr,
19520 "rs6000_secondary_reload_memory error: mode = %s, "
19521 "class = %s, addr_mask = '%s', %s\n",
19522 GET_MODE_NAME (mode),
19523 reg_class_names[rclass],
19524 rs6000_debug_addr_mask (addr_mask, false),
19525 (fail_msg != NULL) ? fail_msg : "<bad address>");
19527 else
19528 fprintf (stderr,
19529 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19530 "addr_mask = '%s', extra cost = %d, %s\n",
19531 GET_MODE_NAME (mode),
19532 reg_class_names[rclass],
19533 rs6000_debug_addr_mask (addr_mask, false),
19534 extra_cost,
19535 (type) ? type : "<none>");
19537 debug_rtx (addr);
19540 return extra_cost;
19543 /* Helper function for rs6000_secondary_reload to return true if a move to a
19544 different register class is really a simple move. */
19546 static bool
19547 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19548 enum rs6000_reg_type from_type,
19549 machine_mode mode)
19551 int size;
19553 /* Add support for various direct moves available. In this function, we only
19554 look at cases where we don't need any extra registers, and one or more
19555 simple move insns are issued. At present, 32-bit integers are not allowed
19556 in FPR/VSX registers. Single-precision binary floating point is not a simple
19557 move because we need to convert to the single precision memory layout.
19558 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19559 need special direct move handling, which we do not support yet. */
19560 size = GET_MODE_SIZE (mode);
19561 if (TARGET_DIRECT_MOVE
19562 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
19563 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19564 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19565 return true;
19567 else if (TARGET_DIRECT_MOVE_128 && size == 16 && mode != TDmode
19568 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19569 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
19570 return true;
19572 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19573 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19574 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19575 return true;
19577 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19578 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19579 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19580 return true;
19582 return false;
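/* Illustrative cases (not in the original source): a DImode GPR<->VSX
   move on a 64-bit ISA 2.07 target is a single mtvsrd/mfvsrd, so it is
   reported as a simple move; SFmode is rejected here because, as noted
   above, the value must also be converted between the register format and
   the single-precision memory layout.  */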
19585 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
19586 special direct moves that involve allocating an extra register; return
19587 true if such a move is available, recording the insn code of the helper
19588 pattern and its extra cost in SRI, and false if not. */
19590 static bool
19591 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19592 enum rs6000_reg_type from_type,
19593 machine_mode mode,
19594 secondary_reload_info *sri,
19595 bool altivec_p)
19597 bool ret = false;
19598 enum insn_code icode = CODE_FOR_nothing;
19599 int cost = 0;
19600 int size = GET_MODE_SIZE (mode);
19602 if (TARGET_POWERPC64 && size == 16)
19604 /* Handle moving 128-bit values from GPRs to VSX registers on
19605 ISA 2.07 (power8, power9) when running in 64-bit mode using
19606 XXPERMDI to glue the two 64-bit values back together. */
19607 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19609 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19610 icode = reg_addr[mode].reload_vsx_gpr;
19613 /* Handle moving 128-bit values from VSX registers to GPRs on
19614 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19615 bottom 64-bit value. */
19616 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19618 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19619 icode = reg_addr[mode].reload_gpr_vsx;
19623 else if (TARGET_POWERPC64 && mode == SFmode)
19625 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19627 cost = 3; /* xscvdpspn, mfvsrd, and. */
19628 icode = reg_addr[mode].reload_gpr_vsx;
19631 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19633 cost = 2; /* mtvsrz, xscvspdpn. */
19634 icode = reg_addr[mode].reload_vsx_gpr;
19638 else if (!TARGET_POWERPC64 && size == 8)
19640 /* Handle moving 64-bit values from GPRs to floating point registers on
19641 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19642 32-bit values back together. Altivec register classes must be handled
19643 specially since a different instruction is used, and the secondary
19644 reload support requires a single instruction class in the scratch
19645 register constraint. However, right now TFmode is not allowed in
19646 Altivec registers, so the pattern will never match. */
19647 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19649 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19650 icode = reg_addr[mode].reload_fpr_gpr;
19654 if (icode != CODE_FOR_nothing)
19656 ret = true;
19657 if (sri)
19659 sri->icode = icode;
19660 sri->extra_cost = cost;
19664 return ret;
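
/* For example (a sketch only; the actual insns come from the
   reload_vsx_gpr/reload_gpr_vsx/reload_fpr_gpr helpers recorded in
   reg_addr, and register numbers are illustrative), the 128-bit
   GPR -> VSX case above costs 3 because it expands to roughly:

	mtvsrd vs0,r3			# high 64 bits
	mtvsrd vs1,r4			# low 64 bits
	xxpermdi vs34,vs0,vs1,0		# glue the halves together

   while the SFmode GPR -> VSX case costs 2:

	mtvsrwz vs0,r3			# move the 32-bit image
	xscvspdpn vs1,vs0		# convert from SP memory layout  */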
19667 /* Return whether a move between two register classes can be done either
19668 directly (simple move) or via a pattern that uses a single extra temporary
19669 (using ISA 2.07's direct move in this case). */
19671 static bool
19672 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19673 enum rs6000_reg_type from_type,
19674 machine_mode mode,
19675 secondary_reload_info *sri,
19676 bool altivec_p)
19678 /* Fall back to load/store reloads if either type is not a register. */
19679 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19680 return false;
19682 /* If we haven't allocated registers yet, assume the move can be done for the
19683 standard register types. */
19684 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19685 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19686 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19687 return true;
19689 /* Moves within the same set of registers are simple moves for non-specialized
19690 registers. */
19691 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19692 return true;
19694 /* Check whether a simple move can be done directly. */
19695 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19697 if (sri)
19699 sri->icode = CODE_FOR_nothing;
19700 sri->extra_cost = 0;
19702 return true;
19705 /* Now check if we can do it in a few steps. */
19706 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19707 altivec_p);
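
/* Note that rs6000_secondary_memory_needed below asks the yes/no form of
   this question by passing a null SRI:

	if (rs6000_secondary_reload_move (to_type, from_type, mode,
					  (secondary_reload_info *) 0,
					  altivec_p))
	  return false;

   so the SRI fields are only filled in when a caller supplies one.  */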
19710 /* Inform reload about cases where moving X with a mode MODE to a register in
19711 RCLASS requires an extra scratch or immediate register. Return the class
19712 needed for the immediate register.
19714 For VSX and Altivec, we may need a register to convert sp+offset into
19715 reg+reg addressing.
19717 For misaligned 64-bit gpr loads and stores we need a register to
19718 convert an offset address to indirect. */
19720 static reg_class_t
19721 rs6000_secondary_reload (bool in_p,
19722 rtx x,
19723 reg_class_t rclass_i,
19724 machine_mode mode,
19725 secondary_reload_info *sri)
19727 enum reg_class rclass = (enum reg_class) rclass_i;
19728 reg_class_t ret = ALL_REGS;
19729 enum insn_code icode;
19730 bool default_p = false;
19731 bool done_p = false;
19733 /* Allow subreg of memory before/during reload. */
19734 bool memory_p = (MEM_P (x)
19735 || (!reload_completed && GET_CODE (x) == SUBREG
19736 && MEM_P (SUBREG_REG (x))));
19738 sri->icode = CODE_FOR_nothing;
19739 sri->t_icode = CODE_FOR_nothing;
19740 sri->extra_cost = 0;
19741 icode = ((in_p)
19742 ? reg_addr[mode].reload_load
19743 : reg_addr[mode].reload_store);
19745 if (REG_P (x) || register_operand (x, mode))
19747 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19748 bool altivec_p = (rclass == ALTIVEC_REGS);
19749 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19751 if (!in_p)
19753 enum rs6000_reg_type exchange = to_type;
19754 to_type = from_type;
19755 from_type = exchange;
19758 /* Can we do a direct move of some sort? */
19759 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19760 altivec_p))
19762 icode = (enum insn_code)sri->icode;
19763 default_p = false;
19764 done_p = true;
19765 ret = NO_REGS;
19769 /* Make sure 0.0 is not reloaded or forced into memory. */
19770 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19772 ret = NO_REGS;
19773 default_p = false;
19774 done_p = true;
19777 /* If this is a scalar floating point value and we want to load it into the
19778 traditional Altivec registers, move it via a traditional floating
19779 point register, unless we have D-form addressing. Also make sure that
19780 non-zero constants use a FPR. */
19781 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19782 && !mode_supports_vmx_dform (mode)
19783 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19784 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19786 ret = FLOAT_REGS;
19787 default_p = false;
19788 done_p = true;
19791 /* Handle reload of load/stores if we have reload helper functions. */
19792 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19794 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19795 mode);
19797 if (extra_cost >= 0)
19799 done_p = true;
19800 ret = NO_REGS;
19801 if (extra_cost > 0)
19803 sri->extra_cost = extra_cost;
19804 sri->icode = icode;
19809 /* Handle unaligned loads and stores of integer registers. */
19810 if (!done_p && TARGET_POWERPC64
19811 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19812 && memory_p
19813 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19815 rtx addr = XEXP (x, 0);
19816 rtx off = address_offset (addr);
19818 if (off != NULL_RTX)
19820 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19821 unsigned HOST_WIDE_INT offset = INTVAL (off);
19823 /* We need a secondary reload when our legitimate_address_p
19824 says the address is good (as otherwise the entire address
19825 will be reloaded), and the offset is not a multiple of
19826 four or we have an address wrap. Address wrap will only
19827 occur for LO_SUMs since legitimate_offset_address_p
19828 rejects addresses for 16-byte mems that will wrap. */
19829 if (GET_CODE (addr) == LO_SUM
19830 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19831 && ((offset & 3) != 0
19832 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19833 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19834 && (offset & 3) != 0))
19836 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19837 if (in_p)
19838 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19839 : CODE_FOR_reload_di_load);
19840 else
19841 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19842 : CODE_FOR_reload_di_store);
19843 sri->extra_cost = 2;
19844 ret = NO_REGS;
19845 done_p = true;
19847 else
19848 default_p = true;
19850 else
19851 default_p = true;
19854 if (!done_p && !TARGET_POWERPC64
19855 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19856 && memory_p
19857 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19859 rtx addr = XEXP (x, 0);
19860 rtx off = address_offset (addr);
19862 if (off != NULL_RTX)
19864 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19865 unsigned HOST_WIDE_INT offset = INTVAL (off);
19867 /* We need a secondary reload when our legitimate_address_p
19868 says the address is good (as otherwise the entire address
19869 will be reloaded), and we have a wrap.
19871 legitimate_lo_sum_address_p allows LO_SUM addresses to
19872 have any offset so test for wrap in the low 16 bits.
19874 legitimate_offset_address_p checks for the range
19875 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19876 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19877 [0x7ff4,0x7fff] respectively, so test for the
19878 intersection of these ranges, [0x7ffc,0x7fff] and
19879 [0x7ff4,0x7ff7] respectively.
19881 Note that the address we see here may have been
19882 manipulated by legitimize_reload_address. */
19883 if (GET_CODE (addr) == LO_SUM
19884 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19885 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19887 if (in_p)
19888 sri->icode = CODE_FOR_reload_si_load;
19889 else
19890 sri->icode = CODE_FOR_reload_si_store;
19891 sri->extra_cost = 2;
19892 ret = NO_REGS;
19893 done_p = true;
19895 else
19896 default_p = true;
19898 else
19899 default_p = true;
19902 if (!done_p)
19903 default_p = true;
19905 if (default_p)
19906 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19908 gcc_assert (ret != ALL_REGS);
19910 if (TARGET_DEBUG_ADDR)
19912 fprintf (stderr,
19913 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19914 "mode = %s",
19915 reg_class_names[ret],
19916 in_p ? "true" : "false",
19917 reg_class_names[rclass],
19918 GET_MODE_NAME (mode));
19920 if (reload_completed)
19921 fputs (", after reload", stderr);
19923 if (!done_p)
19924 fputs (", done_p not set", stderr);
19926 if (default_p)
19927 fputs (", default secondary reload", stderr);
19929 if (sri->icode != CODE_FOR_nothing)
19930 fprintf (stderr, ", reload func = %s, extra cost = %d",
19931 insn_data[sri->icode].name, sri->extra_cost);
19933 else if (sri->extra_cost > 0)
19934 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19936 fputs ("\n", stderr);
19937 debug_rtx (x);
19940 return ret;
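
/* With -mdebug=addr the code above prints trace lines of roughly this
   shape (values illustrative):

	rs6000_secondary_reload, return NO_REGS, in_p = true,
	rclass = GENERAL_REGS, mode = DI, reload func = reload_di_load,
	extra cost = 2

   followed by the RTL of X from debug_rtx.  */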
19943 /* Better tracing for rs6000_secondary_reload_inner. */
19945 static void
19946 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19947 bool store_p)
19949 rtx set, clobber;
19951 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19953 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19954 store_p ? "store" : "load");
19956 if (store_p)
19957 set = gen_rtx_SET (mem, reg);
19958 else
19959 set = gen_rtx_SET (reg, mem);
19961 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19962 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19965 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19966 ATTRIBUTE_NORETURN;
19968 static void
19969 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19970 bool store_p)
19972 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19973 gcc_unreachable ();
19976 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19977 reload helper functions. These were identified in
19978 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19979 reload, it calls the insns:
19980 reload_<RELOAD:mode>_<P:mptrsize>_store
19981 reload_<RELOAD:mode>_<P:mptrsize>_load
19983 which in turn calls this function, to do whatever is necessary to create
19984 valid addresses. */
19986 void
19987 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19989 int regno = true_regnum (reg);
19990 machine_mode mode = GET_MODE (reg);
19991 addr_mask_type addr_mask;
19992 rtx addr;
19993 rtx new_addr;
19994 rtx op_reg, op0, op1;
19995 rtx and_op;
19996 rtx cc_clobber;
19997 rtvec rv;
19999 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20000 || !base_reg_operand (scratch, GET_MODE (scratch)))
20001 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20003 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20004 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20006 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20007 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20009 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20010 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20012 else
20013 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20015 /* Make sure the mode is valid in this register class. */
20016 if ((addr_mask & RELOAD_REG_VALID) == 0)
20017 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20019 if (TARGET_DEBUG_ADDR)
20020 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20022 new_addr = addr = XEXP (mem, 0);
20023 switch (GET_CODE (addr))
20025 /* Does the register class support auto update forms for this mode? If
20026 not, do the update now. We don't need a scratch register, since the
20027 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20028 case PRE_INC:
20029 case PRE_DEC:
20030 op_reg = XEXP (addr, 0);
20031 if (!base_reg_operand (op_reg, Pmode))
20032 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20034 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20036 /* PRE_DEC decrements the base register; PRE_INC increments it. */
      emit_insn (gen_add2_insn (op_reg,
				GEN_INT (GET_CODE (addr) == PRE_DEC
					 ? -GET_MODE_SIZE (mode)
					 : GET_MODE_SIZE (mode))));
20037 new_addr = op_reg;
20039 break;
20041 case PRE_MODIFY:
20042 op0 = XEXP (addr, 0);
20043 op1 = XEXP (addr, 1);
20044 if (!base_reg_operand (op0, Pmode)
20045 || GET_CODE (op1) != PLUS
20046 || !rtx_equal_p (op0, XEXP (op1, 0)))
20047 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20049 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20051 emit_insn (gen_rtx_SET (op0, op1));
20052 new_addr = op0; /* The updated base register is the new address. */
20054 break;
20056 /* Do we need to simulate AND -16 to clear the bottom address bits used
20057 in VMX load/stores? */
20058 case AND:
20059 op0 = XEXP (addr, 0);
20060 op1 = XEXP (addr, 1);
20061 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20063 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20064 op_reg = op0;
20066 else if (GET_CODE (op1) == PLUS)
20068 emit_insn (gen_rtx_SET (scratch, op1));
20069 op_reg = scratch;
20072 else
20073 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20075 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20076 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20077 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20078 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20079 new_addr = scratch;
20081 break;
20083 /* If this is an indirect address, make sure it is a base register. */
20084 case REG:
20085 case SUBREG:
20086 if (!base_reg_operand (addr, GET_MODE (addr)))
20088 emit_insn (gen_rtx_SET (scratch, addr));
20089 new_addr = scratch;
20091 break;
20093 /* If this is an indexed address, make sure the register class can handle
20094 indexed addresses for this mode. */
20095 case PLUS:
20096 op0 = XEXP (addr, 0);
20097 op1 = XEXP (addr, 1);
20098 if (!base_reg_operand (op0, Pmode))
20099 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20101 else if (int_reg_operand (op1, Pmode))
20103 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20105 emit_insn (gen_rtx_SET (scratch, addr));
20106 new_addr = scratch;
20110 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20112 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20113 || !quad_address_p (addr, mode, false))
20115 emit_insn (gen_rtx_SET (scratch, addr));
20116 new_addr = scratch;
20120 /* Make sure the register class can handle offset addresses. */
20121 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20123 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20125 emit_insn (gen_rtx_SET (scratch, addr));
20126 new_addr = scratch;
20130 else
20131 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20133 break;
20135 case LO_SUM:
20136 op0 = XEXP (addr, 0);
20137 op1 = XEXP (addr, 1);
20138 if (!base_reg_operand (op0, Pmode))
20139 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20141 else if (int_reg_operand (op1, Pmode))
20143 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20145 emit_insn (gen_rtx_SET (scratch, addr));
20146 new_addr = scratch;
20150 /* Quad offsets are restricted and can't handle normal addresses. */
20151 else if (mode_supports_vsx_dform_quad (mode))
20153 emit_insn (gen_rtx_SET (scratch, addr));
20154 new_addr = scratch;
20157 /* Make sure the register class can handle offset addresses. */
20158 else if (legitimate_lo_sum_address_p (mode, addr, false))
20160 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20162 emit_insn (gen_rtx_SET (scratch, addr));
20163 new_addr = scratch;
20167 else
20168 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20170 break;
20172 case SYMBOL_REF:
20173 case CONST:
20174 case LABEL_REF:
20175 rs6000_emit_move (scratch, addr, Pmode);
20176 new_addr = scratch;
20177 break;
20179 default:
20180 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20183 /* Adjust the address if it changed. */
20184 if (addr != new_addr)
20186 mem = replace_equiv_address_nv (mem, new_addr);
20187 if (TARGET_DEBUG_ADDR)
20188 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20191 /* Now create the move. */
20192 if (store_p)
20193 emit_insn (gen_rtx_SET (mem, reg));
20194 else
20195 emit_insn (gen_rtx_SET (reg, mem));
20197 return;
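
/* Sketch of the AND case above (illustrative registers, 64-bit target):
   an Altivec address such as

	(and:DI (plus:DI (reg r9) (reg r10)) (const_int -16))

   is rewritten, when the register class cannot use the AND form directly,
   into roughly

	add r11,r9,r10		# scratch = base + index
	rldicr r11,r11,0,59	# clear the low 4 bits (the emitted AND)

   and the load or store then uses the scratch register indirectly.  */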
20200 /* Convert reloads involving 64-bit gprs and misaligned offset
20201 addressing, or multiple 32-bit gprs and offsets that are too large,
20202 to use indirect addressing. */
20204 void
20205 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20207 int regno = true_regnum (reg);
20208 enum reg_class rclass;
20209 rtx addr;
20210 rtx scratch_or_premodify = scratch;
20212 if (TARGET_DEBUG_ADDR)
20214 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20215 store_p ? "store" : "load");
20216 fprintf (stderr, "reg:\n");
20217 debug_rtx (reg);
20218 fprintf (stderr, "mem:\n");
20219 debug_rtx (mem);
20220 fprintf (stderr, "scratch:\n");
20221 debug_rtx (scratch);
20224 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20225 gcc_assert (GET_CODE (mem) == MEM);
20226 rclass = REGNO_REG_CLASS (regno);
20227 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20228 addr = XEXP (mem, 0);
20230 if (GET_CODE (addr) == PRE_MODIFY)
20232 gcc_assert (REG_P (XEXP (addr, 0))
20233 && GET_CODE (XEXP (addr, 1)) == PLUS
20234 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20235 scratch_or_premodify = XEXP (addr, 0);
20236 if (!HARD_REGISTER_P (scratch_or_premodify))
20237 /* If we have a pseudo here then reload will have arranged
20238 to have it replaced, but only in the original insn.
20239 Use the replacement here too. */
20240 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20242 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20243 expressions from the original insn, without unsharing them.
20244 Any RTL that points into the original insn will of course
20245 have register replacements applied. That is why we don't
20246 need to look for replacements under the PLUS. */
20247 addr = XEXP (addr, 1);
20249 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20251 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20253 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20255 /* Now create the move. */
20256 if (store_p)
20257 emit_insn (gen_rtx_SET (mem, reg));
20258 else
20259 emit_insn (gen_rtx_SET (reg, mem));
20261 return;
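
/* Illustrative example (register numbers arbitrary): a misaligned DS-form
   access such as

	ld r5,3(r9)		# invalid, DS-form offsets must be 0 mod 4

   is converted by the code above into

	addi r11,r9,3		# move the whole address into the scratch
	ld r5,0(r11)		# offset 0 is always valid  */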
20264 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
20265 this function has any SDmode references. If we are on a power7 or later, we
20266 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
20267 can load/store the value. */
20269 static void
20270 rs6000_alloc_sdmode_stack_slot (void)
20272 tree t;
20273 basic_block bb;
20274 gimple_stmt_iterator gsi;
20276 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
20277 /* We use a different approach for dealing with the secondary
20278 memory in LRA. */
20279 if (ira_use_lra_p)
20280 return;
20282 if (TARGET_NO_SDMODE_STACK)
20283 return;
20285 FOR_EACH_BB_FN (bb, cfun)
20286 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
20288 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
20289 if (ret)
20291 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20292 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20293 SDmode, 0);
20294 return;
20298 /* Check for any SDmode parameters of the function. */
20299 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
20301 if (TREE_TYPE (t) == error_mark_node)
20302 continue;
20304 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
20305 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
20307 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20308 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20309 SDmode, 0);
20310 return;
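
/* Rationale, as a sketch: without LFIWZX/STFIWX there is no 4-byte
   integer-style FPR load or store, so SDmode values are bounced through
   this 8-byte slot; on power7 and later a single instruction suffices,
   e.g. (illustrative registers):

	lfiwzx f0,0,r9		# load the 4-byte value into an FPR  */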
20315 static void
20316 rs6000_instantiate_decls (void)
20318 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
20319 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
20322 /* Given an rtx X being reloaded into a reg required to be
20323 in class CLASS, return the class of reg to actually use.
20324 In general this is just CLASS; but on some machines
20325 in some cases it is preferable to use a more restrictive class.
20327 On the RS/6000, we have to return NO_REGS when we want to reload a
20328 floating-point CONST_DOUBLE to force it to be copied to memory.
20330 We also don't want to reload integer values into floating-point
20331 registers if we can at all help it. In fact, this can
20332 cause reload to die, if it tries to generate a reload of CTR
20333 into a FP register and discovers it doesn't have the memory location
20334 required.
20336 ??? Would it be a good idea to have reload do the converse, that is
20337 try to reload floating modes into FP registers if possible?
20340 static enum reg_class
20341 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20343 machine_mode mode = GET_MODE (x);
20344 bool is_constant = CONSTANT_P (x);
20346 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20347 reload class for it. */
20348 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20349 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20350 return NO_REGS;
20352 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20353 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20354 return NO_REGS;
20356 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20357 the reloading of address expressions using PLUS into floating point
20358 registers. */
20359 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20361 if (is_constant)
20363 /* Zero is always allowed in all VSX registers. */
20364 if (x == CONST0_RTX (mode))
20365 return rclass;
20367 /* If this is a vector constant that can be formed with a few Altivec
20368 instructions, we want altivec registers. */
20369 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20370 return ALTIVEC_REGS;
20372 /* Force constant to memory. */
20373 return NO_REGS;
20376 /* D-form addressing can easily reload the value. */
20377 if (mode_supports_vmx_dform (mode)
20378 || mode_supports_vsx_dform_quad (mode))
20379 return rclass;
20381 /* If this is a scalar floating point value and we don't have D-form
20382 addressing, prefer the traditional floating point registers so that we
20383 can use D-form (register+offset) addressing. */
20384 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
20385 return FLOAT_REGS;
20387 /* Prefer the Altivec registers if Altivec is handling the vector
20388 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20389 loads. */
20390 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20391 || mode == V1TImode)
20392 return ALTIVEC_REGS;
20394 return rclass;
20397 if (is_constant || GET_CODE (x) == PLUS)
20399 if (reg_class_subset_p (GENERAL_REGS, rclass))
20400 return GENERAL_REGS;
20401 if (reg_class_subset_p (BASE_REGS, rclass))
20402 return BASE_REGS;
20403 return NO_REGS;
20406 /* If we haven't picked a register class, and the type is a vector or
20407 floating point type, prefer to use the VSX, FPR, or Altivec register
20408 classes. */
20409 if (rclass == NO_REGS)
20411 if (TARGET_VSX && VECTOR_MEM_VSX_OR_P8_VECTOR_P (mode))
20412 return VSX_REGS;
20414 if (TARGET_ALTIVEC && VECTOR_MEM_ALTIVEC_P (mode))
20415 return ALTIVEC_REGS;
20417 if (DECIMAL_FLOAT_MODE_P (mode))
20418 return TARGET_DFP ? FLOAT_REGS : NO_REGS;
20420 if (TARGET_FPRS && TARGET_HARD_FLOAT && FLOAT_MODE_P (mode)
20421 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) != 0)
20422 return FLOAT_REGS;
20425 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20426 return GENERAL_REGS;
20428 return rclass;
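
/* Two illustrative consequences of the code above: an easy vector constant
   such as (const_vector:V4SI [1 1 1 1]) prefers ALTIVEC_REGS, since a
   single vspltisw can build it, while a scalar SFmode/DFmode value asked
   to go into VSX_REGS without D-form addressing is steered to FLOAT_REGS
   so that lfs/lfd register+offset addressing remains usable.  */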
20431 /* Debug version of rs6000_preferred_reload_class. */
20432 static enum reg_class
20433 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20435 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20437 fprintf (stderr,
20438 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20439 "mode = %s, x:\n",
20440 reg_class_names[ret], reg_class_names[rclass],
20441 GET_MODE_NAME (GET_MODE (x)));
20442 debug_rtx (x);
20444 return ret;
20447 /* If we are copying between FP or AltiVec registers and anything else, we need
20448 a memory location. The exception is when we are targeting ppc64 and the
20449 move to/from fpr to gpr instructions are available. Also, under VSX, you
20450 can copy vector registers from the FP register set to the Altivec register
20451 set and vice versa. */
20453 static bool
20454 rs6000_secondary_memory_needed (enum reg_class from_class,
20455 enum reg_class to_class,
20456 machine_mode mode)
20458 enum rs6000_reg_type from_type, to_type;
20459 bool altivec_p = ((from_class == ALTIVEC_REGS)
20460 || (to_class == ALTIVEC_REGS));
20462 /* If a simple/direct move is available, we don't need secondary memory */
20463 from_type = reg_class_to_reg_type[(int)from_class];
20464 to_type = reg_class_to_reg_type[(int)to_class];
20466 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20467 (secondary_reload_info *)0, altivec_p))
20468 return false;
20470 /* If we have a floating point or vector register class, we need to use
20471 memory to transfer the data. */
20472 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20473 return true;
20475 return false;
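
/* For instance, copying DFmode between FLOAT_REGS and GENERAL_REGS without
   the direct move instructions must bounce through memory, conceptually
   (a sketch; reload uses a real stack slot, registers illustrative):

	stfd f1,-8(r1)
	ld r3,-8(r1)

   whereas with TARGET_DIRECT_MOVE the same copy is a single mfvsrd and
   needs no secondary memory.  */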
20478 /* Debug version of rs6000_secondary_memory_needed. */
20479 static bool
20480 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
20481 enum reg_class to_class,
20482 machine_mode mode)
20484 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
20486 fprintf (stderr,
20487 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20488 "to_class = %s, mode = %s\n",
20489 ret ? "true" : "false",
20490 reg_class_names[from_class],
20491 reg_class_names[to_class],
20492 GET_MODE_NAME (mode));
20494 return ret;
20497 /* Return the register class of a scratch register needed to copy IN into
20498 or out of a register in RCLASS in MODE. If it can be done directly,
20499 NO_REGS is returned. */
20501 static enum reg_class
20502 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20503 rtx in)
20505 int regno;
20507 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20508 #if TARGET_MACHO
20509 && MACHOPIC_INDIRECT
20510 #endif
20513 /* We cannot copy a symbolic operand directly into anything
20514 other than BASE_REGS for TARGET_ELF. So indicate that a
20515 register from BASE_REGS is needed as an intermediate
20516 register.
20518 On Darwin, pic addresses require a load from memory, which
20519 needs a base register. */
20520 if (rclass != BASE_REGS
20521 && (GET_CODE (in) == SYMBOL_REF
20522 || GET_CODE (in) == HIGH
20523 || GET_CODE (in) == LABEL_REF
20524 || GET_CODE (in) == CONST))
20525 return BASE_REGS;
20528 if (GET_CODE (in) == REG)
20530 regno = REGNO (in);
20531 if (regno >= FIRST_PSEUDO_REGISTER)
20533 regno = true_regnum (in);
20534 if (regno >= FIRST_PSEUDO_REGISTER)
20535 regno = -1;
20538 else if (GET_CODE (in) == SUBREG)
20540 regno = true_regnum (in);
20541 if (regno >= FIRST_PSEUDO_REGISTER)
20542 regno = -1;
20544 else
20545 regno = -1;
20547 /* If we have VSX register moves, prefer moving scalar values between
20548 Altivec registers and GPR by going via an FPR (and then via memory)
20549 instead of reloading the secondary memory address for Altivec moves. */
20550 if (TARGET_VSX
20551 && GET_MODE_SIZE (mode) < 16
20552 && !mode_supports_vmx_dform (mode)
20553 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20554 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20555 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20556 && (regno >= 0 && INT_REGNO_P (regno)))))
20557 return FLOAT_REGS;
20559 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20560 into anything. */
20561 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20562 || (regno >= 0 && INT_REGNO_P (regno)))
20563 return NO_REGS;
20565 /* Constants, memory, and VSX registers can go into VSX registers (both the
20566 traditional floating point and the altivec registers). */
20567 if (rclass == VSX_REGS
20568 && (regno == -1 || VSX_REGNO_P (regno)))
20569 return NO_REGS;
20571 /* Constants, memory, and FP registers can go into FP registers. */
20572 if ((regno == -1 || FP_REGNO_P (regno))
20573 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20574 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20576 /* Memory and AltiVec registers can go into AltiVec registers. */
20577 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20578 && rclass == ALTIVEC_REGS)
20579 return NO_REGS;
20581 /* We can copy among the CR registers. */
20582 if ((rclass == CR_REGS || rclass == CR0_REGS)
20583 && regno >= 0 && CR_REGNO_P (regno))
20584 return NO_REGS;
20586 /* Otherwise, we need GENERAL_REGS. */
20587 return GENERAL_REGS;
20590 /* Debug version of rs6000_secondary_reload_class. */
20591 static enum reg_class
20592 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20593 machine_mode mode, rtx in)
20595 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20596 fprintf (stderr,
20597 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20598 "mode = %s, input rtx:\n",
20599 reg_class_names[ret], reg_class_names[rclass],
20600 GET_MODE_NAME (mode));
20601 debug_rtx (in);
20603 return ret;
20606 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
20608 static bool
20609 rs6000_cannot_change_mode_class (machine_mode from,
20610 machine_mode to,
20611 enum reg_class rclass)
20613 unsigned from_size = GET_MODE_SIZE (from);
20614 unsigned to_size = GET_MODE_SIZE (to);
20616 if (from_size != to_size)
20618 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20620 if (reg_classes_intersect_p (xclass, rclass))
20622 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
20623 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
20624 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20625 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20627 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20628 single register under VSX because the scalar part of the register
20629 is in the upper 64-bits, and not the lower 64-bits. Types like
20630 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20631 IEEE floating point can't overlap, and neither can small
20632 values. */
20634 if (to_float128_vector_p && from_float128_vector_p)
20635 return false;
20637 else if (to_float128_vector_p || from_float128_vector_p)
20638 return true;
20640 /* TDmode in floating-mode registers must always go into a register
20641 pair with the most significant word in the even-numbered register
20642 to match ISA requirements. In little-endian mode, this does not
20643 match subreg numbering, so we cannot allow subregs. */
20644 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20645 return true;
20647 if (from_size < 8 || to_size < 8)
20648 return true;
20650 if (from_size == 8 && (8 * to_nregs) != to_size)
20651 return true;
20653 if (to_size == 8 && (8 * from_nregs) != from_size)
20654 return true;
20656 return false;
20658 else
20659 return false;
20662 if (TARGET_E500_DOUBLE
20663 && ((((to) == DFmode) + ((from) == DFmode)) == 1
20664 || (((to) == TFmode) + ((from) == TFmode)) == 1
20665 || (((to) == IFmode) + ((from) == IFmode)) == 1
20666 || (((to) == KFmode) + ((from) == KFmode)) == 1
20667 || (((to) == DDmode) + ((from) == DDmode)) == 1
20668 || (((to) == TDmode) + ((from) == TDmode)) == 1
20669 || (((to) == DImode) + ((from) == DImode)) == 1))
20670 return true;
20672 /* Since the VSX register set includes traditional floating point registers
20673 and altivec registers, just check for the size being different instead of
20674 trying to check whether the modes are vector modes. Otherwise it won't
20675 allow say DF and DI to change classes. For types like TFmode and TDmode
20676 that take 2 64-bit registers, rather than a single 128-bit register, don't
20677 allow subregs of those types to other 128 bit types. */
20678 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20680 unsigned num_regs = (from_size + 15) / 16;
20681 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
20682 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
20683 return true;
20685 return (from_size != 8 && from_size != 16);
20688 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20689 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20690 return true;
20692 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
20693 && reg_classes_intersect_p (GENERAL_REGS, rclass))
20694 return true;
20696 return false;
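
/* Example of the VSX restriction above: a subreg such as
   (subreg:DI (reg:V2DF) 0) is rejected for VSX classes because the scalar
   part of a VSX register lives in the upper 64 bits of the register, so
   the memory-order subreg offset would not select the bytes generic code
   expects.  */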
20699 /* Debug version of rs6000_cannot_change_mode_class. */
20700 static bool
20701 rs6000_debug_cannot_change_mode_class (machine_mode from,
20702 machine_mode to,
20703 enum reg_class rclass)
20705 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
20707 fprintf (stderr,
20708 "rs6000_cannot_change_mode_class, return %s, from = %s, "
20709 "to = %s, rclass = %s\n",
20710 ret ? "true" : "false",
20711 GET_MODE_NAME (from), GET_MODE_NAME (to),
20712 reg_class_names[rclass]);
20714 return ret;
20717 /* Return a string to do a move operation of 128 bits of data. */
20719 const char *
20720 rs6000_output_move_128bit (rtx operands[])
20722 rtx dest = operands[0];
20723 rtx src = operands[1];
20724 machine_mode mode = GET_MODE (dest);
20725 int dest_regno;
20726 int src_regno;
20727 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20728 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20730 if (REG_P (dest))
20732 dest_regno = REGNO (dest);
20733 dest_gpr_p = INT_REGNO_P (dest_regno);
20734 dest_fp_p = FP_REGNO_P (dest_regno);
20735 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20736 dest_vsx_p = dest_fp_p | dest_vmx_p;
20738 else
20740 dest_regno = -1;
20741 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20744 if (REG_P (src))
20746 src_regno = REGNO (src);
20747 src_gpr_p = INT_REGNO_P (src_regno);
20748 src_fp_p = FP_REGNO_P (src_regno);
20749 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20750 src_vsx_p = src_fp_p | src_vmx_p;
20752 else
20754 src_regno = -1;
20755 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20758 /* Register moves. */
20759 if (dest_regno >= 0 && src_regno >= 0)
20761 if (dest_gpr_p)
20763 if (src_gpr_p)
20764 return "#";
20766 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20767 return (WORDS_BIG_ENDIAN
20768 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20769 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20771 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20772 return "#";
20775 else if (TARGET_VSX && dest_vsx_p)
20777 if (src_vsx_p)
20778 return "xxlor %x0,%x1,%x1";
20780 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20781 return (WORDS_BIG_ENDIAN
20782 ? "mtvsrdd %x0,%1,%L1"
20783 : "mtvsrdd %x0,%L1,%1");
20785 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20786 return "#";
20789 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20790 return "vor %0,%1,%1";
20792 else if (dest_fp_p && src_fp_p)
20793 return "#";
20796 /* Loads. */
20797 else if (dest_regno >= 0 && MEM_P (src))
20799 if (dest_gpr_p)
20801 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20802 return "lq %0,%1";
20803 else
20804 return "#";
20807 else if (TARGET_ALTIVEC && dest_vmx_p
20808 && altivec_indexed_or_indirect_operand (src, mode))
20809 return "lvx %0,%y1";
20811 else if (TARGET_VSX && dest_vsx_p)
20813 if (mode_supports_vsx_dform_quad (mode)
20814 && quad_address_p (XEXP (src, 0), mode, true))
20815 return "lxv %x0,%1";
20817 else if (TARGET_P9_VECTOR)
20818 return "lxvx %x0,%y1";
20820 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20821 return "lxvw4x %x0,%y1";
20823 else
20824 return "lxvd2x %x0,%y1";
20827 else if (TARGET_ALTIVEC && dest_vmx_p)
20828 return "lvx %0,%y1";
20830 else if (dest_fp_p)
20831 return "#";
20834 /* Stores. */
20835 else if (src_regno >= 0 && MEM_P (dest))
20837 if (src_gpr_p)
20839 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20840 return "stq %1,%0";
20841 else
20842 return "#";
20845 else if (TARGET_ALTIVEC && src_vmx_p
20846 && altivec_indexed_or_indirect_operand (dest, mode))
20847 return "stvx %1,%y0";
20849 else if (TARGET_VSX && src_vsx_p)
20851 if (mode_supports_vsx_dform_quad (mode)
20852 && quad_address_p (XEXP (dest, 0), mode, true))
20853 return "stxv %x1,%0";
20855 else if (TARGET_P9_VECTOR)
20856 return "stxvx %x1,%y0";
20858 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20859 return "stxvw4x %x1,%y0";
20861 else
20862 return "stxvd2x %x1,%y0";
20865 else if (TARGET_ALTIVEC && src_vmx_p)
20866 return "stvx %1,%y0";
20868 else if (src_fp_p)
20869 return "#";
20872 /* Constants. */
20873 else if (dest_regno >= 0
20874 && (GET_CODE (src) == CONST_INT
20875 || GET_CODE (src) == CONST_WIDE_INT
20876 || GET_CODE (src) == CONST_DOUBLE
20877 || GET_CODE (src) == CONST_VECTOR))
20879 if (dest_gpr_p)
20880 return "#";
20882 else if ((dest_vmx_p && TARGET_ALTIVEC)
20883 || (dest_vsx_p && TARGET_VSX))
20884 return output_vec_const_move (operands);
20887 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
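
/* This function is meant to be used from the output statement of a 128-bit
   move pattern, conceptually (a sketch, not the exact .md source):

	(define_insn "*mov<mode>_128bit"
	  [(set (match_operand 0 ...) (match_operand 1 ...))]
	  ""
	  "* return rs6000_output_move_128bit (operands);")

   A "#" result tells final that the insn must first be split.  */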
20890 /* Validate a 128-bit move. */
20891 bool
20892 rs6000_move_128bit_ok_p (rtx operands[])
20894 machine_mode mode = GET_MODE (operands[0]);
20895 return (gpc_reg_operand (operands[0], mode)
20896 || gpc_reg_operand (operands[1], mode));
20899 /* Return true if a 128-bit move needs to be split. */
20900 bool
20901 rs6000_split_128bit_ok_p (rtx operands[])
20903 if (!reload_completed)
20904 return false;
20906 if (!gpr_or_gpr_p (operands[0], operands[1]))
20907 return false;
20909 if (quad_load_store_p (operands[0], operands[1]))
20910 return false;
20912 return true;
20916 /* Given a comparison operation, return the bit number in CCR to test. We
20917 know this is a valid comparison.
20919 SCC_P is 1 if this is for an scc. That means that %D will have been
20920 used instead of %C, so the bits will be in different places.
20922 Return -1 if OP isn't a valid comparison for some reason. */
20925 ccr_bit (rtx op, int scc_p)
20927 enum rtx_code code = GET_CODE (op);
20928 machine_mode cc_mode;
20929 int cc_regnum;
20930 int base_bit;
20931 rtx reg;
20933 if (!COMPARISON_P (op))
20934 return -1;
20936 reg = XEXP (op, 0);
20938 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
20940 cc_mode = GET_MODE (reg);
20941 cc_regnum = REGNO (reg);
20942 base_bit = 4 * (cc_regnum - CR0_REGNO);
20944 validate_condition_mode (code, cc_mode);
20946 /* When generating a sCOND operation, only positive conditions are
20947 allowed. */
20948 gcc_assert (!scc_p
20949 || code == EQ || code == GT || code == LT || code == UNORDERED
20950 || code == GTU || code == LTU);
20952 switch (code)
20954 case NE:
20955 return scc_p ? base_bit + 3 : base_bit + 2;
20956 case EQ:
20957 return base_bit + 2;
20958 case GT: case GTU: case UNLE:
20959 return base_bit + 1;
20960 case LT: case LTU: case UNGE:
20961 return base_bit;
20962 case ORDERED: case UNORDERED:
20963 return base_bit + 3;
20965 case GE: case GEU:
20966 /* If scc, we will have done a cror to put the bit in the
20967 unordered position. So test that bit. For integer, this is ! LT
20968 unless this is an scc insn. */
20969 return scc_p ? base_bit + 3 : base_bit;
20971 case LE: case LEU:
20972 return scc_p ? base_bit + 3 : base_bit + 1;
20974 default:
20975 gcc_unreachable ();
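
/* Worked example: for (eq (reg:CC cr2) (const_int 0)) the base bit is
   4 * 2 = 8, so ccr_bit returns 8 + 2 = 10 and the branch can be printed
   as "bt 10,target" (CR bits count from CR0's LT bit).  */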
20979 /* Return the GOT register. */
20982 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20984 /* The second flow pass currently (June 1999) can't update
20985 regs_ever_live without disturbing other parts of the compiler, so
20986 update it here to make the prolog/epilogue code happy. */
20987 if (!can_create_pseudo_p ()
20988 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20989 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20991 crtl->uses_pic_offset_table = 1;
20993 return pic_offset_table_rtx;
20996 static rs6000_stack_t stack_info;
20998 /* Function to init struct machine_function.
20999 This will be called, via a pointer variable,
21000 from push_function_context. */
21002 static struct machine_function *
21003 rs6000_init_machine_status (void)
21005 stack_info.reload_completed = 0;
21006 return ggc_cleared_alloc<machine_function> ();
21009 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21011 /* Write out a function code label. */
21013 void
21014 rs6000_output_function_entry (FILE *file, const char *fname)
21016 if (fname[0] != '.')
21018 switch (DEFAULT_ABI)
21020 default:
21021 gcc_unreachable ();
21023 case ABI_AIX:
21024 if (DOT_SYMBOLS)
21025 putc ('.', file);
21026 else
21027 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21028 break;
21030 case ABI_ELFv2:
21031 case ABI_V4:
21032 case ABI_DARWIN:
21033 break;
21037 RS6000_OUTPUT_BASENAME (file, fname);
21040 /* Print an operand. Recognize special options, documented below. */
21042 #if TARGET_ELF
21043 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21044 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21045 #else
21046 #define SMALL_DATA_RELOC "sda21"
21047 #define SMALL_DATA_REG 0
21048 #endif
21050 void
21051 print_operand (FILE *file, rtx x, int code)
21053 int i;
21054 unsigned HOST_WIDE_INT uval;
21056 switch (code)
21058 /* %a is output_address. */
21060 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21061 output_operand. */
21063 case 'D':
21064 /* Like 'J' but get to the GT bit only. */
21065 gcc_assert (REG_P (x));
21067 /* Bit 1 is GT bit. */
21068 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21070 /* Add one for shift count in rlinm for scc. */
21071 fprintf (file, "%d", i + 1);
21072 return;
21074 case 'e':
21075 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21076 if (! INT_P (x))
21078 output_operand_lossage ("invalid %%e value");
21079 return;
21082 uval = INTVAL (x);
21083 if ((uval & 0xffff) == 0 && uval != 0)
21084 putc ('s', file);
21085 return;
21087 case 'E':
21088 /* X is a CR register. Print the number of the EQ bit of the CR */
21089 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21090 output_operand_lossage ("invalid %%E value");
21091 else
21092 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21093 return;
21095 case 'f':
21096 /* X is a CR register. Print the shift count needed to move it
21097 to the high-order four bits. */
21098 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21099 output_operand_lossage ("invalid %%f value");
21100 else
21101 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21102 return;
21104 case 'F':
21105 /* Similar, but print the count for the rotate in the opposite
21106 direction. */
21107 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21108 output_operand_lossage ("invalid %%F value");
21109 else
21110 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21111 return;
21113 case 'G':
21114 /* X is a constant integer. If it is negative, print "m",
21115 otherwise print "z". This is to make an aze or ame insn. */
21116 if (GET_CODE (x) != CONST_INT)
21117 output_operand_lossage ("invalid %%G value");
21118 else if (INTVAL (x) >= 0)
21119 putc ('z', file);
21120 else
21121 putc ('m', file);
21122 return;
21124 case 'h':
21125 /* If constant, output low-order five bits. Otherwise, write
21126 normally. */
21127 if (INT_P (x))
21128 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21129 else
21130 print_operand (file, x, 0);
21131 return;
21133 case 'H':
21134 /* If constant, output low-order six bits. Otherwise, write
21135 normally. */
21136 if (INT_P (x))
21137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21138 else
21139 print_operand (file, x, 0);
21140 return;
21142 case 'I':
21143 /* Print `i' if this is a constant, else nothing. */
21144 if (INT_P (x))
21145 putc ('i', file);
21146 return;
21148 case 'j':
21149 /* Write the bit number in CCR for jump. */
21150 i = ccr_bit (x, 0);
21151 if (i == -1)
21152 output_operand_lossage ("invalid %%j code");
21153 else
21154 fprintf (file, "%d", i);
21155 return;
21157 case 'J':
21158 /* Similar, but add one for shift count in rlinm for scc and pass
21159 scc flag to `ccr_bit'. */
21160 i = ccr_bit (x, 1);
21161 if (i == -1)
21162 output_operand_lossage ("invalid %%J code");
21163 else
21164 /* If we want bit 31, write a shift count of zero, not 32. */
21165 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21166 return;
21168 case 'k':
21169 /* X must be a constant. Write the 1's complement of the
21170 constant. */
21171 if (! INT_P (x))
21172 output_operand_lossage ("invalid %%k value");
21173 else
21174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21175 return;
21177 case 'K':
21178 /* X must be a symbolic constant on ELF. Write an
21179 expression suitable for an 'addi' that adds in the low 16
21180 bits of the MEM. */
21181 if (GET_CODE (x) == CONST)
21183 if (GET_CODE (XEXP (x, 0)) != PLUS
21184 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21185 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21186 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21187 output_operand_lossage ("invalid %%K value");
21189 print_operand_address (file, x);
21190 fputs ("@l", file);
21191 return;
21193 /* %l is output_asm_label. */
21195 case 'L':
21196 /* Write second word of DImode or DFmode reference. Works on register
21197 or non-indexed memory only. */
21198 if (REG_P (x))
21199 fputs (reg_names[REGNO (x) + 1], file);
21200 else if (MEM_P (x))
21202 machine_mode mode = GET_MODE (x);
21203 /* Handle possible auto-increment. Since it is pre-increment and
21204 we have already done it, we can just use an offset of word. */
21205 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21206 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21207 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21208 UNITS_PER_WORD));
21209 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21210 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21211 UNITS_PER_WORD));
21212 else
21213 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21214 UNITS_PER_WORD),
21215 0));
21217 if (small_data_operand (x, GET_MODE (x)))
21218 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21219 reg_names[SMALL_DATA_REG]);
21221 return;
21223 case 'N':
21224 /* Write the number of elements in the vector times 4. */
21225 if (GET_CODE (x) != PARALLEL)
21226 output_operand_lossage ("invalid %%N value");
21227 else
21228 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21229 return;
21231 case 'O':
21232 /* Similar, but subtract 1 first. */
21233 if (GET_CODE (x) != PARALLEL)
21234 output_operand_lossage ("invalid %%O value");
21235 else
21236 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21237 return;
21239 case 'p':
21240 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21241 if (! INT_P (x)
21242 || INTVAL (x) < 0
21243 || (i = exact_log2 (INTVAL (x))) < 0)
21244 output_operand_lossage ("invalid %%p value");
21245 else
21246 fprintf (file, "%d", i);
21247 return;
21249 case 'P':
21250 /* The operand must be an indirect memory reference. The result
21251 is the register name. */
21252 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21253 || REGNO (XEXP (x, 0)) >= 32)
21254 output_operand_lossage ("invalid %%P value");
21255 else
21256 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21257 return;
21259 case 'q':
21260 /* This outputs the logical code corresponding to a boolean
21261 expression. The expression may have one or both operands
21262 negated (if one, only the first one). For condition register
21263 logical operations, it will also treat the negated
21264 CR codes as NOTs, but not handle NOTs of them. */
21266 const char *const *t = 0;
21267 const char *s;
21268 enum rtx_code code = GET_CODE (x);
21269 static const char * const tbl[3][3] = {
21270 { "and", "andc", "nor" },
21271 { "or", "orc", "nand" },
21272 { "xor", "eqv", "xor" } };
21274 if (code == AND)
21275 t = tbl[0];
21276 else if (code == IOR)
21277 t = tbl[1];
21278 else if (code == XOR)
21279 t = tbl[2];
21280 else
21281 output_operand_lossage ("invalid %%q value");
21283 if (GET_CODE (XEXP (x, 0)) != NOT)
21284 s = t[0];
21285 else
21287 if (GET_CODE (XEXP (x, 1)) == NOT)
21288 s = t[2];
21289 else
21290 s = t[1];
21293 fputs (s, file);
21295 return;
21297 case 'Q':
21298 if (! TARGET_MFCRF)
21299 return;
21300 fputc (',', file);
21301 /* FALLTHRU */
21303 case 'R':
21304 /* X is a CR register. Print the mask for `mtcrf'. */
21305 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21306 output_operand_lossage ("invalid %%R value");
21307 else
21308 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21309 return;
21311 case 's':
21312 /* Low 5 bits of 32 - value */
21313 if (! INT_P (x))
21314 output_operand_lossage ("invalid %%s value");
21315 else
21316 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21317 return;
21319 case 't':
21320 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21321 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21323 /* Bit 3 is OV bit. */
21324 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21326 /* If we want bit 31, write a shift count of zero, not 32. */
21327 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21328 return;
21330 case 'T':
21331 /* Print the symbolic name of a branch target register. */
21332 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21333 && REGNO (x) != CTR_REGNO))
21334 output_operand_lossage ("invalid %%T value");
21335 else if (REGNO (x) == LR_REGNO)
21336 fputs ("lr", file);
21337 else
21338 fputs ("ctr", file);
21339 return;
21341 case 'u':
21342 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21343 for use in unsigned operand. */
21344 if (! INT_P (x))
21346 output_operand_lossage ("invalid %%u value");
21347 return;
21350 uval = INTVAL (x);
21351 if ((uval & 0xffff) == 0)
21352 uval >>= 16;
21354 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21355 return;
21357 case 'v':
21358 /* High-order 16 bits of constant for use in signed operand. */
21359 if (! INT_P (x))
21360 output_operand_lossage ("invalid %%v value");
21361 else
21362 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21363 (INTVAL (x) >> 16) & 0xffff);
21364 return;
21366 case 'U':
21367 /* Print `u' if this has an auto-increment or auto-decrement. */
21368 if (MEM_P (x)
21369 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21370 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21371 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21372 putc ('u', file);
21373 return;
21375 case 'V':
21376 /* Print the trap code for this operand. */
21377 switch (GET_CODE (x))
21379 case EQ:
21380 fputs ("eq", file); /* 4 */
21381 break;
21382 case NE:
21383 fputs ("ne", file); /* 24 */
21384 break;
21385 case LT:
21386 fputs ("lt", file); /* 16 */
21387 break;
21388 case LE:
21389 fputs ("le", file); /* 20 */
21390 break;
21391 case GT:
21392 fputs ("gt", file); /* 8 */
21393 break;
21394 case GE:
21395 fputs ("ge", file); /* 12 */
21396 break;
21397 case LTU:
21398 fputs ("llt", file); /* 2 */
21399 break;
21400 case LEU:
21401 fputs ("lle", file); /* 6 */
21402 break;
21403 case GTU:
21404 fputs ("lgt", file); /* 1 */
21405 break;
21406 case GEU:
21407 fputs ("lge", file); /* 5 */
21408 break;
21409 default:
21410 gcc_unreachable ();
21412 break;
21414 case 'w':
21415 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21416 normally. */
21417 if (INT_P (x))
21418 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21419 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21420 else
21421 print_operand (file, x, 0);
21422 return;
21424 case 'x':
21425 /* X is a FPR or Altivec register used in a VSX context. */
21426 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21427 output_operand_lossage ("invalid %%x value");
21428 else
21430 int reg = REGNO (x);
21431 int vsx_reg = (FP_REGNO_P (reg)
21432 ? reg - 32
21433 : reg - FIRST_ALTIVEC_REGNO + 32);
21435 #ifdef TARGET_REGNAMES
21436 if (TARGET_REGNAMES)
21437 fprintf (file, "%%vs%d", vsx_reg);
21438 else
21439 #endif
21440 fprintf (file, "%d", vsx_reg);
21442 return;
21444 case 'X':
21445 if (MEM_P (x)
21446 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21447 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21448 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21449 putc ('x', file);
21450 return;
21452 case 'Y':
21453 /* Like 'L', for third word of TImode/PTImode */
21454 if (REG_P (x))
21455 fputs (reg_names[REGNO (x) + 2], file);
21456 else if (MEM_P (x))
21458 machine_mode mode = GET_MODE (x);
21459 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21460 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21461 output_address (mode, plus_constant (Pmode,
21462 XEXP (XEXP (x, 0), 0), 8));
21463 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21464 output_address (mode, plus_constant (Pmode,
21465 XEXP (XEXP (x, 0), 0), 8));
21466 else
21467 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21468 if (small_data_operand (x, GET_MODE (x)))
21469 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21470 reg_names[SMALL_DATA_REG]);
21472 return;
21474 case 'z':
21475 /* X is a SYMBOL_REF. Write out the name preceded by a
21476 period and without any trailing data in brackets. Used for function
21477 names. If we are configured for System V (or the embedded ABI) on
21478 the PowerPC, do not emit the period, since those systems do not use
21479 TOCs and the like. */
21480 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21482 /* For macho, check to see if we need a stub. */
21483 if (TARGET_MACHO)
21485 const char *name = XSTR (x, 0);
21486 #if TARGET_MACHO
21487 if (darwin_emit_branch_islands
21488 && MACHOPIC_INDIRECT
21489 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21490 name = machopic_indirection_name (x, /*stub_p=*/true);
21491 #endif
21492 assemble_name (file, name);
21494 else if (!DOT_SYMBOLS)
21495 assemble_name (file, XSTR (x, 0));
21496 else
21497 rs6000_output_function_entry (file, XSTR (x, 0));
21498 return;
21500 case 'Z':
21501 /* Like 'L', for last word of TImode/PTImode. */
21502 if (REG_P (x))
21503 fputs (reg_names[REGNO (x) + 3], file);
21504 else if (MEM_P (x))
21506 machine_mode mode = GET_MODE (x);
21507 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21508 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21509 output_address (mode, plus_constant (Pmode,
21510 XEXP (XEXP (x, 0), 0), 12));
21511 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21512 output_address (mode, plus_constant (Pmode,
21513 XEXP (XEXP (x, 0), 0), 12));
21514 else
21515 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21516 if (small_data_operand (x, GET_MODE (x)))
21517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21518 reg_names[SMALL_DATA_REG]);
21520 return;
21522 /* Print AltiVec or SPE memory operand. */
21523 case 'y':
21525 rtx tmp;
21527 gcc_assert (MEM_P (x));
21529 tmp = XEXP (x, 0);
21531 /* Ugly hack because %y is overloaded. */
21532 if ((TARGET_SPE || TARGET_E500_DOUBLE)
21533 && (GET_MODE_SIZE (GET_MODE (x)) == 8
21534 || FLOAT128_2REG_P (GET_MODE (x))
21535 || GET_MODE (x) == TImode
21536 || GET_MODE (x) == PTImode))
21538 /* Handle [reg]. */
21539 if (REG_P (tmp))
21541 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
21542 break;
21544 /* Handle [reg+UIMM]. */
21545 else if (GET_CODE (tmp) == PLUS &&
21546 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
21548 int x;
21550 gcc_assert (REG_P (XEXP (tmp, 0)));
21552 x = INTVAL (XEXP (tmp, 1));
21553 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
21554 break;
21557 /* Fall through. Must be [reg+reg]. */
21559 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
21560 && GET_CODE (tmp) == AND
21561 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21562 && INTVAL (XEXP (tmp, 1)) == -16)
21563 tmp = XEXP (tmp, 0);
21564 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21565 && GET_CODE (tmp) == PRE_MODIFY)
21566 tmp = XEXP (tmp, 1);
21567 if (REG_P (tmp))
21568 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21569 else
21571 if (GET_CODE (tmp) != PLUS
21572 || !REG_P (XEXP (tmp, 0))
21573 || !REG_P (XEXP (tmp, 1)))
21575 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21576 break;
21579 if (REGNO (XEXP (tmp, 0)) == 0)
21580 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21581 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21582 else
21583 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21584 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21586 break;
21589 case 0:
21590 if (REG_P (x))
21591 fprintf (file, "%s", reg_names[REGNO (x)]);
21592 else if (MEM_P (x))
21594 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21595 know the width from the mode. */
21596 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21597 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21598 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21599 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21600 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21601 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21602 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21603 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21604 else
21605 output_address (GET_MODE (x), XEXP (x, 0));
21607 else
21609 if (toc_relative_expr_p (x, false))
21610 /* This hack along with a corresponding hack in
21611 rs6000_output_addr_const_extra arranges to output addends
21612 where the assembler expects to find them. eg.
21613 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21614 without this hack would be output as "x@toc+4". We
21615 want "x+4@toc". */
21616 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
21617 else
21618 output_addr_const (file, x);
21620 return;
21622 case '&':
21623 if (const char *name = get_some_local_dynamic_name ())
21624 assemble_name (file, name);
21625 else
21626 output_operand_lossage ("'%%&' used without any "
21627 "local dynamic TLS references");
21628 return;
21630 default:
21631 output_operand_lossage ("invalid %%xn code");
21635 /* Print the address of an operand. */
21637 void
21638 print_operand_address (FILE *file, rtx x)
21640 if (REG_P (x))
21641 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21642 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21643 || GET_CODE (x) == LABEL_REF)
21645 output_addr_const (file, x);
21646 if (small_data_operand (x, GET_MODE (x)))
21647 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21648 reg_names[SMALL_DATA_REG]);
21649 else
21650 gcc_assert (!TARGET_TOC);
21652 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21653 && REG_P (XEXP (x, 1)))
21655 if (REGNO (XEXP (x, 0)) == 0)
21656 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21657 reg_names[ REGNO (XEXP (x, 0)) ]);
21658 else
21659 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21660 reg_names[ REGNO (XEXP (x, 1)) ]);
21662 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21663 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21664 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21665 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21666 #if TARGET_MACHO
21667 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21668 && CONSTANT_P (XEXP (x, 1)))
21670 fprintf (file, "lo16(");
21671 output_addr_const (file, XEXP (x, 1));
21672 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21674 #endif
21675 #if TARGET_ELF
21676 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21677 && CONSTANT_P (XEXP (x, 1)))
21679 output_addr_const (file, XEXP (x, 1));
21680 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21682 #endif
21683 else if (toc_relative_expr_p (x, false))
21685 /* This hack along with a corresponding hack in
21686 rs6000_output_addr_const_extra arranges to output addends
21687 where the assembler expects to find them. eg.
21688 (lo_sum (reg 9)
21689 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21690 without this hack would be output as "x@toc+8@l(9)". We
21691 want "x+8@toc@l(9)". */
21692 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
21693 if (GET_CODE (x) == LO_SUM)
21694 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21695 else
21696 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
21698 else
21699 gcc_unreachable ();
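/* For reference, the address forms handled above print roughly as
   follows (register naming varies by target):
	(reg 9)				-> "0(9)"	plain indirect
	(plus (reg 9) (reg 10))		-> "9,10"	indexed form
	(plus (reg 9) (const_int 8))	-> "8(9)"	d-form offset
	(lo_sum (reg 9) (symbol "x"))	-> "x@l(9)"	ELF; "lo16(x)(9)" on Darwin
   with TOC-relative addresses rewritten by the tocrel hack above.  */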
21702 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
21704 static bool
21705 rs6000_output_addr_const_extra (FILE *file, rtx x)
21707 if (GET_CODE (x) == UNSPEC)
21708 switch (XINT (x, 1))
21710 case UNSPEC_TOCREL:
21711 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21712 && REG_P (XVECEXP (x, 0, 1))
21713 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21714 output_addr_const (file, XVECEXP (x, 0, 0));
21715 if (x == tocrel_base && tocrel_offset != const0_rtx)
21717 if (INTVAL (tocrel_offset) >= 0)
21718 fprintf (file, "+");
21719 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
21721 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21723 putc ('-', file);
21724 assemble_name (file, toc_label_name);
21725 need_toc_init = 1;
21727 else if (TARGET_ELF)
21728 fputs ("@toc", file);
21729 return true;
21731 #if TARGET_MACHO
21732 case UNSPEC_MACHOPIC_OFFSET:
21733 output_addr_const (file, XVECEXP (x, 0, 0));
21734 putc ('-', file);
21735 machopic_output_function_base_name (file);
21736 return true;
21737 #endif
21739 return false;
21742 /* Target hook for assembling integer objects. The PowerPC version has
21743 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21744 is defined. It also needs to handle DI-mode objects on 64-bit
21745 targets. */
21747 static bool
21748 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21750 #ifdef RELOCATABLE_NEEDS_FIXUP
21751 /* Special handling for SI values. */
21752 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21754 static int recurse = 0;
21756 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21757 the .fixup section. Since the TOC section is already relocated, we
21758 don't need to mark it here. We used to skip the text section, but it
21759 should never be valid for relocated addresses to be placed in the text
21760 section. */
21761 if (DEFAULT_ABI == ABI_V4
21762 && (TARGET_RELOCATABLE || flag_pic > 1)
21763 && in_section != toc_section
21764 && !recurse
21765 && !CONST_SCALAR_INT_P (x)
21766 && CONSTANT_P (x))
21768 char buf[256];
21770 recurse = 1;
21771 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21772 fixuplabelno++;
21773 ASM_OUTPUT_LABEL (asm_out_file, buf);
21774 fprintf (asm_out_file, "\t.long\t(");
21775 output_addr_const (asm_out_file, x);
21776 fprintf (asm_out_file, ")@fixup\n");
21777 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21778 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21779 fprintf (asm_out_file, "\t.long\t");
21780 assemble_name (asm_out_file, buf);
21781 fprintf (asm_out_file, "\n\t.previous\n");
21782 recurse = 0;
21783 return true;
21785 /* Remove initial .'s to turn a -mcall-aixdesc function
21786 address into the address of the descriptor, not the function
21787 itself. */
21788 else if (GET_CODE (x) == SYMBOL_REF
21789 && XSTR (x, 0)[0] == '.'
21790 && DEFAULT_ABI == ABI_AIX)
21792 const char *name = XSTR (x, 0);
21793 while (*name == '.')
21794 name++;
21796 fprintf (asm_out_file, "\t.long\t%s\n", name);
21797 return true;
21800 #endif /* RELOCATABLE_NEEDS_FIXUP */
21801 return default_assemble_integer (x, size, aligned_p);
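/* To illustrate the fixup path above: assembling a relocatable address
   "sym" emits roughly (internal label name illustrative):
	.LCP0:
		.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	.LCP0
		.previous
   recording in .fixup the location of each word that must be patched
   when the -mrelocatable image is moved.  */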
21804 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21805 /* Emit an assembler directive to set symbol visibility for DECL to
21806 VISIBILITY_TYPE. */
21808 static void
21809 rs6000_assemble_visibility (tree decl, int vis)
21811 if (TARGET_XCOFF)
21812 return;
21814 /* Functions need to have their entry point symbol visibility set as
21815 well as their descriptor symbol visibility. */
21816 if (DEFAULT_ABI == ABI_AIX
21817 && DOT_SYMBOLS
21818 && TREE_CODE (decl) == FUNCTION_DECL)
21820 static const char * const visibility_types[] = {
21821 NULL, "internal", "hidden", "protected"
21824 const char *name, *type;
21826 name = ((* targetm.strip_name_encoding)
21827 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21828 type = visibility_types[vis];
21830 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21831 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21833 else
21834 default_assemble_visibility (decl, vis);
21836 #endif
21838 enum rtx_code
21839 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21841 /* Reversal of FP compares takes care -- an ordered compare
21842 becomes an unordered compare and vice versa. */
21843 if (mode == CCFPmode
21844 && (!flag_finite_math_only
21845 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21846 || code == UNEQ || code == LTGT))
21847 return reverse_condition_maybe_unordered (code);
21848 else
21849 return reverse_condition (code);
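/* Example of why the maybe-unordered reversal above matters: on
   CCFPmode, the reverse of GE must be UNLT rather than LT, so that a
   NaN operand (which makes GE false) still takes the reversed branch;
   plain reverse_condition would lose the unordered case.  */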
21852 /* Generate a compare for CODE. Return a brand-new rtx that
21853 represents the result of the compare. */
21855 static rtx
21856 rs6000_generate_compare (rtx cmp, machine_mode mode)
21858 machine_mode comp_mode;
21859 rtx compare_result;
21860 enum rtx_code code = GET_CODE (cmp);
21861 rtx op0 = XEXP (cmp, 0);
21862 rtx op1 = XEXP (cmp, 1);
21864 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21865 comp_mode = CCmode;
21866 else if (FLOAT_MODE_P (mode))
21867 comp_mode = CCFPmode;
21868 else if (code == GTU || code == LTU
21869 || code == GEU || code == LEU)
21870 comp_mode = CCUNSmode;
21871 else if ((code == EQ || code == NE)
21872 && unsigned_reg_p (op0)
21873 && (unsigned_reg_p (op1)
21874 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21875 /* These are unsigned values, perhaps there will be a later
21876 ordering compare that can be shared with this one. */
21877 comp_mode = CCUNSmode;
21878 else
21879 comp_mode = CCmode;
21881 /* If we have an unsigned compare, make sure we don't have a signed value as
21882 an immediate. */
21883 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21884 && INTVAL (op1) < 0)
21886 op0 = copy_rtx_if_shared (op0);
21887 op1 = force_reg (GET_MODE (op0), op1);
21888 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21891 /* First, the compare. */
21892 compare_result = gen_reg_rtx (comp_mode);
21894 /* E500 FP compare instructions on the GPRs. Yuck! */
21895 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
21896 && FLOAT_MODE_P (mode))
21898 rtx cmp, or_result, compare_result2;
21899 machine_mode op_mode = GET_MODE (op0);
21900 bool reverse_p;
21902 if (op_mode == VOIDmode)
21903 op_mode = GET_MODE (op1);
21905 /* First reverse the condition codes that aren't directly supported. */
21906 switch (code)
21908 case NE:
21909 case UNLT:
21910 case UNLE:
21911 case UNGT:
21912 case UNGE:
21913 code = reverse_condition_maybe_unordered (code);
21914 reverse_p = true;
21915 break;
21917 case EQ:
21918 case LT:
21919 case LE:
21920 case GT:
21921 case GE:
21922 reverse_p = false;
21923 break;
21925 default:
21926 gcc_unreachable ();
21929 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
21930 This explains the following mess. */
21932 switch (code)
21934 case EQ:
21935 switch (op_mode)
21937 case SFmode:
21938 cmp = (flag_finite_math_only && !flag_trapping_math)
21939 ? gen_tstsfeq_gpr (compare_result, op0, op1)
21940 : gen_cmpsfeq_gpr (compare_result, op0, op1);
21941 break;
21943 case DFmode:
21944 cmp = (flag_finite_math_only && !flag_trapping_math)
21945 ? gen_tstdfeq_gpr (compare_result, op0, op1)
21946 : gen_cmpdfeq_gpr (compare_result, op0, op1);
21947 break;
21949 case TFmode:
21950 case IFmode:
21951 case KFmode:
21952 cmp = (flag_finite_math_only && !flag_trapping_math)
21953 ? gen_tsttfeq_gpr (compare_result, op0, op1)
21954 : gen_cmptfeq_gpr (compare_result, op0, op1);
21955 break;
21957 default:
21958 gcc_unreachable ();
21960 break;
21962 case GT:
21963 case GE:
21964 switch (op_mode)
21966 case SFmode:
21967 cmp = (flag_finite_math_only && !flag_trapping_math)
21968 ? gen_tstsfgt_gpr (compare_result, op0, op1)
21969 : gen_cmpsfgt_gpr (compare_result, op0, op1);
21970 break;
21972 case DFmode:
21973 cmp = (flag_finite_math_only && !flag_trapping_math)
21974 ? gen_tstdfgt_gpr (compare_result, op0, op1)
21975 : gen_cmpdfgt_gpr (compare_result, op0, op1);
21976 break;
21978 case TFmode:
21979 case IFmode:
21980 case KFmode:
21981 cmp = (flag_finite_math_only && !flag_trapping_math)
21982 ? gen_tsttfgt_gpr (compare_result, op0, op1)
21983 : gen_cmptfgt_gpr (compare_result, op0, op1);
21984 break;
21986 default:
21987 gcc_unreachable ();
21989 break;
21991 case LT:
21992 case LE:
21993 switch (op_mode)
21995 case SFmode:
21996 cmp = (flag_finite_math_only && !flag_trapping_math)
21997 ? gen_tstsflt_gpr (compare_result, op0, op1)
21998 : gen_cmpsflt_gpr (compare_result, op0, op1);
21999 break;
22001 case DFmode:
22002 cmp = (flag_finite_math_only && !flag_trapping_math)
22003 ? gen_tstdflt_gpr (compare_result, op0, op1)
22004 : gen_cmpdflt_gpr (compare_result, op0, op1);
22005 break;
22007 case TFmode:
22008 case IFmode:
22009 case KFmode:
22010 cmp = (flag_finite_math_only && !flag_trapping_math)
22011 ? gen_tsttflt_gpr (compare_result, op0, op1)
22012 : gen_cmptflt_gpr (compare_result, op0, op1);
22013 break;
22015 default:
22016 gcc_unreachable ();
22018 break;
22020 default:
22021 gcc_unreachable ();
22024 /* Synthesize LE and GE from LT/GT || EQ. */
22025 if (code == LE || code == GE)
22027 emit_insn (cmp);
22029 compare_result2 = gen_reg_rtx (CCFPmode);
22031 /* Do the EQ. */
22032 switch (op_mode)
22034 case SFmode:
22035 cmp = (flag_finite_math_only && !flag_trapping_math)
22036 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
22037 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
22038 break;
22040 case DFmode:
22041 cmp = (flag_finite_math_only && !flag_trapping_math)
22042 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
22043 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
22044 break;
22046 case TFmode:
22047 case IFmode:
22048 case KFmode:
22049 cmp = (flag_finite_math_only && !flag_trapping_math)
22050 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
22051 : gen_cmptfeq_gpr (compare_result2, op0, op1);
22052 break;
22054 default:
22055 gcc_unreachable ();
22058 emit_insn (cmp);
22060 /* OR them together. */
22061 or_result = gen_reg_rtx (CCFPmode);
22062 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
22063 compare_result2);
22064 compare_result = or_result;
22067 code = reverse_p ? NE : EQ;
22069 emit_insn (cmp);
22072 /* IEEE 128-bit support in VSX registers when we do not have hardware
22073 support. */
22074 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22076 rtx libfunc = NULL_RTX;
22077 bool check_nan = false;
22078 rtx dest;
22080 switch (code)
22082 case EQ:
22083 case NE:
22084 libfunc = optab_libfunc (eq_optab, mode);
22085 break;
22087 case GT:
22088 case GE:
22089 libfunc = optab_libfunc (ge_optab, mode);
22090 break;
22092 case LT:
22093 case LE:
22094 libfunc = optab_libfunc (le_optab, mode);
22095 break;
22097 case UNORDERED:
22098 case ORDERED:
22099 libfunc = optab_libfunc (unord_optab, mode);
22100 code = (code == UNORDERED) ? NE : EQ;
22101 break;
22103 case UNGE:
22104 case UNGT:
22105 check_nan = true;
22106 libfunc = optab_libfunc (ge_optab, mode);
22107 code = (code == UNGE) ? GE : GT;
22108 break;
22110 case UNLE:
22111 case UNLT:
22112 check_nan = true;
22113 libfunc = optab_libfunc (le_optab, mode);
22114 code = (code == UNLE) ? LE : LT;
22115 break;
22117 case UNEQ:
22118 case LTGT:
22119 check_nan = true;
22120 libfunc = optab_libfunc (eq_optab, mode);
22121 code = (code == UNEQ) ? EQ : NE;
22122 break;
22124 default:
22125 gcc_unreachable ();
22128 gcc_assert (libfunc);
22130 if (!check_nan)
22131 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22132 SImode, 2, op0, mode, op1, mode);
22134 /* The library signals an exception for signalling NaNs, so we need to
22135 handle isgreater, etc. by first checking isordered. */
22136 else
22138 rtx ne_rtx, normal_dest, unord_dest;
22139 rtx unord_func = optab_libfunc (unord_optab, mode);
22140 rtx join_label = gen_label_rtx ();
22141 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22142 rtx unord_cmp = gen_reg_rtx (comp_mode);
22145 /* Test for either value being a NaN. */
22146 gcc_assert (unord_func);
22147 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22148 SImode, 2, op0, mode, op1,
22149 mode);
22151 /* Set the result to 1 (true) if either value is a NaN, and jump
22152 to the join label. */
22153 dest = gen_reg_rtx (SImode);
22154 emit_move_insn (dest, const1_rtx);
22155 emit_insn (gen_rtx_SET (unord_cmp,
22156 gen_rtx_COMPARE (comp_mode, unord_dest,
22157 const0_rtx)));
22159 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22160 emit_jump_insn (gen_rtx_SET (pc_rtx,
22161 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22162 join_ref,
22163 pc_rtx)));
22165 /* Do the normal comparison, knowing that the values are not
22166 NaNs. */
22167 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22168 SImode, 2, op0, mode, op1,
22169 mode);
22171 emit_insn (gen_cstoresi4 (dest,
22172 gen_rtx_fmt_ee (code, SImode, normal_dest,
22173 const0_rtx),
22174 normal_dest, const0_rtx));
22176 /* Join NaN and non-NaN paths. Compare dest against 0. */
22177 emit_label (join_label);
22178 code = NE;
22181 emit_insn (gen_rtx_SET (compare_result,
22182 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22185 else
22187 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22188 CLOBBERs to match cmptf_internal2 pattern. */
22189 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22190 && FLOAT128_IBM_P (GET_MODE (op0))
22191 && TARGET_HARD_FLOAT && TARGET_FPRS)
22192 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22193 gen_rtvec (10,
22194 gen_rtx_SET (compare_result,
22195 gen_rtx_COMPARE (comp_mode, op0, op1)),
22196 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22197 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22198 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22199 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22200 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22201 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22202 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22203 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22204 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22205 else if (GET_CODE (op1) == UNSPEC
22206 && XINT (op1, 1) == UNSPEC_SP_TEST)
22208 rtx op1b = XVECEXP (op1, 0, 0);
22209 comp_mode = CCEQmode;
22210 compare_result = gen_reg_rtx (CCEQmode);
22211 if (TARGET_64BIT)
22212 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22213 else
22214 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22216 else
22217 emit_insn (gen_rtx_SET (compare_result,
22218 gen_rtx_COMPARE (comp_mode, op0, op1)));
22221 /* Some kinds of FP comparisons need an OR operation;
22222 under flag_finite_math_only we don't bother. */
22223 if (FLOAT_MODE_P (mode)
22224 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22225 && !flag_finite_math_only
22226 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
22227 && (code == LE || code == GE
22228 || code == UNEQ || code == LTGT
22229 || code == UNGT || code == UNLT))
22231 enum rtx_code or1, or2;
22232 rtx or1_rtx, or2_rtx, compare2_rtx;
22233 rtx or_result = gen_reg_rtx (CCEQmode);
22235 switch (code)
22237 case LE: or1 = LT; or2 = EQ; break;
22238 case GE: or1 = GT; or2 = EQ; break;
22239 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22240 case LTGT: or1 = LT; or2 = GT; break;
22241 case UNGT: or1 = UNORDERED; or2 = GT; break;
22242 case UNLT: or1 = UNORDERED; or2 = LT; break;
22243 default: gcc_unreachable ();
22245 validate_condition_mode (or1, comp_mode);
22246 validate_condition_mode (or2, comp_mode);
22247 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22248 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22249 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22250 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22251 const_true_rtx);
22252 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22254 compare_result = or_result;
22255 code = EQ;
22258 validate_condition_mode (code, GET_MODE (compare_result));
22260 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
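/* Sketch of the LE/GE combining done near the end above: because the
   CR keeps LT, GT and EQ as separate bits, "a <= b" is formed by
   OR-ing the LT and EQ bits into a CCEQmode register, e.g.
   (CR field numbers illustrative):
	fcmpu	cr0,f1,f2
	cror	4*cr5+eq,4*cr0+lt,4*cr0+eq
   after which the caller only needs to test the combined EQ bit.  */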
22264 /* Return the diagnostic message string if the binary operation OP is
22265 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22267 static const char*
22268 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22269 const_tree type1,
22270 const_tree type2)
22272 enum machine_mode mode1 = TYPE_MODE (type1);
22273 enum machine_mode mode2 = TYPE_MODE (type2);
22275 /* For complex modes, use the inner type. */
22276 if (COMPLEX_MODE_P (mode1))
22277 mode1 = GET_MODE_INNER (mode1);
22279 if (COMPLEX_MODE_P (mode2))
22280 mode2 = GET_MODE_INNER (mode2);
22282 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22283 double to intermix unless -mfloat128-convert. */
22284 if (mode1 == mode2)
22285 return NULL;
22287 if (!TARGET_FLOAT128_CVT)
22289 if ((mode1 == KFmode && mode2 == IFmode)
22290 || (mode1 == IFmode && mode2 == KFmode))
22291 return N_("__float128 and __ibm128 cannot be used in the same "
22292 "expression");
22294 if (TARGET_IEEEQUAD
22295 && ((mode1 == IFmode && mode2 == TFmode)
22296 || (mode1 == TFmode && mode2 == IFmode)))
22297 return N_("__ibm128 and long double cannot be used in the same "
22298 "expression");
22300 if (!TARGET_IEEEQUAD
22301 && ((mode1 == KFmode && mode2 == TFmode)
22302 || (mode1 == TFmode && mode2 == KFmode)))
22303 return N_("__float128 and long double cannot be used in the same "
22304 "expression");
22307 return NULL;
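/* For instance, under the default -mno-float128-convert the checks
   above reject a mixed expression such as
	__float128 a;  __ibm128 b;  ... a + b ...
   with "__float128 and __ibm128 cannot be used in the same
   expression", since the two 128-bit formats share no common type.  */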
22311 /* Expand floating point conversion to/from __float128 and __ibm128. */
22313 void
22314 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22316 machine_mode dest_mode = GET_MODE (dest);
22317 machine_mode src_mode = GET_MODE (src);
22318 convert_optab cvt = unknown_optab;
22319 bool do_move = false;
22320 rtx libfunc = NULL_RTX;
22321 rtx dest2;
22322 typedef rtx (*rtx_2func_t) (rtx, rtx);
22323 rtx_2func_t hw_convert = (rtx_2func_t)0;
22324 size_t kf_or_tf;
22326 struct hw_conv_t {
22327 rtx_2func_t from_df;
22328 rtx_2func_t from_sf;
22329 rtx_2func_t from_si_sign;
22330 rtx_2func_t from_si_uns;
22331 rtx_2func_t from_di_sign;
22332 rtx_2func_t from_di_uns;
22333 rtx_2func_t to_df;
22334 rtx_2func_t to_sf;
22335 rtx_2func_t to_si_sign;
22336 rtx_2func_t to_si_uns;
22337 rtx_2func_t to_di_sign;
22338 rtx_2func_t to_di_uns;
22339 } hw_conversions[2] = {
22340 /* conversions to/from KFmode */
22342 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22343 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22344 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22345 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22346 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22347 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22348 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22349 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22350 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22351 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22352 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22353 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22356 /* conversions to/from TFmode */
22358 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22359 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22360 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22361 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22362 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22363 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22364 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22365 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22366 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22367 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22368 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22369 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22373 if (dest_mode == src_mode)
22374 gcc_unreachable ();
22376 /* Eliminate memory operations. */
22377 if (MEM_P (src))
22378 src = force_reg (src_mode, src);
22380 if (MEM_P (dest))
22382 rtx tmp = gen_reg_rtx (dest_mode);
22383 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22384 rs6000_emit_move (dest, tmp, dest_mode);
22385 return;
22388 /* Convert to IEEE 128-bit floating point. */
22389 if (FLOAT128_IEEE_P (dest_mode))
22391 if (dest_mode == KFmode)
22392 kf_or_tf = 0;
22393 else if (dest_mode == TFmode)
22394 kf_or_tf = 1;
22395 else
22396 gcc_unreachable ();
22398 switch (src_mode)
22400 case DFmode:
22401 cvt = sext_optab;
22402 hw_convert = hw_conversions[kf_or_tf].from_df;
22403 break;
22405 case SFmode:
22406 cvt = sext_optab;
22407 hw_convert = hw_conversions[kf_or_tf].from_sf;
22408 break;
22410 case KFmode:
22411 case IFmode:
22412 case TFmode:
22413 if (FLOAT128_IBM_P (src_mode))
22414 cvt = sext_optab;
22415 else
22416 do_move = true;
22417 break;
22419 case SImode:
22420 if (unsigned_p)
22422 cvt = ufloat_optab;
22423 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22425 else
22427 cvt = sfloat_optab;
22428 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22430 break;
22432 case DImode:
22433 if (unsigned_p)
22435 cvt = ufloat_optab;
22436 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22438 else
22440 cvt = sfloat_optab;
22441 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22443 break;
22445 default:
22446 gcc_unreachable ();
22450 /* Convert from IEEE 128-bit floating point. */
22451 else if (FLOAT128_IEEE_P (src_mode))
22453 if (src_mode == KFmode)
22454 kf_or_tf = 0;
22455 else if (src_mode == TFmode)
22456 kf_or_tf = 1;
22457 else
22458 gcc_unreachable ();
22460 switch (dest_mode)
22462 case DFmode:
22463 cvt = trunc_optab;
22464 hw_convert = hw_conversions[kf_or_tf].to_df;
22465 break;
22467 case SFmode:
22468 cvt = trunc_optab;
22469 hw_convert = hw_conversions[kf_or_tf].to_sf;
22470 break;
22472 case KFmode:
22473 case IFmode:
22474 case TFmode:
22475 if (FLOAT128_IBM_P (dest_mode))
22476 cvt = trunc_optab;
22477 else
22478 do_move = true;
22479 break;
22481 case SImode:
22482 if (unsigned_p)
22484 cvt = ufix_optab;
22485 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22487 else
22489 cvt = sfix_optab;
22490 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22492 break;
22494 case DImode:
22495 if (unsigned_p)
22497 cvt = ufix_optab;
22498 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22500 else
22502 cvt = sfix_optab;
22503 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22505 break;
22507 default:
22508 gcc_unreachable ();
22512 /* Both IBM format. */
22513 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22514 do_move = true;
22516 else
22517 gcc_unreachable ();
22519 /* Handle conversion between TFmode/KFmode. */
22520 if (do_move)
22521 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22523 /* Handle conversion if we have hardware support. */
22524 else if (TARGET_FLOAT128_HW && hw_convert)
22525 emit_insn ((hw_convert) (dest, src));
22527 /* Call an external function to do the conversion. */
22528 else if (cvt != unknown_optab)
22530 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22531 gcc_assert (libfunc != NULL_RTX);
22533 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
22534 src_mode);
22536 gcc_assert (dest2 != NULL_RTX);
22537 if (!rtx_equal_p (dest, dest2))
22538 emit_move_insn (dest, dest2);
22541 else
22542 gcc_unreachable ();
22544 return;
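/* When neither a plain move nor a hardware conversion applies above,
   convert_optab_libfunc resolves to a libgcc routine; by the usual
   optab naming convention that would be a call such as __extenddfkf2
   for DFmode -> KFmode (name shown only as an illustration of the
   scheme, not hard-coded anywhere here).  */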
22547 /* Split a conversion from __float128 to an integer type into separate insns.
22548 OPERANDS points to the destination, source, and V2DI temporary
22549 register. CODE is either FIX or UNSIGNED_FIX. */
22551 void
22552 convert_float128_to_int (rtx *operands, enum rtx_code code)
22554 rtx dest = operands[0];
22555 rtx src = operands[1];
22556 rtx tmp = operands[2];
22557 rtx cvt;
22558 rtvec cvt_vec;
22559 rtx cvt_unspec;
22560 rtvec move_vec;
22561 rtx move_unspec;
22563 if (GET_CODE (tmp) == SCRATCH)
22564 tmp = gen_reg_rtx (V2DImode);
22566 if (MEM_P (dest))
22567 dest = rs6000_address_for_fpconvert (dest);
22569 /* Generate the actual convert insn of the form:
22570 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
22571 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
22572 cvt_vec = gen_rtvec (1, cvt);
22573 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
22574 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
22576 /* Generate the move insn of the form:
22577 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
22578 move_vec = gen_rtvec (1, tmp);
22579 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
22580 emit_insn (gen_rtx_SET (dest, move_unspec));
22583 /* Split a conversion from an integer type to __float128 into separate insns.
22584 OPERANDS points to the destination, source, and V2DI temporary
22585 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
22587 void
22588 convert_int_to_float128 (rtx *operands, enum rtx_code code)
22590 rtx dest = operands[0];
22591 rtx src = operands[1];
22592 rtx tmp = operands[2];
22593 rtx cvt;
22594 rtvec cvt_vec;
22595 rtx cvt_unspec;
22596 rtvec move_vec;
22597 rtx move_unspec;
22598 rtx unsigned_flag;
22600 if (GET_CODE (tmp) == SCRATCH)
22601 tmp = gen_reg_rtx (V2DImode);
22603 if (MEM_P (src))
22604 src = rs6000_address_for_fpconvert (src);
22606 /* Generate the move of the integer into the Altivec register of the form:
22607 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
22608 (const_int 0)] UNSPEC_IEEE128_MOVE)).
or:
22611 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
22613 if (GET_MODE (src) == SImode)
22615 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
22616 move_vec = gen_rtvec (2, src, unsigned_flag);
22618 else
22619 move_vec = gen_rtvec (1, src);
22621 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
22622 emit_insn (gen_rtx_SET (tmp, move_unspec));
22624 /* Generate the actual convert insn of the form:
22625 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
22626 UNSPEC_IEEE128_CONVERT))). */
22627 cvt_vec = gen_rtvec (1, tmp);
22628 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
22629 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
22630 emit_insn (gen_rtx_SET (dest, cvt));
22634 /* Emit the RTL for an sISEL pattern. */
22636 void
22637 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
22639 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
22642 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22643 can be used as that dest register. Return the dest register. */
22646 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22648 if (op2 == const0_rtx)
22649 return op1;
22651 if (GET_CODE (scratch) == SCRATCH)
22652 scratch = gen_reg_rtx (mode);
22654 if (logical_operand (op2, mode))
22655 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22656 else
22657 emit_insn (gen_rtx_SET (scratch,
22658 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22660 return scratch;
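/* In other words, the helper above reduces "op1 == op2" to a test of
   SCRATCH against zero: when op2 fits a logical insn (a register or a
   suitable immediate) it emits scratch = op1 ^ op2, otherwise
   scratch = op1 + (-op2); either way SCRATCH becomes zero exactly
   when the two operands are equal.  */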
22663 void
22664 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22666 rtx condition_rtx;
22667 machine_mode op_mode;
22668 enum rtx_code cond_code;
22669 rtx result = operands[0];
22671 condition_rtx = rs6000_generate_compare (operands[1], mode);
22672 cond_code = GET_CODE (condition_rtx);
22674 if (FLOAT_MODE_P (mode)
22675 && !TARGET_FPRS && TARGET_HARD_FLOAT)
22677 rtx t;
22679 PUT_MODE (condition_rtx, SImode);
22680 t = XEXP (condition_rtx, 0);
22682 gcc_assert (cond_code == NE || cond_code == EQ);
22684 if (cond_code == NE)
22685 emit_insn (gen_e500_flip_gt_bit (t, t));
22687 emit_insn (gen_move_from_CR_gt_bit (result, t));
22688 return;
22691 if (cond_code == NE
22692 || cond_code == GE || cond_code == LE
22693 || cond_code == GEU || cond_code == LEU
22694 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22696 rtx not_result = gen_reg_rtx (CCEQmode);
22697 rtx not_op, rev_cond_rtx;
22698 machine_mode cc_mode;
22700 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22702 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22703 SImode, XEXP (condition_rtx, 0), const0_rtx);
22704 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22705 emit_insn (gen_rtx_SET (not_result, not_op));
22706 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22709 op_mode = GET_MODE (XEXP (operands[1], 0));
22710 if (op_mode == VOIDmode)
22711 op_mode = GET_MODE (XEXP (operands[1], 1));
22713 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22715 PUT_MODE (condition_rtx, DImode);
22716 convert_move (result, condition_rtx, 0);
22718 else
22720 PUT_MODE (condition_rtx, SImode);
22721 emit_insn (gen_rtx_SET (result, condition_rtx));
22725 /* Emit a branch of kind CODE to location LOC. */
22727 void
22728 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22730 rtx condition_rtx, loc_ref;
22732 condition_rtx = rs6000_generate_compare (operands[0], mode);
22733 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22734 emit_jump_insn (gen_rtx_SET (pc_rtx,
22735 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22736 loc_ref, pc_rtx)));
22739 /* Return the string to output a conditional branch to LABEL, which is
22740 the operand template of the label, or NULL if the branch is really a
22741 conditional return.
22743 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22744 condition code register and its mode specifies what kind of
22745 comparison we made.
22747 REVERSED is nonzero if we should reverse the sense of the comparison.
22749 INSN is the insn. */
22751 char *
22752 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22754 static char string[64];
22755 enum rtx_code code = GET_CODE (op);
22756 rtx cc_reg = XEXP (op, 0);
22757 machine_mode mode = GET_MODE (cc_reg);
22758 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22759 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22760 int really_reversed = reversed ^ need_longbranch;
22761 char *s = string;
22762 const char *ccode;
22763 const char *pred;
22764 rtx note;
22766 validate_condition_mode (code, mode);
22768 /* Work out which way this really branches. We could use
22769 reverse_condition_maybe_unordered here always but this
22770 makes the resulting assembler clearer. */
22771 if (really_reversed)
22773 /* Reversal of FP compares takes care -- an ordered compare
22774 becomes an unordered compare and vice versa. */
22775 if (mode == CCFPmode)
22776 code = reverse_condition_maybe_unordered (code);
22777 else
22778 code = reverse_condition (code);
22781 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
22783 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
22784 to the GT bit. */
22785 switch (code)
22787 case EQ:
22788 /* Opposite of GT. */
22789 code = GT;
22790 break;
22792 case NE:
22793 code = UNLE;
22794 break;
22796 default:
22797 gcc_unreachable ();
22801 switch (code)
22803 /* Not all of these are actually distinct opcodes, but
22804 we distinguish them for clarity of the resulting assembler. */
22805 case NE: case LTGT:
22806 ccode = "ne"; break;
22807 case EQ: case UNEQ:
22808 ccode = "eq"; break;
22809 case GE: case GEU:
22810 ccode = "ge"; break;
22811 case GT: case GTU: case UNGT:
22812 ccode = "gt"; break;
22813 case LE: case LEU:
22814 ccode = "le"; break;
22815 case LT: case LTU: case UNLT:
22816 ccode = "lt"; break;
22817 case UNORDERED: ccode = "un"; break;
22818 case ORDERED: ccode = "nu"; break;
22819 case UNGE: ccode = "nl"; break;
22820 case UNLE: ccode = "ng"; break;
22821 default:
22822 gcc_unreachable ();
22825 /* Maybe we have a guess as to how likely the branch is. */
22826 pred = "";
22827 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22828 if (note != NULL_RTX)
22830 /* PROB is the difference from 50%. */
22831 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
22833 /* Only hint for highly probable/improbable branches on newer cpus when
22834 we have real profile data, as static prediction overrides processor
22835 dynamic prediction. For older cpus we may as well always hint, but
22836 assume not taken for branches that are very close to 50% as a
22837 mispredicted taken branch is more expensive than a
22838 mispredicted not-taken branch. */
22839 if (rs6000_always_hint
22840 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22841 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22842 && br_prob_note_reliable_p (note)))
22844 if (abs (prob) > REG_BR_PROB_BASE / 20
22845 && ((prob > 0) ^ need_longbranch))
22846 pred = "+";
22847 else
22848 pred = "-";
22852 if (label == NULL)
22853 s += sprintf (s, "b%slr%s ", ccode, pred);
22854 else
22855 s += sprintf (s, "b%s%s ", ccode, pred);
22857 /* We need to escape any '%' characters in the reg_names string.
22858 Assume they'd only be the first character.... */
22859 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22860 *s++ = '%';
22861 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22863 if (label != NULL)
22865 /* If the branch distance was too far, we may have to use an
22866 unconditional branch to go the distance. */
22867 if (need_longbranch)
22868 s += sprintf (s, ",$+8\n\tb %s", label);
22869 else
22870 s += sprintf (s, ",%s", label);
22873 return string;
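/* Sample strings produced above (label and register names
   illustrative; some targets prefix CR names differently):
	"beq cr0,.L2"		plain conditional branch
	"bne+ cr0,.L2"		with a predict-taken hint
	"beqlr cr0"		conditional return (LABEL == NULL)
	"beq cr0,$+8\n\tb .L2"	long form: condition reversed, with an
				unconditional branch to span the distance.  */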
22876 /* Return the string to flip the GT bit on a CR. */
22877 char *
22878 output_e500_flip_gt_bit (rtx dst, rtx src)
22880 static char string[64];
22881 int a, b;
22883 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
22884 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
22886 /* GT bit. */
22887 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
22888 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
22890 sprintf (string, "crnot %d,%d", a, b);
22891 return string;
22894 /* Return insn for VSX or Altivec comparisons. */
22896 static rtx
22897 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22899 rtx mask;
22900 machine_mode mode = GET_MODE (op0);
22902 switch (code)
22904 default:
22905 break;
22907 case GE:
22908 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22909 return NULL_RTX;
22911 case EQ:
22912 case GT:
22913 case GTU:
22914 case ORDERED:
22915 case UNORDERED:
22916 case UNEQ:
22917 case LTGT:
22918 mask = gen_reg_rtx (mode);
22919 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22920 return mask;
22923 return NULL_RTX;
22926 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22927 DMODE is expected destination mode. This is a recursive function. */
22929 static rtx
22930 rs6000_emit_vector_compare (enum rtx_code rcode,
22931 rtx op0, rtx op1,
22932 machine_mode dmode)
22934 rtx mask;
22935 bool swap_operands = false;
22936 bool try_again = false;
22938 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22939 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22941 /* See if the comparison works as is. */
22942 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22943 if (mask)
22944 return mask;
22946 switch (rcode)
22948 case LT:
22949 rcode = GT;
22950 swap_operands = true;
22951 try_again = true;
22952 break;
22953 case LTU:
22954 rcode = GTU;
22955 swap_operands = true;
22956 try_again = true;
22957 break;
22958 case NE:
22959 case UNLE:
22960 case UNLT:
22961 case UNGE:
22962 case UNGT:
22963 /* Invert condition and try again.
22964 e.g., A != B becomes ~(A==B). */
22966 enum rtx_code rev_code;
22967 enum insn_code nor_code;
22968 rtx mask2;
22970 rev_code = reverse_condition_maybe_unordered (rcode);
22971 if (rev_code == UNKNOWN)
22972 return NULL_RTX;
22974 nor_code = optab_handler (one_cmpl_optab, dmode);
22975 if (nor_code == CODE_FOR_nothing)
22976 return NULL_RTX;
22978 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22979 if (!mask2)
22980 return NULL_RTX;
22982 mask = gen_reg_rtx (dmode);
22983 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22984 return mask;
22986 break;
22987 case GE:
22988 case GEU:
22989 case LE:
22990 case LEU:
22991 /* Try GT/GTU/LT/LTU OR EQ */
22993 rtx c_rtx, eq_rtx;
22994 enum insn_code ior_code;
22995 enum rtx_code new_code;
22997 switch (rcode)
22999 case GE:
23000 new_code = GT;
23001 break;
23003 case GEU:
23004 new_code = GTU;
23005 break;
23007 case LE:
23008 new_code = LT;
23009 break;
23011 case LEU:
23012 new_code = LTU;
23013 break;
23015 default:
23016 gcc_unreachable ();
23019 ior_code = optab_handler (ior_optab, dmode);
23020 if (ior_code == CODE_FOR_nothing)
23021 return NULL_RTX;
23023 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
23024 if (!c_rtx)
23025 return NULL_RTX;
23027 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
23028 if (!eq_rtx)
23029 return NULL_RTX;
23031 mask = gen_reg_rtx (dmode);
23032 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
23033 return mask;
23035 break;
23036 default:
23037 return NULL_RTX;
23040 if (try_again)
23042 if (swap_operands)
23043 std::swap (op0, op1);
23045 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23046 if (mask)
23047 return mask;
23050 /* You only get two chances. */
23051 return NULL_RTX;
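/* Recursion sketch for the fallback cases above: NE is built as
   ~(A == B) through one_cmpl_optab, and LE as (A < B) | (A == B)
   through ior_optab, with LT itself retried as GT on swapped
   operands; e.g. a V4SI "a <= b" ends up as a vcmpgtsw (operands
   swapped), a vcmpequw, and an OR of the two masks (exact insns
   depend on the mode and vector unit).  */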
23054 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
23055 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
23056 operands for the relation operation COND. */
23059 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
23060 rtx cond, rtx cc_op0, rtx cc_op1)
23062 machine_mode dest_mode = GET_MODE (dest);
23063 machine_mode mask_mode = GET_MODE (cc_op0);
23064 enum rtx_code rcode = GET_CODE (cond);
23065 machine_mode cc_mode = CCmode;
23066 rtx mask;
23067 rtx cond2;
23068 rtx tmp;
23069 bool invert_move = false;
23071 if (VECTOR_UNIT_NONE_P (dest_mode))
23072 return 0;
23074 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
23075 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
23077 switch (rcode)
23079 /* Swap operands if we can, and fall back to doing the operation as
23080 specified, and doing a NOR to invert the test. */
23081 case NE:
23082 case UNLE:
23083 case UNLT:
23084 case UNGE:
23085 case UNGT:
23086 /* Invert condition and try again.
23087 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
23088 invert_move = true;
23089 rcode = reverse_condition_maybe_unordered (rcode);
23090 if (rcode == UNKNOWN)
23091 return 0;
23092 break;
23094 /* Mark unsigned tests with CCUNSmode. */
23095 case GTU:
23096 case GEU:
23097 case LTU:
23098 case LEU:
23099 cc_mode = CCUNSmode;
23100 break;
23102 default:
23103 break;
23106 /* Get the vector mask for the given relational operations. */
23107 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
23109 if (!mask)
23110 return 0;
23112 if (invert_move)
23114 tmp = op_true;
23115 op_true = op_false;
23116 op_false = tmp;
23119 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
23120 CONST0_RTX (dest_mode));
23121 emit_insn (gen_rtx_SET (dest,
23122 gen_rtx_IF_THEN_ELSE (dest_mode,
23123 cond2,
23124 op_true,
23125 op_false)));
23126 return 1;
23129 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
23130 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
23131 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
23132 hardware has no such operation. */
23134 static int
23135 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23137 enum rtx_code code = GET_CODE (op);
23138 rtx op0 = XEXP (op, 0);
23139 rtx op1 = XEXP (op, 1);
23140 machine_mode compare_mode = GET_MODE (op0);
23141 machine_mode result_mode = GET_MODE (dest);
23142 bool max_p = false;
23144 if (result_mode != compare_mode)
23145 return 0;
23147 if (code == GE || code == GT)
23148 max_p = true;
23149 else if (code == LE || code == LT)
23150 max_p = false;
23151 else
23152 return 0;
23154 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
23157 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
23158 max_p = !max_p;
23160 else
23161 return 0;
23163 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
23164 return 1;
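/* So on ISA 3.0 a conditional move like (a >= b ? a : b) in DFmode is
   caught here and emitted as a single xsmaxcdp, and the mirrored
   (a >= b ? b : a) flips max_p and becomes xsmincdp, avoiding the
   usual compare-and-select sequence.  */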
23167 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23168 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
23169 operands of the last comparison is nonzero/true, FALSE_COND if it is
23170 zero/false. Return 0 if the hardware has no such operation. */
23172 static int
23173 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23175 enum rtx_code code = GET_CODE (op);
23176 rtx op0 = XEXP (op, 0);
23177 rtx op1 = XEXP (op, 1);
23178 machine_mode result_mode = GET_MODE (dest);
23179 rtx compare_rtx;
23180 rtx cmove_rtx;
23181 rtx clobber_rtx;
23183 if (!can_create_pseudo_p ())
23184 return 0;
23186 switch (code)
23188 case EQ:
23189 case GE:
23190 case GT:
23191 break;
23193 case NE:
23194 case LT:
23195 case LE:
23196 code = swap_condition (code);
23197 std::swap (op0, op1);
23198 break;
23200 default:
23201 return 0;
23204 /* Generate: [(parallel [(set (dest)
23205 (if_then_else (op (cmp1) (cmp2))
23206 (true)
23207 (false)))
23208 (clobber (scratch))])]. */
23210 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23211 cmove_rtx = gen_rtx_SET (dest,
23212 gen_rtx_IF_THEN_ELSE (result_mode,
23213 compare_rtx,
23214 true_cond,
23215 false_cond));
23217 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23218 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23219 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23221 return 1;
23224 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
23225 operands of the last comparison is nonzero/true, FALSE_COND if it
23226 is zero/false. Return 0 if the hardware has no such operation. */
23229 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23231 enum rtx_code code = GET_CODE (op);
23232 rtx op0 = XEXP (op, 0);
23233 rtx op1 = XEXP (op, 1);
23234 machine_mode compare_mode = GET_MODE (op0);
23235 machine_mode result_mode = GET_MODE (dest);
23236 rtx temp;
23237 bool is_against_zero;
23239 /* These modes should always match. */
23240 if (GET_MODE (op1) != compare_mode
23241 /* In the isel case however, we can use a compare immediate, so
23242 op1 may be a small constant. */
23243 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23244 return 0;
23245 if (GET_MODE (true_cond) != result_mode)
23246 return 0;
23247 if (GET_MODE (false_cond) != result_mode)
23248 return 0;
23250 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23251 if (TARGET_P9_MINMAX
23252 && (compare_mode == SFmode || compare_mode == DFmode)
23253 && (result_mode == SFmode || result_mode == DFmode))
23255 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23256 return 1;
23258 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23259 return 1;
23262 /* Don't allow using floating point comparisons for integer results for
23263 now. */
23264 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23265 return 0;
23267 /* First, work out if the hardware can do this at all, or
23268 if it's too slow.... */
23269 if (!FLOAT_MODE_P (compare_mode))
23271 if (TARGET_ISEL)
23272 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23273 return 0;
23275 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
23276 && SCALAR_FLOAT_MODE_P (compare_mode))
23277 return 0;
23279 is_against_zero = op1 == CONST0_RTX (compare_mode);
23281 /* A floating-point subtract might overflow, underflow, or produce
23282 an inexact result, thus changing the floating-point flags, so it
23283 can't be generated if we care about that. It's safe if one side
23284 of the construct is zero, since then no subtract will be
23285 generated. */
23286 if (SCALAR_FLOAT_MODE_P (compare_mode)
23287 && flag_trapping_math && ! is_against_zero)
23288 return 0;
23290 /* Eliminate half of the comparisons by switching operands, this
23291 makes the remaining code simpler. */
23292 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23293 || code == LTGT || code == LT || code == UNLE)
23295 code = reverse_condition_maybe_unordered (code);
23296 temp = true_cond;
23297 true_cond = false_cond;
23298 false_cond = temp;
23301 /* UNEQ and LTGT take four instructions for a comparison with zero,
23302 it'll probably be faster to use a branch here too. */
23303 if (code == UNEQ && HONOR_NANS (compare_mode))
23304 return 0;
23306 /* We're going to try to implement comparisons by performing
23307 a subtract, then comparing against zero. Unfortunately,
23308 Inf - Inf is NaN which is not zero, and so if we don't
23309 know that the operand is finite and the comparison
23310 would treat EQ differently from UNORDERED, we can't do it. */
23311 if (HONOR_INFINITIES (compare_mode)
23312 && code != GT && code != UNGE
23313 && (GET_CODE (op1) != CONST_DOUBLE
23314 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23315 /* Constructs of the form (a OP b ? a : b) are safe. */
23316 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23317 || (! rtx_equal_p (op0, true_cond)
23318 && ! rtx_equal_p (op1, true_cond))))
23319 return 0;
23321 /* At this point we know we can use fsel. */
23323 /* Reduce the comparison to a comparison against zero. */
23324 if (! is_against_zero)
23326 temp = gen_reg_rtx (compare_mode);
23327 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23328 op0 = temp;
23329 op1 = CONST0_RTX (compare_mode);
23332 /* If we don't care about NaNs we can reduce some of the comparisons
23333 down to faster ones. */
23334 if (! HONOR_NANS (compare_mode))
23335 switch (code)
23337 case GT:
23338 code = LE;
23339 temp = true_cond;
23340 true_cond = false_cond;
23341 false_cond = temp;
23342 break;
23343 case UNGE:
23344 code = GE;
23345 break;
23346 case UNEQ:
23347 code = EQ;
23348 break;
23349 default:
23350 break;
23353 /* Now, reduce everything down to a GE. */
23354 switch (code)
23356 case GE:
23357 break;
23359 case LE:
23360 temp = gen_reg_rtx (compare_mode);
23361 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23362 op0 = temp;
23363 break;
23365 case ORDERED:
23366 temp = gen_reg_rtx (compare_mode);
23367 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23368 op0 = temp;
23369 break;
23371 case EQ:
23372 temp = gen_reg_rtx (compare_mode);
23373 emit_insn (gen_rtx_SET (temp,
23374 gen_rtx_NEG (compare_mode,
23375 gen_rtx_ABS (compare_mode, op0))));
23376 op0 = temp;
23377 break;
23379 case UNGE:
23380 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23381 temp = gen_reg_rtx (result_mode);
23382 emit_insn (gen_rtx_SET (temp,
23383 gen_rtx_IF_THEN_ELSE (result_mode,
23384 gen_rtx_GE (VOIDmode,
23385 op0, op1),
23386 true_cond, false_cond)));
23387 false_cond = true_cond;
23388 true_cond = temp;
23390 temp = gen_reg_rtx (compare_mode);
23391 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23392 op0 = temp;
23393 break;
23395 case GT:
23396 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23397 temp = gen_reg_rtx (result_mode);
23398 emit_insn (gen_rtx_SET (temp,
23399 gen_rtx_IF_THEN_ELSE (result_mode,
23400 gen_rtx_GE (VOIDmode,
23401 op0, op1),
23402 true_cond, false_cond)));
23403 true_cond = false_cond;
23404 false_cond = temp;
23406 temp = gen_reg_rtx (compare_mode);
23407 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23408 op0 = temp;
23409 break;
23411 default:
23412 gcc_unreachable ();
23415 emit_insn (gen_rtx_SET (dest,
23416 gen_rtx_IF_THEN_ELSE (result_mode,
23417 gen_rtx_GE (VOIDmode,
23418 op0, op1),
23419 true_cond, false_cond)));
23420 return 1;
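/* Putting the fsel path above together on a worked example: for
   double r = (a >= b) ? c : d with NaNs not honored, we emit roughly
   (register numbers illustrative):
	fsub	f0,f1,f2	# temp = a - b
	fsel	f3,f0,f4,f5	# r = temp >= 0.0 ? c : d
   LE first negates the temp and EQ uses -fabs(temp), so every case
   funnels into the single GE-against-zero test that fsel provides.  */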
23423 /* Same as above, but for ints (isel). */
23425 static int
23426 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23428 rtx condition_rtx, cr;
23429 machine_mode mode = GET_MODE (dest);
23430 enum rtx_code cond_code;
23431 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23432 bool signedp;
23434 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23435 return 0;
23437 /* We still have to do the compare, because isel doesn't do a
23438 compare, it just looks at the CRx bits set by a previous compare
23439 instruction. */
23440 condition_rtx = rs6000_generate_compare (op, mode);
23441 cond_code = GET_CODE (condition_rtx);
23442 cr = XEXP (condition_rtx, 0);
23443 signedp = GET_MODE (cr) == CCmode;
23445 isel_func = (mode == SImode
23446 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23447 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23449 switch (cond_code)
23451 case LT: case GT: case LTU: case GTU: case EQ:
23452 /* isel handles these directly. */
23453 break;
23455 default:
23456 /* We need to swap the sense of the comparison. */
23458 std::swap (false_cond, true_cond);
23459 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23461 break;
23464 false_cond = force_reg (mode, false_cond);
23465 if (true_cond != const0_rtx)
23466 true_cond = force_reg (mode, true_cond);
23468 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23470 return 1;
23473 const char *
23474 output_isel (rtx *operands)
23476 enum rtx_code code;
23478 code = GET_CODE (operands[1]);
23480 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
23482 gcc_assert (GET_CODE (operands[2]) == REG
23483 && GET_CODE (operands[3]) == REG);
23484 PUT_CODE (operands[1], reverse_condition (code));
23485 return "isel %0,%3,%2,%j1";
23488 return "isel %0,%2,%3,%j1";
23491 void
23492 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23494 machine_mode mode = GET_MODE (op0);
23495 enum rtx_code c;
23496 rtx target;
23498 /* VSX/altivec have direct min/max insns. */
23499 if ((code == SMAX || code == SMIN)
23500 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23501 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23503 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23504 return;
23507 if (code == SMAX || code == SMIN)
23508 c = GE;
23509 else
23510 c = GEU;
23512 if (code == SMAX || code == UMAX)
23513 target = emit_conditional_move (dest, c, op0, op1, mode,
23514 op0, op1, mode, 0);
23515 else
23516 target = emit_conditional_move (dest, c, op0, op1, mode,
23517 op1, op0, mode, 0);
23518 gcc_assert (target);
23519 if (target != dest)
23520 emit_move_insn (dest, target);
23523 /* Split a signbit operation on 64-bit machines with direct move. Also allow
23524 for the value to come from memory or if it is already loaded into a GPR. */
23526 void
23527 rs6000_split_signbit (rtx dest, rtx src)
23529 machine_mode d_mode = GET_MODE (dest);
23530 machine_mode s_mode = GET_MODE (src);
23531 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
23532 rtx shift_reg = dest_di;
23534 gcc_assert (REG_P (dest));
23535 gcc_assert (REG_P (src) || MEM_P (src));
23536 gcc_assert (s_mode == KFmode || s_mode == TFmode);
23538 if (MEM_P (src))
23540 rtx mem = (WORDS_BIG_ENDIAN
23541 ? adjust_address (src, DImode, 0)
23542 : adjust_address (src, DImode, 8));
23543 emit_insn (gen_rtx_SET (dest_di, mem));
23546 else
23548 unsigned int r = REGNO (src);
23550 /* If this is a VSX register, generate the special mfvsrd instruction
23551 to get it in a GPR. Until we support SF and DF modes, that will
23552 always be true. */
23553 gcc_assert (VSX_REGNO_P (r));
23555 if (s_mode == KFmode)
23556 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
23557 else
23558 emit_insn (gen_signbittf2_dm2 (dest_di, src));
23561 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
23562 return;
23565 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23566 COND is true. Mark the jump as unlikely to be taken. */
23568 static void
23569 emit_unlikely_jump (rtx cond, rtx label)
23571 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
23572 rtx x;
23574 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23575 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23576 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
23579 /* A subroutine of the atomic operation splitters. Emit a load-locked
23580 instruction in MODE. For QI/HImode, possibly use a pattern that includes
23581 the zero_extend operation. */
23583 static void
23584 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23586 rtx (*fn) (rtx, rtx) = NULL;
23588 switch (mode)
23590 case QImode:
23591 fn = gen_load_lockedqi;
23592 break;
23593 case HImode:
23594 fn = gen_load_lockedhi;
23595 break;
23596 case SImode:
23597 if (GET_MODE (mem) == QImode)
23598 fn = gen_load_lockedqi_si;
23599 else if (GET_MODE (mem) == HImode)
23600 fn = gen_load_lockedhi_si;
23601 else
23602 fn = gen_load_lockedsi;
23603 break;
23604 case DImode:
23605 fn = gen_load_lockeddi;
23606 break;
23607 case TImode:
23608 fn = gen_load_lockedti;
23609 break;
23610 default:
23611 gcc_unreachable ();
23613 emit_insn (fn (reg, mem));
23616 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23617 instruction in MODE. */
23619 static void
23620 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23622 rtx (*fn) (rtx, rtx, rtx) = NULL;
23624 switch (mode)
23626 case QImode:
23627 fn = gen_store_conditionalqi;
23628 break;
23629 case HImode:
23630 fn = gen_store_conditionalhi;
23631 break;
23632 case SImode:
23633 fn = gen_store_conditionalsi;
23634 break;
23635 case DImode:
23636 fn = gen_store_conditionaldi;
23637 break;
23638 case TImode:
23639 fn = gen_store_conditionalti;
23640 break;
23641 default:
23642 gcc_unreachable ();
23645 /* Emit sync before stwcx. to address PPC405 Erratum. */
23646 if (PPC405_ERRATUM77)
23647 emit_insn (gen_hwsync ());
23649 emit_insn (fn (res, mem, val));
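/* Correspondingly, the store-conditional family: stbcx. (QImode),
   sthcx. (HImode), stwcx. (SImode), stdcx. (DImode) and stqcx.
   (TImode).  Each sets CR0.EQ when the store succeeds, which is the
   condition the callers test through RES.  */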
23652 /* Expand barriers before and after a load_locked/store_cond sequence. */
23654 static rtx
23655 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23657 rtx addr = XEXP (mem, 0);
23658 int strict_p = (reload_in_progress || reload_completed);
23660 if (!legitimate_indirect_address_p (addr, strict_p)
23661 && !legitimate_indexed_address_p (addr, strict_p))
23663 addr = force_reg (Pmode, addr);
23664 mem = replace_equiv_address_nv (mem, addr);
23667 switch (model)
23669 case MEMMODEL_RELAXED:
23670 case MEMMODEL_CONSUME:
23671 case MEMMODEL_ACQUIRE:
23672 break;
23673 case MEMMODEL_RELEASE:
23674 case MEMMODEL_ACQ_REL:
23675 emit_insn (gen_lwsync ());
23676 break;
23677 case MEMMODEL_SEQ_CST:
23678 emit_insn (gen_hwsync ());
23679 break;
23680 default:
23681 gcc_unreachable ();
23683 return mem;
23686 static void
23687 rs6000_post_atomic_barrier (enum memmodel model)
23689 switch (model)
23691 case MEMMODEL_RELAXED:
23692 case MEMMODEL_CONSUME:
23693 case MEMMODEL_RELEASE:
23694 break;
23695 case MEMMODEL_ACQUIRE:
23696 case MEMMODEL_ACQ_REL:
23697 case MEMMODEL_SEQ_CST:
23698 emit_insn (gen_isync ());
23699 break;
23700 default:
23701 gcc_unreachable ();
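/* Taken together, the two routines above give this mapping from
   memory model to barriers (a summary of the switches above):

	model     before the loop   after the loop
	relaxed        -                 -
	consume        -                 -
	acquire        -               isync
	release      lwsync              -
	acq_rel      lwsync            isync
	seq_cst      hwsync            isync  */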
23705 /* A subroutine of the various atomic expanders. For sub-word operations,
23706 we must adjust things to operate on SImode. Given the original MEM,
23707 return a new aligned memory. Also build and return the quantities by
23708 which to shift and mask. */
23710 static rtx
23711 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23713 rtx addr, align, shift, mask, mem;
23714 HOST_WIDE_INT shift_mask;
23715 machine_mode mode = GET_MODE (orig_mem);
23717 /* For smaller modes, we have to implement this via SImode. */
23718 shift_mask = (mode == QImode ? 0x18 : 0x10);
23720 addr = XEXP (orig_mem, 0);
23721 addr = force_reg (GET_MODE (addr), addr);
23723 /* Aligned memory containing subword. Generate a new memory. We
23724 do not want any of the existing MEM_ATTR data, as we're now
23725 accessing memory outside the original object. */
23726 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23727 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23728 mem = gen_rtx_MEM (SImode, align);
23729 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23730 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23731 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23733 /* Shift amount for subword relative to aligned word. */
23734 shift = gen_reg_rtx (SImode);
23735 addr = gen_lowpart (SImode, addr);
23736 rtx tmp = gen_reg_rtx (SImode);
23737 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23738 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23739 if (BYTES_BIG_ENDIAN)
23740 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23741 shift, 1, OPTAB_LIB_WIDEN);
23742 *pshift = shift;
23744 /* Mask for insertion. */
23745 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23746 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23747 *pmask = mask;
23749 return mem;
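/* A small worked example (little-endian, QImode): for a byte at
   address ADDR, the aligned word is at (ADDR & -4), the shift is
   (ADDR & 3) * 8 -- computed above as (ADDR << 3) & 0x18 -- and the
   mask is 0xff << shift.  On big-endian the shift is XORed with 0x18
   so that byte 0 selects the most significant byte of the word.  */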
23752 /* A subroutine of the various atomic expanders. For sub-word operands,
23753 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23755 static rtx
23756 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23758 rtx x;
23760 x = gen_reg_rtx (SImode);
23761 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23762 gen_rtx_NOT (SImode, mask),
23763 oldval)));
23765 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23767 return x;
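/* That is, the result is (OLDVAL & ~MASK) | NEWVAL, where NEWVAL is
   assumed to have been shifted into position by the caller so that it
   has no bits set outside MASK.  */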
23770 /* A subroutine of the various atomic expanders. For sub-word operands,
23771 extract WIDE to NARROW via SHIFT. */
23773 static void
23774 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23776 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23777 wide, 1, OPTAB_LIB_WIDEN);
23778 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23781 /* Expand an atomic compare and swap operation. */
23783 void
23784 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23786 rtx boolval, retval, mem, oldval, newval, cond;
23787 rtx label1, label2, x, mask, shift;
23788 machine_mode mode, orig_mode;
23789 enum memmodel mod_s, mod_f;
23790 bool is_weak;
23792 boolval = operands[0];
23793 retval = operands[1];
23794 mem = operands[2];
23795 oldval = operands[3];
23796 newval = operands[4];
23797 is_weak = (INTVAL (operands[5]) != 0);
23798 mod_s = memmodel_base (INTVAL (operands[6]));
23799 mod_f = memmodel_base (INTVAL (operands[7]));
23800 orig_mode = mode = GET_MODE (mem);
23802 mask = shift = NULL_RTX;
23803 if (mode == QImode || mode == HImode)
23805 /* Before power8, we didn't have access to lbarx/lharx, so we generate
23806 lwarx and shift/mask operations. With power8, we need to do the
23807 comparison in SImode, but the store is still done in QI/HImode. */
23808 oldval = convert_modes (SImode, mode, oldval, 1);
23810 if (!TARGET_SYNC_HI_QI)
23812 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23814 /* Shift and mask OLDVAL into position within the word. */
23815 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23816 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23818 /* Shift and mask NEWVAL into position within the word. */
23819 newval = convert_modes (SImode, mode, newval, 1);
23820 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23821 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23824 /* Prepare to adjust the return value. */
23825 retval = gen_reg_rtx (SImode);
23826 mode = SImode;
23828 else if (reg_overlap_mentioned_p (retval, oldval))
23829 oldval = copy_to_reg (oldval);
23831 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23832 oldval = copy_to_mode_reg (mode, oldval);
23834 if (reg_overlap_mentioned_p (retval, newval))
23835 newval = copy_to_reg (newval);
23837 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23839 label1 = NULL_RTX;
23840 if (!is_weak)
23842 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23843 emit_label (XEXP (label1, 0));
23845 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23847 emit_load_locked (mode, retval, mem);
23849 x = retval;
23850 if (mask)
23851 x = expand_simple_binop (SImode, AND, retval, mask,
23852 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23854 cond = gen_reg_rtx (CCmode);
23855 /* If we have TImode, synthesize a comparison. */
23856 if (mode != TImode)
23857 x = gen_rtx_COMPARE (CCmode, x, oldval);
23858 else
23860 rtx xor1_result = gen_reg_rtx (DImode);
23861 rtx xor2_result = gen_reg_rtx (DImode);
23862 rtx or_result = gen_reg_rtx (DImode);
23863 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23864 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23865 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23866 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23868 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23869 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23870 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23871 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23874 emit_insn (gen_rtx_SET (cond, x));
23876 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23877 emit_unlikely_jump (x, label2);
23879 x = newval;
23880 if (mask)
23881 x = rs6000_mask_atomic_subword (retval, newval, mask);
23883 emit_store_conditional (orig_mode, cond, mem, x);
23885 if (!is_weak)
23887 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23888 emit_unlikely_jump (x, label1);
23891 if (!is_mm_relaxed (mod_f))
23892 emit_label (XEXP (label2, 0));
23894 rs6000_post_atomic_barrier (mod_s);
23896 if (is_mm_relaxed (mod_f))
23897 emit_label (XEXP (label2, 0));
23899 if (shift)
23900 rs6000_finish_atomic_subword (operands[1], retval, shift);
23901 else if (mode != GET_MODE (operands[1]))
23902 convert_move (operands[1], retval, 1);
23904 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23905 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23906 emit_insn (gen_rtx_SET (boolval, x));
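/* As an illustration (not literal output), a strong SImode compare and
   swap with seq_cst ordering expands roughly to:

	      hwsync
	loop: lwarx   ret,0,mem
	      cmpw    cr0,ret,oldval
	      bne-    cr0,exit
	      stwcx.  newval,0,mem
	      bne-    cr0,loop
	exit: isync

   leaving CR0.EQ as the success flag that feeds BOOLVAL.  */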
23909 /* Expand an atomic exchange operation. */
23911 void
23912 rs6000_expand_atomic_exchange (rtx operands[])
23914 rtx retval, mem, val, cond;
23915 machine_mode mode;
23916 enum memmodel model;
23917 rtx label, x, mask, shift;
23919 retval = operands[0];
23920 mem = operands[1];
23921 val = operands[2];
23922 model = memmodel_base (INTVAL (operands[3]));
23923 mode = GET_MODE (mem);
23925 mask = shift = NULL_RTX;
23926 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23928 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23930 /* Shift and mask VAL into position within the word. */
23931 val = convert_modes (SImode, mode, val, 1);
23932 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23933 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23935 /* Prepare to adjust the return value. */
23936 retval = gen_reg_rtx (SImode);
23937 mode = SImode;
23940 mem = rs6000_pre_atomic_barrier (mem, model);
23942 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23943 emit_label (XEXP (label, 0));
23945 emit_load_locked (mode, retval, mem);
23947 x = val;
23948 if (mask)
23949 x = rs6000_mask_atomic_subword (retval, val, mask);
23951 cond = gen_reg_rtx (CCmode);
23952 emit_store_conditional (mode, cond, mem, x);
23954 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23955 emit_unlikely_jump (x, label);
23957 rs6000_post_atomic_barrier (model);
23959 if (shift)
23960 rs6000_finish_atomic_subword (operands[0], retval, shift);
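/* An illustrative sketch: an SImode exchange with seq_cst ordering is
   roughly

	      hwsync
	loop: lwarx   ret,0,mem
	      stwcx.  val,0,mem
	      bne-    cr0,loop
	      isync  */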
23963 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23964 to perform. MEM is the memory on which to operate. VAL is the second
23965 operand of the binary operator. BEFORE and AFTER are optional locations to
23966 return the value of MEM either before or after the operation. MODEL_RTX
23967 is a CONST_INT containing the memory model to use. */
23969 void
23970 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23971 rtx orig_before, rtx orig_after, rtx model_rtx)
23973 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23974 machine_mode mode = GET_MODE (mem);
23975 machine_mode store_mode = mode;
23976 rtx label, x, cond, mask, shift;
23977 rtx before = orig_before, after = orig_after;
23979 mask = shift = NULL_RTX;
23980 /* On power8, we want to use SImode for the operation. On earlier systems,
23981 do the operation on a full word and use shift/mask to get the proper byte
23982 or halfword. */
23983 if (mode == QImode || mode == HImode)
23985 if (TARGET_SYNC_HI_QI)
23987 val = convert_modes (SImode, mode, val, 1);
23989 /* Prepare to adjust the return value. */
23990 before = gen_reg_rtx (SImode);
23991 if (after)
23992 after = gen_reg_rtx (SImode);
23993 mode = SImode;
23995 else
23997 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23999 /* Shift and mask VAL into position within the word. */
24000 val = convert_modes (SImode, mode, val, 1);
24001 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24002 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24004 switch (code)
24006 case IOR:
24007 case XOR:
24008 /* We've already zero-extended VAL. That is sufficient to
24009 make certain that it does not affect other bits. */
24010 mask = NULL;
24011 break;
24013 case AND:
24014 /* If we make certain that all of the other bits in VAL are
24015 set, that will be sufficient to not affect other bits. */
24016 x = gen_rtx_NOT (SImode, mask);
24017 x = gen_rtx_IOR (SImode, x, val);
24018 emit_insn (gen_rtx_SET (val, x));
24019 mask = NULL;
24020 break;
24022 case NOT:
24023 case PLUS:
24024 case MINUS:
24025 /* These will all affect bits outside the field and need
24026 adjustment via MASK within the loop. */
24027 break;
24029 default:
24030 gcc_unreachable ();
24033 /* Prepare to adjust the return value. */
24034 before = gen_reg_rtx (SImode);
24035 if (after)
24036 after = gen_reg_rtx (SImode);
24037 store_mode = mode = SImode;
24041 mem = rs6000_pre_atomic_barrier (mem, model);
24043 label = gen_label_rtx ();
24044 emit_label (label);
24045 label = gen_rtx_LABEL_REF (VOIDmode, label);
24047 if (before == NULL_RTX)
24048 before = gen_reg_rtx (mode);
24050 emit_load_locked (mode, before, mem);
24052 if (code == NOT)
24054 x = expand_simple_binop (mode, AND, before, val,
24055 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24056 after = expand_simple_unop (mode, NOT, x, after, 1);
24058 else
24060 after = expand_simple_binop (mode, code, before, val,
24061 after, 1, OPTAB_LIB_WIDEN);
24064 x = after;
24065 if (mask)
24067 x = expand_simple_binop (SImode, AND, after, mask,
24068 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24069 x = rs6000_mask_atomic_subword (before, x, mask);
24071 else if (store_mode != mode)
24072 x = convert_modes (store_mode, mode, x, 1);
24074 cond = gen_reg_rtx (CCmode);
24075 emit_store_conditional (store_mode, cond, mem, x);
24077 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24078 emit_unlikely_jump (x, label);
24080 rs6000_post_atomic_barrier (model);
24082 if (shift)
24084 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
24085 then do the calculations in a SImode register. */
24086 if (orig_before)
24087 rs6000_finish_atomic_subword (orig_before, before, shift);
24088 if (orig_after)
24089 rs6000_finish_atomic_subword (orig_after, after, shift);
24091 else if (store_mode != mode)
24093 /* QImode/HImode on machines with lbarx/lharx where we do the native
24094 operation and then do the calculations in a SImode register. */
24095 if (orig_before)
24096 convert_move (orig_before, before, 1);
24097 if (orig_after)
24098 convert_move (orig_after, after, 1);
24100 else if (orig_after && after != orig_after)
24101 emit_move_insn (orig_after, after);
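/* For example, a relaxed SImode fetch-and-add is emitted roughly as

	loop: lwarx   before,0,mem
	      add     after,before,val
	      stwcx.  after,0,mem
	      bne-    cr0,loop

   with the subword cases wrapping the add in the shift/mask handling
   set up above.  */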
24104 /* Emit instructions to move SRC to DST. Called by splitters for
24105 multi-register moves. It will emit at most one instruction for
24106 each register that is accessed; that is, it won't emit li/lis pairs
24107 (or equivalent for 64-bit code). One of SRC or DST must be a hard
24108 register. */
24110 void
24111 rs6000_split_multireg_move (rtx dst, rtx src)
24113 /* The register number of the first register being moved. */
24114 int reg;
24115 /* The mode that is to be moved. */
24116 machine_mode mode;
24117 /* The mode that the move is being done in, and its size. */
24118 machine_mode reg_mode;
24119 int reg_mode_size;
24120 /* The number of registers that will be moved. */
24121 int nregs;
24123 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
24124 mode = GET_MODE (dst);
24125 nregs = hard_regno_nregs[reg][mode];
24126 if (FP_REGNO_P (reg))
24127 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
24128 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
24129 else if (ALTIVEC_REGNO_P (reg))
24130 reg_mode = V16QImode;
24131 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
24132 reg_mode = DFmode;
24133 else
24134 reg_mode = word_mode;
24135 reg_mode_size = GET_MODE_SIZE (reg_mode);
24137 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
24139 /* TDmode residing in FP registers is special, since the ISA requires that
24140 the lower-numbered word of a register pair is always the most significant
24141 word, even in little-endian mode. This does not match the usual subreg
24142 semantics, so we cannot use simplify_gen_subreg in those cases. Access
24143 the appropriate constituent registers "by hand" in little-endian mode.
24145 Note we do not need to check for destructive overlap here since TDmode
24146 can only reside in even/odd register pairs. */
24147 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
24149 rtx p_src, p_dst;
24150 int i;
24152 for (i = 0; i < nregs; i++)
24154 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
24155 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
24156 else
24157 p_src = simplify_gen_subreg (reg_mode, src, mode,
24158 i * reg_mode_size);
24160 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
24161 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
24162 else
24163 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
24164 i * reg_mode_size);
24166 emit_insn (gen_rtx_SET (p_dst, p_src));
24169 return;
24172 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
24174 /* Move register range backwards, if we might have destructive
24175 overlap. */
24176 int i;
24177 for (i = nregs - 1; i >= 0; i--)
24178 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24179 i * reg_mode_size),
24180 simplify_gen_subreg (reg_mode, src, mode,
24181 i * reg_mode_size)));
24183 else
24185 int i;
24186 int j = -1;
24187 bool used_update = false;
24188 rtx restore_basereg = NULL_RTX;
24190 if (MEM_P (src) && INT_REGNO_P (reg))
24192 rtx breg;
24194 if (GET_CODE (XEXP (src, 0)) == PRE_INC
24195 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
24197 rtx delta_rtx;
24198 breg = XEXP (XEXP (src, 0), 0);
24199 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
24200 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
24201 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
24202 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24203 src = replace_equiv_address (src, breg);
24205 else if (! rs6000_offsettable_memref_p (src, reg_mode))
24207 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
24209 rtx basereg = XEXP (XEXP (src, 0), 0);
24210 if (TARGET_UPDATE)
24212 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
24213 emit_insn (gen_rtx_SET (ndst,
24214 gen_rtx_MEM (reg_mode,
24215 XEXP (src, 0))));
24216 used_update = true;
24218 else
24219 emit_insn (gen_rtx_SET (basereg,
24220 XEXP (XEXP (src, 0), 1)));
24221 src = replace_equiv_address (src, basereg);
24223 else
24225 rtx basereg = gen_rtx_REG (Pmode, reg);
24226 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24227 src = replace_equiv_address (src, basereg);
24231 breg = XEXP (src, 0);
24232 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24233 breg = XEXP (breg, 0);
24235 /* If the base register we are using to address memory is
24236 also a destination reg, then change that register last. */
24237 if (REG_P (breg)
24238 && REGNO (breg) >= REGNO (dst)
24239 && REGNO (breg) < REGNO (dst) + nregs)
24240 j = REGNO (breg) - REGNO (dst);
24242 else if (MEM_P (dst) && INT_REGNO_P (reg))
24244 rtx breg;
24246 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24247 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24249 rtx delta_rtx;
24250 breg = XEXP (XEXP (dst, 0), 0);
24251 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24252 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24253 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24255 /* We have to update the breg before doing the store.
24256 Use store with update, if available. */
24258 if (TARGET_UPDATE)
24260 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24261 emit_insn (TARGET_32BIT
24262 ? (TARGET_POWERPC64
24263 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24264 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24265 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24266 used_update = true;
24268 else
24269 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24270 dst = replace_equiv_address (dst, breg);
24272 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
24273 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24275 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24277 rtx basereg = XEXP (XEXP (dst, 0), 0);
24278 if (TARGET_UPDATE)
24280 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24281 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24282 XEXP (dst, 0)),
24283 nsrc));
24284 used_update = true;
24286 else
24287 emit_insn (gen_rtx_SET (basereg,
24288 XEXP (XEXP (dst, 0), 1)));
24289 dst = replace_equiv_address (dst, basereg);
24291 else
24293 rtx basereg = XEXP (XEXP (dst, 0), 0);
24294 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24295 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24296 && REG_P (basereg)
24297 && REG_P (offsetreg)
24298 && REGNO (basereg) != REGNO (offsetreg));
24299 if (REGNO (basereg) == 0)
24301 rtx tmp = offsetreg;
24302 offsetreg = basereg;
24303 basereg = tmp;
24305 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24306 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24307 dst = replace_equiv_address (dst, basereg);
24310 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24311 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
24314 for (i = 0; i < nregs; i++)
24316 /* Calculate index to next subword. */
24317 ++j;
24318 if (j == nregs)
24319 j = 0;
24321 /* If compiler already emitted move of first word by
24322 store with update, no need to do anything. */
24323 if (j == 0 && used_update)
24324 continue;
24326 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24327 j * reg_mode_size),
24328 simplify_gen_subreg (reg_mode, src, mode,
24329 j * reg_mode_size)));
24331 if (restore_basereg != NULL_RTX)
24332 emit_insn (restore_basereg);
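/* Illustration of the destructive-overlap case above: moving a DImode
   value from r3:r4 into r4:r5 on 32-bit must not clobber r4 before it
   is read, so the subwords are emitted in reverse order:

	mr r5,r4
	mr r4,r3  */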
24337 /* This page contains routines that are used to determine what the
24338 function prologue and epilogue code will do and write them out. */
24340 static inline bool
24341 save_reg_p (int r)
24343 return !call_used_regs[r] && df_regs_ever_live_p (r);
24346 /* Determine whether GP register REG is really used. */
24348 static bool
24349 rs6000_reg_live_or_pic_offset_p (int reg)
24351 /* We need to mark the PIC offset register live under the same conditions
24352 as it is set up; otherwise it won't be saved before we clobber it. */
24354 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24356 if (TARGET_TOC && TARGET_MINIMAL_TOC
24357 && (crtl->calls_eh_return
24358 || df_regs_ever_live_p (reg)
24359 || get_pool_size ()))
24360 return true;
24362 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
24363 && flag_pic)
24364 return true;
24367 /* If the function calls eh_return, claim as used all the registers that
24368 would otherwise be checked for liveness. */
24370 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
24371 && !call_used_regs[reg]);
24374 /* Return the first fixed-point register that is required to be
24375 saved. 32 if none. */
24377 static int
24378 first_reg_to_save (void)
24380 int first_reg;
24382 /* Find lowest numbered live register. */
24383 for (first_reg = 13; first_reg <= 31; first_reg++)
24384 if (save_reg_p (first_reg))
24385 break;
24387 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
24388 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
24389 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24390 || (TARGET_TOC && TARGET_MINIMAL_TOC))
24391 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
24392 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
24394 #if TARGET_MACHO
24395 if (flag_pic
24396 && crtl->uses_pic_offset_table
24397 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24398 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24399 #endif
24401 return first_reg;
24404 /* Similar, for FP regs. */
24406 static int
24407 first_fp_reg_to_save (void)
24409 int first_reg;
24411 /* Find lowest numbered live register. */
24412 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24413 if (save_reg_p (first_reg))
24414 break;
24416 return first_reg;
24419 /* Similar, for AltiVec regs. */
24421 static int
24422 first_altivec_reg_to_save (void)
24424 int i;
24426 /* Stack frame remains as-is unless we are using the AltiVec ABI. */
24427 if (! TARGET_ALTIVEC_ABI)
24428 return LAST_ALTIVEC_REGNO + 1;
24430 /* On Darwin, the unwind routines are compiled without
24431 TARGET_ALTIVEC, and use save_world to save/restore the
24432 altivec registers when necessary. */
24433 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24434 && ! TARGET_ALTIVEC)
24435 return FIRST_ALTIVEC_REGNO + 20;
24437 /* Find lowest numbered live register. */
24438 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24439 if (save_reg_p (i))
24440 break;
24442 return i;
24445 /* Return a 32-bit mask of the AltiVec registers we need to set in
24446 VRSAVE. Bit n of the return value is 1 if Vn is live; bits are
24447 numbered from the MSB, so V0 corresponds to the most significant bit. */
24449 static unsigned int
24450 compute_vrsave_mask (void)
24452 unsigned int i, mask = 0;
24454 /* On Darwin, the unwind routines are compiled without
24455 TARGET_ALTIVEC, and use save_world to save/restore the
24456 call-saved altivec registers when necessary. */
24457 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24458 && ! TARGET_ALTIVEC)
24459 mask |= 0xFFF;
24461 /* First, find out if we use _any_ altivec registers. */
24462 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24463 if (df_regs_ever_live_p (i))
24464 mask |= ALTIVEC_REG_BIT (i);
24466 if (mask == 0)
24467 return mask;
24469 /* Next, remove the argument registers from the set. These must
24470 be in the VRSAVE mask set by the caller, so we don't need to add
24471 them in again. More importantly, the mask we compute here is
24472 used to generate CLOBBERs in the set_vrsave insn, and we do not
24473 wish the argument registers to die. */
24474 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24475 mask &= ~ALTIVEC_REG_BIT (i);
24477 /* Similarly, remove the return value from the set. */
24479 bool yes = false;
24480 diddle_return_value (is_altivec_return_reg, &yes);
24481 if (yes)
24482 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24485 return mask;
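/* For instance, if only V20 is live, ALTIVEC_REG_BIT yields
   1 << (31 - 20) = 0x800; the Darwin save_world case above similarly
   ORs in 0xFFF, covering V20..V31.  */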
24488 /* For a very restricted set of circumstances, we can cut down the
24489 size of prologues/epilogues by calling our own save/restore-the-world
24490 routines. */
24492 static void
24493 compute_save_world_info (rs6000_stack_t *info)
24495 info->world_save_p = 1;
24496 info->world_save_p
24497 = (WORLD_SAVE_P (info)
24498 && DEFAULT_ABI == ABI_DARWIN
24499 && !cfun->has_nonlocal_label
24500 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24501 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24502 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24503 && info->cr_save_p);
24505 /* This will not work in conjunction with sibcalls. Make sure there
24506 are none. (This check is expensive, but seldom executed.) */
24507 if (WORLD_SAVE_P (info))
24509 rtx_insn *insn;
24510 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24511 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24513 info->world_save_p = 0;
24514 break;
24518 if (WORLD_SAVE_P (info))
24520 /* Even if we're not touching VRsave, make sure there's room on the
24521 stack for it, if it looks like we're calling SAVE_WORLD, which
24522 will attempt to save it. */
24523 info->vrsave_size = 4;
24525 /* If we are going to save the world, we need to save the link register too. */
24526 info->lr_save_p = 1;
24528 /* "Save" the VRsave register too if we're saving the world. */
24529 if (info->vrsave_mask == 0)
24530 info->vrsave_mask = compute_vrsave_mask ();
24532 /* Because the Darwin register save/restore routines only handle
24533 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24534 check. */
24535 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24536 && (info->first_altivec_reg_save
24537 >= FIRST_SAVED_ALTIVEC_REGNO));
24540 return;
24544 static void
24545 is_altivec_return_reg (rtx reg, void *xyes)
24547 bool *yes = (bool *) xyes;
24548 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24549 *yes = true;
24553 /* Return whether REG is a global user reg or has been specified by
24554 -ffixed-REG. We should not restore these, and so cannot use
24555 lmw or out-of-line restore functions if there are any. We also
24556 can't save them (well, emit frame notes for them), because frame
24557 unwinding during exception handling will restore saved registers. */
24559 static bool
24560 fixed_reg_p (int reg)
24562 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24563 backend sets it, overriding anything the user might have given. */
24564 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24565 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24566 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24567 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24568 return false;
24570 return fixed_regs[reg];
24573 /* Determine the strategy for saving/restoring registers. */
24575 enum {
24576 SAVE_MULTIPLE = 0x1,
24577 SAVE_INLINE_GPRS = 0x2,
24578 SAVE_INLINE_FPRS = 0x4,
24579 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24580 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24581 SAVE_INLINE_VRS = 0x20,
24582 REST_MULTIPLE = 0x100,
24583 REST_INLINE_GPRS = 0x200,
24584 REST_INLINE_FPRS = 0x400,
24585 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24586 REST_INLINE_VRS = 0x1000
24589 static int
24590 rs6000_savres_strategy (rs6000_stack_t *info,
24591 bool using_static_chain_p)
24593 int strategy = 0;
24595 /* Select between in-line and out-of-line save and restore of regs.
24596 First, all the obvious cases where we don't use out-of-line. */
24597 if (crtl->calls_eh_return
24598 || cfun->machine->ra_need_lr)
24599 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24600 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24601 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24603 if (info->first_gp_reg_save == 32)
24604 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24606 if (info->first_fp_reg_save == 64
24607 /* The out-of-line FP routines use double-precision stores;
24608 we can't use those routines if we don't have such stores. */
24609 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24610 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24612 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24613 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24615 /* Define cutoff for using out-of-line functions to save registers. */
24616 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24618 if (!optimize_size)
24620 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24621 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24622 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24624 else
24626 /* Prefer out-of-line restore if it will exit. */
24627 if (info->first_fp_reg_save > 61)
24628 strategy |= SAVE_INLINE_FPRS;
24629 if (info->first_gp_reg_save > 29)
24631 if (info->first_fp_reg_save == 64)
24632 strategy |= SAVE_INLINE_GPRS;
24633 else
24634 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24636 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24637 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24640 else if (DEFAULT_ABI == ABI_DARWIN)
24642 if (info->first_fp_reg_save > 60)
24643 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24644 if (info->first_gp_reg_save > 29)
24645 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24646 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24648 else
24650 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24651 if (info->first_fp_reg_save > 61)
24652 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24653 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24654 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24657 /* Don't bother to try to save things out-of-line if r11 is occupied
24658 by the static chain. It would require too much fiddling and the
24659 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
24660 pointer on Darwin, and AIX uses r1 or r12. */
24661 if (using_static_chain_p
24662 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24663 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24664 | SAVE_INLINE_GPRS
24665 | SAVE_INLINE_VRS);
24667 /* Saving CR interferes with the exit routines used on the SPE, so
24668 just punt here. */
24669 if (TARGET_SPE_ABI
24670 && info->spe_64bit_regs_used
24671 && info->cr_save_p)
24672 strategy |= REST_INLINE_GPRS;
24674 /* We can only use the out-of-line routines to restore fprs if we've
24675 saved all the registers from first_fp_reg_save in the prologue.
24676 Otherwise, we risk loading garbage. Of course, if we have saved
24677 out-of-line then we know we haven't skipped any fprs. */
24678 if ((strategy & SAVE_INLINE_FPRS)
24679 && !(strategy & REST_INLINE_FPRS))
24681 int i;
24683 for (i = info->first_fp_reg_save; i < 64; i++)
24684 if (fixed_regs[i] || !save_reg_p (i))
24686 strategy |= REST_INLINE_FPRS;
24687 break;
24691 /* Similarly, for altivec regs. */
24692 if ((strategy & SAVE_INLINE_VRS)
24693 && !(strategy & REST_INLINE_VRS))
24695 int i;
24697 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24698 if (fixed_regs[i] || !save_reg_p (i))
24700 strategy |= REST_INLINE_VRS;
24701 break;
24705 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24706 saved is an out-of-line save or restore. Set up the value for
24707 the next test (excluding out-of-line gprs). */
24708 bool lr_save_p = (info->lr_save_p
24709 || !(strategy & SAVE_INLINE_FPRS)
24710 || !(strategy & SAVE_INLINE_VRS)
24711 || !(strategy & REST_INLINE_FPRS)
24712 || !(strategy & REST_INLINE_VRS));
24714 if (TARGET_MULTIPLE
24715 && !TARGET_POWERPC64
24716 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
24717 && info->first_gp_reg_save < 31)
24719 /* Prefer store multiple for saves over out-of-line routines,
24720 since the store-multiple instruction will always be smaller. */
24721 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24723 /* The situation is more complicated with load multiple. We'd
24724 prefer to use the out-of-line routines for restores, since the
24725 "exit" out-of-line routines can handle the restore of LR and the
24726 frame teardown. However, it doesn't make sense to use the
24727 out-of-line routine if that is the only reason we'd need to save
24728 LR, and we can't use the "exit" out-of-line gpr restore if we
24729 have saved some fprs; in those cases it is advantageous to use
24730 load multiple when available. */
24731 if (info->first_fp_reg_save != 64 || !lr_save_p)
24732 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24735 /* Using the "exit" out-of-line routine does not improve code size
24736 if using it would require lr to be saved and if only saving one
24737 or two gprs. */
24738 else if (!lr_save_p && info->first_gp_reg_save > 29)
24739 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24741 /* We can only use load multiple or the out-of-line routines to
24742 restore gprs if we've saved all the registers from
24743 first_gp_reg_save. Otherwise, we risk loading garbage.
24744 Of course, if we have saved out-of-line or used stmw then we know
24745 we haven't skipped any gprs. */
24746 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24747 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24749 int i;
24751 for (i = info->first_gp_reg_save; i < 32; i++)
24752 if (fixed_reg_p (i) || !save_reg_p (i))
24754 strategy |= REST_INLINE_GPRS;
24755 strategy &= ~REST_MULTIPLE;
24756 break;
24760 if (TARGET_ELF && TARGET_64BIT)
24762 if (!(strategy & SAVE_INLINE_FPRS))
24763 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24764 else if (!(strategy & SAVE_INLINE_GPRS)
24765 && info->first_fp_reg_save == 64)
24766 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24768 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24769 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24771 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24772 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24774 return strategy;
24777 /* Calculate the stack information for the current function. This is
24778 complicated by having two separate calling sequences, the AIX calling
24779 sequence and the V.4 calling sequence.
24781 AIX (and Darwin/Mac OS X) stack frames look like:
24782 32-bit 64-bit
24783 SP----> +---------------------------------------+
24784 | back chain to caller | 0 0
24785 +---------------------------------------+
24786 | saved CR | 4 8 (8-11)
24787 +---------------------------------------+
24788 | saved LR | 8 16
24789 +---------------------------------------+
24790 | reserved for compilers | 12 24
24791 +---------------------------------------+
24792 | reserved for binders | 16 32
24793 +---------------------------------------+
24794 | saved TOC pointer | 20 40
24795 +---------------------------------------+
24796 | Parameter save area (P) | 24 48
24797 +---------------------------------------+
24798 | Alloca space (A) | 24+P etc.
24799 +---------------------------------------+
24800 | Local variable space (L) | 24+P+A
24801 +---------------------------------------+
24802 | Float/int conversion temporary (X) | 24+P+A+L
24803 +---------------------------------------+
24804 | Save area for AltiVec registers (W) | 24+P+A+L+X
24805 +---------------------------------------+
24806 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24807 +---------------------------------------+
24808 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24809 +---------------------------------------+
24810 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24811 +---------------------------------------+
24812 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24813 +---------------------------------------+
24814 old SP->| back chain to caller's caller |
24815 +---------------------------------------+
24817 The required alignment for AIX configurations is two words (i.e., 8
24818 or 16 bytes).
24820 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24822 SP----> +---------------------------------------+
24823 | Back chain to caller | 0
24824 +---------------------------------------+
24825 | Save area for CR | 8
24826 +---------------------------------------+
24827 | Saved LR | 16
24828 +---------------------------------------+
24829 | Saved TOC pointer | 24
24830 +---------------------------------------+
24831 | Parameter save area (P) | 32
24832 +---------------------------------------+
24833 | Alloca space (A) | 32+P
24834 +---------------------------------------+
24835 | Local variable space (L) | 32+P+A
24836 +---------------------------------------+
24837 | Save area for AltiVec registers (W) | 32+P+A+L
24838 +---------------------------------------+
24839 | AltiVec alignment padding (Y) | 32+P+A+L+W
24840 +---------------------------------------+
24841 | Save area for GP registers (G) | 32+P+A+L+W+Y
24842 +---------------------------------------+
24843 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24844 +---------------------------------------+
24845 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24846 +---------------------------------------+
24849 V.4 stack frames look like:
24851 SP----> +---------------------------------------+
24852 | back chain to caller | 0
24853 +---------------------------------------+
24854 | caller's saved LR | 4
24855 +---------------------------------------+
24856 | Parameter save area (P) | 8
24857 +---------------------------------------+
24858 | Alloca space (A) | 8+P
24859 +---------------------------------------+
24860 | Varargs save area (V) | 8+P+A
24861 +---------------------------------------+
24862 | Local variable space (L) | 8+P+A+V
24863 +---------------------------------------+
24864 | Float/int conversion temporary (X) | 8+P+A+V+L
24865 +---------------------------------------+
24866 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24867 +---------------------------------------+
24868 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24869 +---------------------------------------+
24870 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24871 +---------------------------------------+
24872 | SPE: area for 64-bit GP registers |
24873 +---------------------------------------+
24874 | SPE alignment padding |
24875 +---------------------------------------+
24876 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24877 +---------------------------------------+
24878 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24879 +---------------------------------------+
24880 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24881 +---------------------------------------+
24882 old SP->| back chain to caller's caller |
24883 +---------------------------------------+
24885 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24886 given. (But note below and in sysv4.h that we require only 8 and
24887 may round up the size of our stack frame anyway. The historical
24888 reason is early versions of powerpc-linux which didn't properly
24889 align the stack at program startup. A happy side-effect is that
24890 -mno-eabi libraries can be used with -meabi programs.)
24892 The EABI configuration defaults to the V.4 layout. However,
24893 the stack alignment requirements may differ. If -mno-eabi is not
24894 given, the required stack alignment is 8 bytes; if -mno-eabi is
24895 given, the required alignment is 16 bytes. (But see V.4 comment
24896 above.) */
24898 #ifndef ABI_STACK_BOUNDARY
24899 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24900 #endif
24902 static rs6000_stack_t *
24903 rs6000_stack_info (void)
24905 /* We should never be called for thunks; we are not set up for that. */
24906 gcc_assert (!cfun->is_thunk);
24908 rs6000_stack_t *info = &stack_info;
24909 int reg_size = TARGET_32BIT ? 4 : 8;
24910 int ehrd_size;
24911 int ehcr_size;
24912 int save_align;
24913 int first_gp;
24914 HOST_WIDE_INT non_fixed_size;
24915 bool using_static_chain_p;
24917 if (reload_completed && info->reload_completed)
24918 return info;
24920 memset (info, 0, sizeof (*info));
24921 info->reload_completed = reload_completed;
24923 if (TARGET_SPE)
24925 /* Cache value so we don't rescan instruction chain over and over. */
24926 if (cfun->machine->spe_insn_chain_scanned_p == 0)
24927 cfun->machine->spe_insn_chain_scanned_p
24928 = spe_func_has_64bit_regs_p () + 1;
24929 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
24932 /* Select which calling sequence. */
24933 info->abi = DEFAULT_ABI;
24935 /* Calculate which registers need to be saved & save area size. */
24936 info->first_gp_reg_save = first_reg_to_save ();
24937 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24938 even if it currently looks like we won't. Reload may need it to
24939 get at a constant; if so, it will have already created a constant
24940 pool entry for it. */
24941 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24942 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24943 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24944 && crtl->uses_const_pool
24945 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24946 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24947 else
24948 first_gp = info->first_gp_reg_save;
24950 info->gp_size = reg_size * (32 - first_gp);
24952 /* For the SPE, we have an additional upper 32-bits on each GPR.
24953 Ideally we should save the entire 64-bits only when the upper
24954 half is used in SIMD instructions. Since we only record
24955 registers live (not the size they are used in), this proves
24956 difficult because we'd have to traverse the instruction chain at
24957 the right time, taking reload into account. This is a real pain,
24958 so we opt to always save the GPRs in 64-bits if even one register
24959 gets used in 64-bits. Otherwise, all the registers in the frame
24960 get saved in 32-bits.
24962 So... when we save all GPRs (except the SP) in 64-bits, the
24963 traditional GP save area will be empty. */
24964 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24965 info->gp_size = 0;
24967 info->first_fp_reg_save = first_fp_reg_to_save ();
24968 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24970 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24971 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24972 - info->first_altivec_reg_save);
24974 /* Does this function call anything? */
24975 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24977 /* Determine if we need to save the condition code registers. */
24978 if (save_reg_p (CR2_REGNO)
24979 || save_reg_p (CR3_REGNO)
24980 || save_reg_p (CR4_REGNO))
24982 info->cr_save_p = 1;
24983 if (DEFAULT_ABI == ABI_V4)
24984 info->cr_size = reg_size;
24987 /* If the current function calls __builtin_eh_return, then we need
24988 to allocate stack space for registers that will hold data for
24989 the exception handler. */
24990 if (crtl->calls_eh_return)
24992 unsigned int i;
24993 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24994 continue;
24996 /* SPE saves EH registers in 64-bits. */
24997 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
24998 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
25000 else
25001 ehrd_size = 0;
25003 /* In the ELFv2 ABI, we also need to allocate space for separate
25004 CR field save areas if the function calls __builtin_eh_return. */
25005 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25007 /* This hard-codes that we have three call-saved CR fields. */
25008 ehcr_size = 3 * reg_size;
25009 /* We do *not* use the regular CR save mechanism. */
25010 info->cr_save_p = 0;
25012 else
25013 ehcr_size = 0;
25015 /* Determine various sizes. */
25016 info->reg_size = reg_size;
25017 info->fixed_size = RS6000_SAVE_AREA;
25018 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
25019 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
25020 TARGET_ALTIVEC ? 16 : 8);
25021 if (FRAME_GROWS_DOWNWARD)
25022 info->vars_size
25023 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
25024 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
25025 - (info->fixed_size + info->vars_size + info->parm_size);
25027 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25028 info->spe_gp_size = 8 * (32 - first_gp);
25030 if (TARGET_ALTIVEC_ABI)
25031 info->vrsave_mask = compute_vrsave_mask ();
25033 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
25034 info->vrsave_size = 4;
25036 compute_save_world_info (info);
25038 /* Calculate the offsets. */
25039 switch (DEFAULT_ABI)
25041 case ABI_NONE:
25042 default:
25043 gcc_unreachable ();
25045 case ABI_AIX:
25046 case ABI_ELFv2:
25047 case ABI_DARWIN:
25048 info->fp_save_offset = -info->fp_size;
25049 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25051 if (TARGET_ALTIVEC_ABI)
25053 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
25055 /* Align stack so vector save area is on a quadword boundary.
25056 The padding goes above the vectors. */
25057 if (info->altivec_size != 0)
25058 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
25060 info->altivec_save_offset = info->vrsave_save_offset
25061 - info->altivec_padding_size
25062 - info->altivec_size;
25063 gcc_assert (info->altivec_size == 0
25064 || info->altivec_save_offset % 16 == 0);
25066 /* Adjust for AltiVec case. */
25067 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
25069 else
25070 info->ehrd_offset = info->gp_save_offset - ehrd_size;
25072 info->ehcr_offset = info->ehrd_offset - ehcr_size;
25073 info->cr_save_offset = reg_size; /* first word when 64-bit. */
25074 info->lr_save_offset = 2*reg_size;
25075 break;
25077 case ABI_V4:
25078 info->fp_save_offset = -info->fp_size;
25079 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25080 info->cr_save_offset = info->gp_save_offset - info->cr_size;
25082 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25084 /* Align stack so SPE GPR save area is aligned on a
25085 double-word boundary. */
25086 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
25087 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
25088 else
25089 info->spe_padding_size = 0;
25091 info->spe_gp_save_offset = info->cr_save_offset
25092 - info->spe_padding_size
25093 - info->spe_gp_size;
25095 /* Adjust for SPE case. */
25096 info->ehrd_offset = info->spe_gp_save_offset;
25098 else if (TARGET_ALTIVEC_ABI)
25100 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
25102 /* Align stack so vector save area is on a quadword boundary. */
25103 if (info->altivec_size != 0)
25104 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
25106 info->altivec_save_offset = info->vrsave_save_offset
25107 - info->altivec_padding_size
25108 - info->altivec_size;
25110 /* Adjust for AltiVec case. */
25111 info->ehrd_offset = info->altivec_save_offset;
25113 else
25114 info->ehrd_offset = info->cr_save_offset;
25116 info->ehrd_offset -= ehrd_size;
25117 info->lr_save_offset = reg_size;
25120 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
25121 info->save_size = RS6000_ALIGN (info->fp_size
25122 + info->gp_size
25123 + info->altivec_size
25124 + info->altivec_padding_size
25125 + info->spe_gp_size
25126 + info->spe_padding_size
25127 + ehrd_size
25128 + ehcr_size
25129 + info->cr_size
25130 + info->vrsave_size,
25131 save_align);
25133 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
25135 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
25136 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
25138 /* Determine if we need to save the link register. */
25139 if (info->calls_p
25140 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25141 && crtl->profile
25142 && !TARGET_PROFILE_KERNEL)
25143 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
25144 #ifdef TARGET_RELOCATABLE
25145 || (DEFAULT_ABI == ABI_V4
25146 && (TARGET_RELOCATABLE || flag_pic > 1)
25147 && get_pool_size () != 0)
25148 #endif
25149 || rs6000_ra_ever_killed ())
25150 info->lr_save_p = 1;
25152 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25153 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25154 && call_used_regs[STATIC_CHAIN_REGNUM]);
25155 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
25157 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
25158 || !(info->savres_strategy & SAVE_INLINE_FPRS)
25159 || !(info->savres_strategy & SAVE_INLINE_VRS)
25160 || !(info->savres_strategy & REST_INLINE_GPRS)
25161 || !(info->savres_strategy & REST_INLINE_FPRS)
25162 || !(info->savres_strategy & REST_INLINE_VRS))
25163 info->lr_save_p = 1;
25165 if (info->lr_save_p)
25166 df_set_regs_ever_live (LR_REGNO, true);
25168 /* Determine if we need to allocate any stack frame:
25170 For AIX we need to push the stack if a frame pointer is needed
25171 (because the stack might be dynamically adjusted), if we are
25172 debugging, if we make calls, or if the sum of fp_save, gp_save,
25173 and local variables is more than the space needed to save all
25174 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
25175 + 18*8 = 288 (GPR13 reserved).
25177 For V.4 we don't have the stack cushion that AIX uses, but assume
25178 that the debugger can handle stackless frames. */
25180 if (info->calls_p)
25181 info->push_p = 1;
25183 else if (DEFAULT_ABI == ABI_V4)
25184 info->push_p = non_fixed_size != 0;
25186 else if (frame_pointer_needed)
25187 info->push_p = 1;
25189 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
25190 info->push_p = 1;
25192 else
25193 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
25195 return info;
25198 /* Return true if the current function uses any GPRs in 64-bit SIMD
25199 mode. */
25201 static bool
25202 spe_func_has_64bit_regs_p (void)
25204 rtx_insn *insns, *insn;
25206 /* Functions that save and restore all the call-saved registers will
25207 need to save/restore the registers in 64-bits. */
25208 if (crtl->calls_eh_return
25209 || cfun->calls_setjmp
25210 || crtl->has_nonlocal_goto)
25211 return true;
25213 insns = get_insns ();
25215 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
25217 if (INSN_P (insn))
25219 rtx i;
25221 /* FIXME: This should be implemented with attributes...
25223 (set_attr "spe64" "true")....then,
25224 if (get_spe64(insn)) return true;
25226 It's the only reliable way to do the stuff below. */
25228 i = PATTERN (insn);
25229 if (GET_CODE (i) == SET)
25231 machine_mode mode = GET_MODE (SET_SRC (i));
25233 if (SPE_VECTOR_MODE (mode))
25234 return true;
25235 if (TARGET_E500_DOUBLE
25236 && (mode == DFmode || FLOAT128_2REG_P (mode)))
25237 return true;
25242 return false;
25245 static void
25246 debug_stack_info (rs6000_stack_t *info)
25248 const char *abi_string;
25250 if (! info)
25251 info = rs6000_stack_info ();
25253 fprintf (stderr, "\nStack information for function %s:\n",
25254 ((current_function_decl && DECL_NAME (current_function_decl))
25255 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
25256 : "<unknown>"));
25258 switch (info->abi)
25260 default: abi_string = "Unknown"; break;
25261 case ABI_NONE: abi_string = "NONE"; break;
25262 case ABI_AIX: abi_string = "AIX"; break;
25263 case ABI_ELFv2: abi_string = "ELFv2"; break;
25264 case ABI_DARWIN: abi_string = "Darwin"; break;
25265 case ABI_V4: abi_string = "V.4"; break;
25268 fprintf (stderr, "\tABI = %5s\n", abi_string);
25270 if (TARGET_ALTIVEC_ABI)
25271 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
25273 if (TARGET_SPE_ABI)
25274 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
25276 if (info->first_gp_reg_save != 32)
25277 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
25279 if (info->first_fp_reg_save != 64)
25280 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
25282 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
25283 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
25284 info->first_altivec_reg_save);
25286 if (info->lr_save_p)
25287 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
25289 if (info->cr_save_p)
25290 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
25292 if (info->vrsave_mask)
25293 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25295 if (info->push_p)
25296 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25298 if (info->calls_p)
25299 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25301 if (info->gp_size)
25302 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25304 if (info->fp_size)
25305 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25307 if (info->altivec_size)
25308 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25309 info->altivec_save_offset);
25311 if (info->spe_gp_size)
25312 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
25313 info->spe_gp_save_offset);
25315 if (info->vrsave_size)
25316 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25317 info->vrsave_save_offset);
25319 if (info->lr_save_p)
25320 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25322 if (info->cr_save_p)
25323 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25325 if (info->varargs_save_offset)
25326 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25328 if (info->total_size)
25329 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25330 info->total_size);
25332 if (info->vars_size)
25333 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25334 info->vars_size);
25336 if (info->parm_size)
25337 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25339 if (info->fixed_size)
25340 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25342 if (info->gp_size)
25343 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25345 if (info->spe_gp_size)
25346 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
25348 if (info->fp_size)
25349 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25351 if (info->altivec_size)
25352 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
25354 if (info->vrsave_size)
25355 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
25357 if (info->altivec_padding_size)
25358 fprintf (stderr, "\taltivec_padding_size= %5d\n",
25359 info->altivec_padding_size);
25361 if (info->spe_padding_size)
25362 fprintf (stderr, "\tspe_padding_size = %5d\n",
25363 info->spe_padding_size);
25365 if (info->cr_size)
25366 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
25368 if (info->save_size)
25369 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
25371 if (info->reg_size != 4)
25372 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
25374 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25376 fprintf (stderr, "\n");
25379 rtx
25380 rs6000_return_addr (int count, rtx frame)
25382 /* Currently we don't optimize very well between prologue and body
25383 code, and for PIC code the result can actually be quite bad, so
25384 don't try to be too clever here. */
25385 if (count != 0
25386 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25388 cfun->machine->ra_needs_full_frame = 1;
25390 return
25391 gen_rtx_MEM
25392 (Pmode,
25393 memory_address
25394 (Pmode,
25395 plus_constant (Pmode,
25396 copy_to_reg
25397 (gen_rtx_MEM (Pmode,
25398 memory_address (Pmode, frame))),
25399 RETURN_ADDRESS_OFFSET)));
25402 cfun->machine->ra_need_lr = 1;
25403 return get_hard_reg_initial_val (Pmode, LR_REGNO);
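/* So __builtin_return_address (0) normally reduces to the initial
   value of LR, while a nonzero COUNT (or any PIC function under V.4
   or Darwin) follows the back chain and loads the word at
   RETURN_ADDRESS_OFFSET within the caller's frame.  */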
25406 /* Say whether a function is a candidate for sibcall handling or not. */
25408 static bool
25409 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25411 tree fntype;
25413 if (decl)
25414 fntype = TREE_TYPE (decl);
25415 else
25416 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25418 /* We can't do it if the called function has more vector parameters
25419 than the current function; there's nowhere to put the VRsave code. */
25420 if (TARGET_ALTIVEC_ABI
25421 && TARGET_ALTIVEC_VRSAVE
25422 && !(decl && decl == current_function_decl))
25424 function_args_iterator args_iter;
25425 tree type;
25426 int nvreg = 0;
25428 /* Functions with vector parameters are required to have a
25429 prototype, so the argument type info must be available
25430 here. */
25431 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25432 if (TREE_CODE (type) == VECTOR_TYPE
25433 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25434 nvreg++;
25436 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25437 if (TREE_CODE (type) == VECTOR_TYPE
25438 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25439 nvreg--;
25441 if (nvreg > 0)
25442 return false;
25445 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25446 functions, because the callee may have a different TOC pointer to
25447 the caller and there's no way to ensure we restore the TOC when
25448 we return. With the secure-plt SYSV ABI we can't make non-local
25449 calls when -fpic/-fPIC because the PLT call stubs use r30. */
25450 if (DEFAULT_ABI == ABI_DARWIN
25451 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25452 && decl
25453 && !DECL_EXTERNAL (decl)
25454 && !DECL_WEAK (decl)
25455 && (*targetm.binds_local_p) (decl))
25456 || (DEFAULT_ABI == ABI_V4
25457 && (!TARGET_SECURE_PLT
25458 || !flag_pic
25459 || (decl
25460 && (*targetm.binds_local_p) (decl)))))
25462 tree attr_list = TYPE_ATTRIBUTES (fntype);
25464 if (!lookup_attribute ("longcall", attr_list)
25465 || lookup_attribute ("shortcall", attr_list))
25466 return true;
25469 return false;
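/* For example (illustrative, not from the sources): on 32-bit SVR4
   without -fpic, a tail call such as

     static int add1 (int x) { return x + 1; }
     int wrap (int x) { return add1 (x); }

   binds locally, has no longcall attribute and passes no vectors, so
   it is accepted here and can end up as a plain "b" instead of a
   "bl" followed by a return.  */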
25472 static int
25473 rs6000_ra_ever_killed (void)
25475 rtx_insn *top;
25476 rtx reg;
25477 rtx_insn *insn;
25479 if (cfun->is_thunk)
25480 return 0;
25482 if (cfun->machine->lr_save_state)
25483 return cfun->machine->lr_save_state - 1;
25485 /* regs_ever_live has LR marked as used if any sibcalls are present,
25486 but this should not force saving and restoring in the
25487 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25488 clobbers LR, so that is inappropriate. */
25490 /* Also, the prologue can generate a store into LR that
25491 doesn't really count, like this:
25493 move LR->R0
25494 bcl to set PIC register
25495 move LR->R31
25496 move R0->LR
25498 When we're called from the epilogue, we need to avoid counting
25499 this as a store. */
25501 push_topmost_sequence ();
25502 top = get_insns ();
25503 pop_topmost_sequence ();
25504 reg = gen_rtx_REG (Pmode, LR_REGNO);
25506 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25508 if (INSN_P (insn))
25510 if (CALL_P (insn))
25512 if (!SIBLING_CALL_P (insn))
25513 return 1;
25515 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25516 return 1;
25517 else if (set_of (reg, insn) != NULL_RTX
25518 && !prologue_epilogue_contains (insn))
25519 return 1;
25522 return 0;
25525 /* Emit the instructions needed to load the TOC register.
25526 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
25527 and there is a constant pool; or for SVR4 -fpic. */
25529 void
25530 rs6000_emit_load_toc_table (int fromprolog)
25532 rtx dest;
25533 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25535 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25537 char buf[30];
25538 rtx lab, tmp1, tmp2, got;
25540 lab = gen_label_rtx ();
25541 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25542 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25543 if (flag_pic == 2)
25545 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25546 need_toc_init = 1;
25548 else
25549 got = rs6000_got_sym ();
25550 tmp1 = tmp2 = dest;
25551 if (!fromprolog)
25553 tmp1 = gen_reg_rtx (Pmode);
25554 tmp2 = gen_reg_rtx (Pmode);
25556 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25557 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25558 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25559 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25561 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25563 emit_insn (gen_load_toc_v4_pic_si ());
25564 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25566 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25568 char buf[30];
25569 rtx temp0 = (fromprolog
25570 ? gen_rtx_REG (Pmode, 0)
25571 : gen_reg_rtx (Pmode));
25573 if (fromprolog)
25575 rtx symF, symL;
25577 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25578 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25580 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25581 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25583 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25584 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25585 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25587 else
25589 rtx tocsym, lab;
25591 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25592 need_toc_init = 1;
25593 lab = gen_label_rtx ();
25594 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25595 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25596 if (TARGET_LINK_STACK)
25597 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25598 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25600 emit_insn (gen_addsi3 (dest, temp0, dest));
25602 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25604 /* This is for AIX code running in non-PIC ELF32. */
25605 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25607 need_toc_init = 1;
25608 emit_insn (gen_elf_high (dest, realsym));
25609 emit_insn (gen_elf_low (dest, dest, realsym));
25611 else
25613 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25615 if (TARGET_32BIT)
25616 emit_insn (gen_load_toc_aix_si (dest));
25617 else
25618 emit_insn (gen_load_toc_aix_di (dest));
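/* For 32-bit SVR4 with -fPIC and a secure PLT, the sequence emitted
   above is roughly (label name illustrative):

     bcl 20,31,.LCF0
   .LCF0:
     mflr 30
     addis 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@ha
     addi 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@l

   materializing the GOT pointer from the address of the bcl.  */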
25622 /* Emit instructions to restore the link register after determining where
25623 its value has been stored. */
25625 void
25626 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25628 rs6000_stack_t *info = rs6000_stack_info ();
25629 rtx operands[2];
25631 operands[0] = source;
25632 operands[1] = scratch;
25634 if (info->lr_save_p)
25636 rtx frame_rtx = stack_pointer_rtx;
25637 HOST_WIDE_INT sp_offset = 0;
25638 rtx tmp;
25640 if (frame_pointer_needed
25641 || cfun->calls_alloca
25642 || info->total_size > 32767)
25644 tmp = gen_frame_mem (Pmode, frame_rtx);
25645 emit_move_insn (operands[1], tmp);
25646 frame_rtx = operands[1];
25648 else if (info->push_p)
25649 sp_offset = info->total_size;
25651 tmp = plus_constant (Pmode, frame_rtx,
25652 info->lr_save_offset + sp_offset);
25653 tmp = gen_frame_mem (Pmode, tmp);
25654 emit_move_insn (tmp, operands[0]);
25656 else
25657 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25659 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25660 state of lr_save_p so any change from here on would be a bug. In
25661 particular, stop rs6000_ra_ever_killed from considering the SET
25662 of lr we may have added just above. */
25663 cfun->machine->lr_save_state = info->lr_save_p + 1;
25666 static GTY(()) alias_set_type set = -1;
25668 alias_set_type
25669 get_TOC_alias_set (void)
25671 if (set == -1)
25672 set = new_alias_set ();
25673 return set;
25676 /* This returns nonzero if the current function uses the TOC. This is
25677 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25678 is generated by the ABI_V4 load_toc_* patterns. */
25679 #if TARGET_ELF
25680 static int
25681 uses_TOC (void)
25683 rtx_insn *insn;
25685 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25686 if (INSN_P (insn))
25688 rtx pat = PATTERN (insn);
25689 int i;
25691 if (GET_CODE (pat) == PARALLEL)
25692 for (i = 0; i < XVECLEN (pat, 0); i++)
25694 rtx sub = XVECEXP (pat, 0, i);
25695 if (GET_CODE (sub) == USE)
25697 sub = XEXP (sub, 0);
25698 if (GET_CODE (sub) == UNSPEC
25699 && XINT (sub, 1) == UNSPEC_TOC)
25700 return 1;
25704 return 0;
25706 #endif
25708 rtx
25709 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25711 rtx tocrel, tocreg, hi;
25713 if (TARGET_DEBUG_ADDR)
25715 if (GET_CODE (symbol) == SYMBOL_REF)
25716 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25717 XSTR (symbol, 0));
25718 else
25720 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25721 GET_RTX_NAME (GET_CODE (symbol)));
25722 debug_rtx (symbol);
25726 if (!can_create_pseudo_p ())
25727 df_set_regs_ever_live (TOC_REGISTER, true);
25729 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25730 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25731 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25732 return tocrel;
25734 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25735 if (largetoc_reg != NULL)
25737 emit_move_insn (largetoc_reg, hi);
25738 hi = largetoc_reg;
25740 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
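/* Roughly: with -mcmodel=small the UNSPEC_TOCREL returned here prints
   as a single TOC-relative access, e.g. "ld 9,var@toc(2)", while the
   HIGH/LO_SUM pair built for the larger code models expands to:

     addis 9,2,var@toc@ha
     ld 9,var@toc@l(9)

   (register numbers and symbol illustrative).  */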
25743 /* Issue assembly directives that create a reference to the given DWARF
25744 FRAME_TABLE_LABEL from the current function section. */
25745 void
25746 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25748 fprintf (asm_out_file, "\t.ref %s\n",
25749 (* targetm.strip_name_encoding) (frame_table_label));
25752 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25753 and the change to the stack pointer. */
25755 static void
25756 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25758 rtvec p;
25759 int i;
25760 rtx regs[3];
25762 i = 0;
25763 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25764 if (hard_frame_needed)
25765 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25766 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25767 || (hard_frame_needed
25768 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25769 regs[i++] = fp;
25771 p = rtvec_alloc (i);
25772 while (--i >= 0)
25774 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25775 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25778 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
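/* Illustratively, the tie built here is an insn that generates no
   machine code:

     (parallel [(set (mem:BLK (reg 1)) (const_int 0))
                (set (mem:BLK (reg 31)) (const_int 0))])

   Its only purpose is to make frame memory accesses appear to depend
   on the stack and frame pointers, so the scheduler cannot reorder
   them across a pointer update.  */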
25781 /* Emit the correct code for allocating stack space, as insns.
25782 If COPY_REG, leave a copy of the old stack pointer in it.
25783 The generated code may use hard register 0 as a temporary. */
25785 static rtx_insn *
25786 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25788 rtx_insn *insn;
25789 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25790 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25791 rtx todec = gen_int_mode (-size, Pmode);
25792 rtx par, set, mem;
25794 if (INTVAL (todec) != -size)
25796 warning (0, "stack frame too large");
25797 emit_insn (gen_trap ());
25798 return 0;
25801 if (crtl->limit_stack)
25803 if (REG_P (stack_limit_rtx)
25804 && REGNO (stack_limit_rtx) > 1
25805 && REGNO (stack_limit_rtx) <= 31)
25807 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
25808 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25809 const0_rtx));
25811 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25812 && TARGET_32BIT
25813 && DEFAULT_ABI == ABI_V4)
25815 rtx toload = gen_rtx_CONST (VOIDmode,
25816 gen_rtx_PLUS (Pmode,
25817 stack_limit_rtx,
25818 GEN_INT (size)));
25820 emit_insn (gen_elf_high (tmp_reg, toload));
25821 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25822 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25823 const0_rtx));
25825 else
25826 warning (0, "stack limit expression is not supported");
25829 if (copy_reg)
25831 if (copy_off != 0)
25832 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25833 else
25834 emit_move_insn (copy_reg, stack_reg);
25837 if (size > 32767)
25839 /* Need a note here so that try_split doesn't get confused. */
25840 if (get_last_insn () == NULL_RTX)
25841 emit_note (NOTE_INSN_DELETED);
25842 insn = emit_move_insn (tmp_reg, todec);
25843 try_split (PATTERN (insn), insn, 0);
25844 todec = tmp_reg;
25847 insn = emit_insn (TARGET_32BIT
25848 ? gen_movsi_update_stack (stack_reg, stack_reg,
25849 todec, stack_reg)
25850 : gen_movdi_di_update_stack (stack_reg, stack_reg,
25851 todec, stack_reg));
25852 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25853 it now and set the alias set/attributes. The above gen_*_update
25854 calls will generate a PARALLEL with the MEM set being the first
25855 operation. */
25856 par = PATTERN (insn);
25857 gcc_assert (GET_CODE (par) == PARALLEL);
25858 set = XVECEXP (par, 0, 0);
25859 gcc_assert (GET_CODE (set) == SET);
25860 mem = SET_DEST (set);
25861 gcc_assert (MEM_P (mem));
25862 MEM_NOTRAP_P (mem) = 1;
25863 set_mem_alias_set (mem, get_frame_alias_set ());
25865 RTX_FRAME_RELATED_P (insn) = 1;
25866 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25867 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
25868 GEN_INT (-size))));
25869 return insn;
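/* Typical 32-bit output (illustrative): a small frame allocates with
   a single "stwu 1,-64(1)"; a frame larger than 32767 bytes first
   builds the negated size in r0, roughly

     lis 0,high(-size)
     ori 0,0,low(-size)
     stwux 1,1,0

   so the back chain is stored by the same insn that moves SP.  */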
25872 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25874 #if PROBE_INTERVAL > 32768
25875 #error Cannot use indexed addressing mode for stack probing
25876 #endif
25878 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25879 inclusive. These are offsets from the current stack pointer. */
25881 static void
25882 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25884 /* See if we have a constant small number of probes to generate. If so,
25885 that's the easy case. */
25886 if (first + size <= 32768)
25888 HOST_WIDE_INT i;
25890 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25891 it exceeds SIZE. If only one probe is needed, this will not
25892 generate any code. Then probe at FIRST + SIZE. */
25893 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25894 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25895 -(first + i)));
25897 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25898 -(first + size)));
25901 /* Otherwise, do the same as above, but in a loop. Note that we must be
25902 extra careful with variables wrapping around because we might be at
25903 the very top (or the very bottom) of the address space and we have
25904 to be able to handle this case properly; in particular, we use an
25905 equality test for the loop condition. */
25906 else
25908 HOST_WIDE_INT rounded_size;
25909 rtx r12 = gen_rtx_REG (Pmode, 12);
25910 rtx r0 = gen_rtx_REG (Pmode, 0);
25912 /* Sanity check for the addressing mode we're going to use. */
25913 gcc_assert (first <= 32768);
25915 /* Step 1: round SIZE to the previous multiple of the interval. */
25917 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25920 /* Step 2: compute initial and final value of the loop counter. */
25922 /* TEST_ADDR = SP + FIRST. */
25923 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25924 -first)));
25926 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25927 if (rounded_size > 32768)
25929 emit_move_insn (r0, GEN_INT (-rounded_size));
25930 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25932 else
25933 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25934 -rounded_size)));
25937 /* Step 3: the loop
25939 do
25940 {
25941 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25942 probe at TEST_ADDR
25943 }
25944 while (TEST_ADDR != LAST_ADDR)
25946 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25947 until it is equal to ROUNDED_SIZE. */
25949 if (TARGET_64BIT)
25950 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
25951 else
25952 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
25955 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25956 that SIZE is equal to ROUNDED_SIZE. */
25958 if (size != rounded_size)
25959 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
25963 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25964 absolute addresses. */
25966 const char *
25967 output_probe_stack_range (rtx reg1, rtx reg2)
25969 static int labelno = 0;
25970 char loop_lab[32];
25971 rtx xops[2];
25973 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25975 /* Loop. */
25976 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25978 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25979 xops[0] = reg1;
25980 xops[1] = GEN_INT (-PROBE_INTERVAL);
25981 output_asm_insn ("addi %0,%0,%1", xops);
25983 /* Probe at TEST_ADDR. */
25984 xops[1] = gen_rtx_REG (Pmode, 0);
25985 output_asm_insn ("stw %1,0(%0)", xops);
25987 /* Test if TEST_ADDR == LAST_ADDR. */
25988 xops[1] = reg2;
25989 if (TARGET_64BIT)
25990 output_asm_insn ("cmpd 0,%0,%1", xops);
25991 else
25992 output_asm_insn ("cmpw 0,%0,%1", xops);
25994 /* Branch. */
25995 fputs ("\tbne 0,", asm_out_file);
25996 assemble_name_raw (asm_out_file, loop_lab);
25997 fputc ('\n', asm_out_file);
25999 return "";
26002 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26003 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26004 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26005 deduce these equivalences by itself so it wasn't necessary to hold
26006 its hand so much. Don't be tempted to always supply d2_f_d_e with
26007 the actual cfa register, i.e. r31 when we are using a hard frame
26008 pointer. That fails when saving regs off r1, and sched moves the
26009 r31 setup past the reg saves. */
26011 static rtx
26012 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
26013 rtx reg2, rtx repl2)
26015 rtx repl;
26017 if (REGNO (reg) == STACK_POINTER_REGNUM)
26019 gcc_checking_assert (val == 0);
26020 repl = NULL_RTX;
26022 else
26023 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26024 GEN_INT (val));
26026 rtx pat = PATTERN (insn);
26027 if (!repl && !reg2)
26029 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
26030 if (GET_CODE (pat) == PARALLEL)
26031 for (int i = 0; i < XVECLEN (pat, 0); i++)
26032 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26034 rtx set = XVECEXP (pat, 0, i);
26036 /* If this PARALLEL has been emitted for out-of-line
26037 register save functions, or store multiple, then omit
26038 eh_frame info for any user-defined global regs. If
26039 eh_frame info is supplied, frame unwinding will
26040 restore a user reg. */
26041 if (!REG_P (SET_SRC (set))
26042 || !fixed_reg_p (REGNO (SET_SRC (set))))
26043 RTX_FRAME_RELATED_P (set) = 1;
26045 RTX_FRAME_RELATED_P (insn) = 1;
26046 return insn;
26049 /* We expect that 'pat' is either a SET or a PARALLEL containing
26050 SETs (and possibly other stuff). In a PARALLEL, all the SETs
26051 are important so they all have to be marked RTX_FRAME_RELATED_P.
26052 Call simplify_replace_rtx on the SETs rather than the whole insn
26053 so as to leave the other stuff alone (for example USE of r12). */
26055 if (GET_CODE (pat) == SET)
26057 if (repl)
26058 pat = simplify_replace_rtx (pat, reg, repl);
26059 if (reg2)
26060 pat = simplify_replace_rtx (pat, reg2, repl2);
26062 else if (GET_CODE (pat) == PARALLEL)
26064 pat = shallow_copy_rtx (pat);
26065 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
26067 for (int i = 0; i < XVECLEN (pat, 0); i++)
26068 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26070 rtx set = XVECEXP (pat, 0, i);
26072 if (repl)
26073 set = simplify_replace_rtx (set, reg, repl);
26074 if (reg2)
26075 set = simplify_replace_rtx (set, reg2, repl2);
26076 XVECEXP (pat, 0, i) = set;
26078 /* Omit eh_frame info for any user-defined global regs. */
26079 if (!REG_P (SET_SRC (set))
26080 || !fixed_reg_p (REGNO (SET_SRC (set))))
26081 RTX_FRAME_RELATED_P (set) = 1;
26084 else
26085 gcc_unreachable ();
26087 RTX_FRAME_RELATED_P (insn) = 1;
26088 if (repl || reg2)
26089 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
26091 return insn;
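/* For instance (illustrative): if INSN stores r31 at offset -8 from
   a frame base in r11, and REG/VAL say that r11 == r1 + 32, the
   attached REG_FRAME_RELATED_EXPR note reads

     (set (mem:P (plus:P (plus:P (reg:P 1) (const_int 32))
                         (const_int -8)))
          (reg:P 31))

   which dwarf2out can interpret as a stack-pointer-relative save.  */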
26094 /* Returns an insn that has a vrsave set operation with the
26095 appropriate CLOBBERs. */
26097 static rtx
26098 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
26100 int nclobs, i;
26101 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
26102 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26104 clobs[0]
26105 = gen_rtx_SET (vrsave,
26106 gen_rtx_UNSPEC_VOLATILE (SImode,
26107 gen_rtvec (2, reg, vrsave),
26108 UNSPECV_SET_VRSAVE));
26110 nclobs = 1;
26112 /* We need to clobber the registers in the mask so the scheduler
26113 does not move sets to VRSAVE before sets of AltiVec registers.
26115 However, if the function receives nonlocal gotos, reload will set
26116 all call saved registers live. We will end up with:
26118 (set (reg 999) (mem))
26119 (parallel [ (set (reg vrsave) (unspec blah))
26120 (clobber (reg 999))])
26122 The clobber will cause the store into reg 999 to be dead, and
26123 flow will attempt to delete an epilogue insn. In this case, we
26124 need an unspec use/set of the register. */
26126 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26127 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26129 if (!epiloguep || call_used_regs [i])
26130 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
26131 gen_rtx_REG (V4SImode, i));
26132 else
26134 rtx reg = gen_rtx_REG (V4SImode, i);
26136 clobs[nclobs++]
26137 = gen_rtx_SET (reg,
26138 gen_rtx_UNSPEC (V4SImode,
26139 gen_rtvec (1, reg), 27));
26143 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26145 for (i = 0; i < nclobs; ++i)
26146 XVECEXP (insn, 0, i) = clobs[i];
26148 return insn;
26151 static rtx
26152 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
26154 rtx addr, mem;
26156 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26157 mem = gen_frame_mem (GET_MODE (reg), addr);
26158 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26161 static rtx
26162 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26164 return gen_frame_set (reg, frame_reg, offset, false);
26167 static rtx
26168 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26170 return gen_frame_set (reg, frame_reg, offset, true);
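/* E.g. (illustrative, 64-bit) gen_frame_store (r0, r1, 16) yields

     (set (mem/c:DI (plus:DI (reg:DI 1) (const_int 16)))
          (reg:DI 0))

   and gen_frame_load simply swaps the SET source and destination.  */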
26173 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26174 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26176 static rtx
26177 emit_frame_save (rtx frame_reg, machine_mode mode,
26178 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26180 rtx reg, insn;
26182 /* Some cases that need register indexed addressing. */
26183 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26184 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
26185 || (TARGET_E500_DOUBLE && mode == DFmode)
26186 || (TARGET_SPE_ABI
26187 && SPE_VECTOR_MODE (mode)
26188 && !SPE_CONST_OFFSET_OK (offset))));
26190 reg = gen_rtx_REG (mode, regno);
26191 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
26192 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26193 NULL_RTX, NULL_RTX);
26196 /* Emit an offset memory reference suitable for a frame store, while
26197 converting to a valid addressing mode. */
26199 static rtx
26200 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26202 rtx int_rtx, offset_rtx;
26204 int_rtx = GEN_INT (offset);
26206 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
26207 || (TARGET_E500_DOUBLE && mode == DFmode))
26209 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
26210 emit_move_insn (offset_rtx, int_rtx);
26212 else
26213 offset_rtx = int_rtx;
26215 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
26218 #ifndef TARGET_FIX_AND_CONTINUE
26219 #define TARGET_FIX_AND_CONTINUE 0
26220 #endif
26222 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
26223 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26224 #define LAST_SAVRES_REGISTER 31
26225 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26227 enum {
26228 SAVRES_LR = 0x1,
26229 SAVRES_SAVE = 0x2,
26230 SAVRES_REG = 0x0c,
26231 SAVRES_GPR = 0,
26232 SAVRES_FPR = 4,
26233 SAVRES_VR = 8
26234 };
26236 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26238 /* Temporary holding space for an out-of-line register save/restore
26239 routine name. */
26240 static char savres_routine_name[30];
26242 /* Return the name for an out-of-line register save/restore routine.
26243 SEL encodes the register class, save vs. restore, and LR handling. */
26245 static char *
26246 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
26248 const char *prefix = "";
26249 const char *suffix = "";
26251 /* Different targets are supposed to define
26252 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26253 routine name could be defined with:
26255 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26257 This is a nice idea in theory, but in reality, things are
26258 complicated in several ways:
26260 - ELF targets have save/restore routines for GPRs.
26262 - SPE targets use different prefixes for 32/64-bit registers, and
26263 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
26265 - PPC64 ELF targets have routines for save/restore of GPRs that
26266 differ in what they do with the link register, so having a set
26267 prefix doesn't work. (We only use one of the save routines at
26268 the moment, though.)
26270 - PPC32 elf targets have "exit" versions of the restore routines
26271 that restore the link register and can save some extra space.
26272 These require an extra suffix. (There are also "tail" versions
26273 of the restore routines and "GOT" versions of the save routines,
26274 but we don't generate those at present. Same problems apply,
26275 though.)
26277 We deal with all this by synthesizing our own prefix/suffix and
26278 using that for the simple sprintf call shown above. */
26279 if (TARGET_SPE)
26281 /* No floating point saves on the SPE. */
26282 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
26284 if ((sel & SAVRES_SAVE))
26285 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
26286 else
26287 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
26289 if ((sel & SAVRES_LR))
26290 suffix = "_x";
26292 else if (DEFAULT_ABI == ABI_V4)
26294 if (TARGET_64BIT)
26295 goto aix_names;
26297 if ((sel & SAVRES_REG) == SAVRES_GPR)
26298 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26299 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26300 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26301 else if ((sel & SAVRES_REG) == SAVRES_VR)
26302 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26303 else
26304 abort ();
26306 if ((sel & SAVRES_LR))
26307 suffix = "_x";
26309 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26311 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26312 /* No out-of-line save/restore routines for GPRs on AIX. */
26313 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
26314 #endif
26316 aix_names:
26317 if ((sel & SAVRES_REG) == SAVRES_GPR)
26318 prefix = ((sel & SAVRES_SAVE)
26319 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26320 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26321 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26323 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26324 if ((sel & SAVRES_LR))
26325 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
26326 else
26327 #endif
26329 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26330 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26333 else if ((sel & SAVRES_REG) == SAVRES_VR)
26334 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26335 else
26336 abort ();
26339 if (DEFAULT_ABI == ABI_DARWIN)
26341 /* The Darwin approach is (slightly) different, in order to be
26342 compatible with code generated by the system toolchain. There is a
26343 single symbol for the start of save sequence, and the code here
26344 embeds an offset into that code on the basis of the first register
26345 to be saved. */
26346 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26347 if ((sel & SAVRES_REG) == SAVRES_GPR)
26348 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26349 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26350 (regno - 13) * 4, prefix, regno);
26351 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26352 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26353 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26354 else if ((sel & SAVRES_REG) == SAVRES_VR)
26355 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26356 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
26357 else
26358 abort ();
26360 else
26361 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26363 return savres_routine_name;
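/* Some names this synthesizes (illustrative): "_savegpr_29" and the
   "exit" variant "_restgpr_29_x" for 32-bit SVR4; "_savegpr0_29" /
   "_savegpr1_29" for the AIX-style LR/no-LR saves; and for Darwin
   "*saveGPR+64 ; save r29-r31", an offset into one shared stub.  */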
26366 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore
26367 routine; SEL selects the routine as in rs6000_savres_routine_name. */
26369 static rtx
26370 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
26372 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26373 ? info->first_gp_reg_save
26374 : (sel & SAVRES_REG) == SAVRES_FPR
26375 ? info->first_fp_reg_save - 32
26376 : (sel & SAVRES_REG) == SAVRES_VR
26377 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26378 : -1);
26379 rtx sym;
26380 int select = sel;
26382 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
26383 versions of the gpr routines. */
26384 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
26385 && info->spe_64bit_regs_used)
26386 select ^= SAVRES_FPR ^ SAVRES_GPR;
26388 /* Don't generate bogus routine names. */
26389 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26390 && regno <= LAST_SAVRES_REGISTER
26391 && select >= 0 && select <= 12);
26393 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26395 if (sym == NULL)
26397 char *name;
26399 name = rs6000_savres_routine_name (info, regno, sel);
26401 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26402 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26403 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26406 return sym;
26409 /* Emit a sequence of insns, including a stack tie if needed, for
26410 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26411 reset the stack pointer, but move the base of the frame into
26412 reg UPDT_REGNO for use by out-of-line register restore routines. */
26414 static rtx
26415 rs6000_emit_stack_reset (rs6000_stack_t *info,
26416 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26417 unsigned updt_regno)
26419 rtx updt_reg_rtx;
26421 /* This blockage is needed so that sched doesn't decide to move
26422 the sp change before the register restores. */
26423 if (DEFAULT_ABI == ABI_V4
26424 || (TARGET_SPE_ABI
26425 && info->spe_64bit_regs_used != 0
26426 && info->first_gp_reg_save != 32))
26427 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
26429 /* If we are restoring registers out-of-line, we will be using the
26430 "exit" variants of the restore routines, which will reset the
26431 stack for us. But we do need to point updt_reg into the
26432 right place for those routines. */
26433 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26435 if (frame_off != 0)
26436 return emit_insn (gen_add3_insn (updt_reg_rtx,
26437 frame_reg_rtx, GEN_INT (frame_off)));
26438 else if (REGNO (frame_reg_rtx) != updt_regno)
26439 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26441 return NULL_RTX;
26444 /* Return the register number used as a pointer by out-of-line
26445 save/restore functions. */
26447 static inline unsigned
26448 ptr_regno_for_savres (int sel)
26450 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26451 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26452 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26455 /* Construct a parallel rtx describing the effect of a call to an
26456 out-of-line register save/restore routine, and emit the insn
26457 or jump_insn as appropriate. */
26459 static rtx
26460 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26461 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26462 machine_mode reg_mode, int sel)
26464 int i;
26465 int offset, start_reg, end_reg, n_regs, use_reg;
26466 int reg_size = GET_MODE_SIZE (reg_mode);
26467 rtx sym;
26468 rtvec p;
26469 rtx par, insn;
26471 offset = 0;
26472 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26473 ? info->first_gp_reg_save
26474 : (sel & SAVRES_REG) == SAVRES_FPR
26475 ? info->first_fp_reg_save
26476 : (sel & SAVRES_REG) == SAVRES_VR
26477 ? info->first_altivec_reg_save
26478 : -1);
26479 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26480 ? 32
26481 : (sel & SAVRES_REG) == SAVRES_FPR
26482 ? 64
26483 : (sel & SAVRES_REG) == SAVRES_VR
26484 ? LAST_ALTIVEC_REGNO + 1
26485 : -1);
26486 n_regs = end_reg - start_reg;
26487 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26488 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26489 + n_regs);
26491 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26492 RTVEC_ELT (p, offset++) = ret_rtx;
26494 RTVEC_ELT (p, offset++)
26495 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26497 sym = rs6000_savres_routine_sym (info, sel);
26498 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26500 use_reg = ptr_regno_for_savres (sel);
26501 if ((sel & SAVRES_REG) == SAVRES_VR)
26503 /* Vector regs are saved/restored using [reg+reg] addressing. */
26504 RTVEC_ELT (p, offset++)
26505 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26506 RTVEC_ELT (p, offset++)
26507 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26509 else
26510 RTVEC_ELT (p, offset++)
26511 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26513 for (i = 0; i < end_reg - start_reg; i++)
26514 RTVEC_ELT (p, i + offset)
26515 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26516 frame_reg_rtx, save_area_offset + reg_size * i,
26517 (sel & SAVRES_SAVE) != 0);
26519 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26520 RTVEC_ELT (p, i + offset)
26521 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26523 par = gen_rtx_PARALLEL (VOIDmode, p);
26525 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26527 insn = emit_jump_insn (par);
26528 JUMP_LABEL (insn) = ret_rtx;
26530 else
26531 insn = emit_insn (par);
26532 return insn;
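/* Shape of the PARALLEL built here for, say, a 32-bit SVR4 save of
   r29-r31 through _savegpr_29 (offsets illustrative):

     (parallel
      [(clobber (reg:SI LR_REGNO))
       (use (symbol_ref "_savegpr_29"))
       (use (reg:SI 11))
       (set (mem:SI (plus:SI (reg:SI 11) (const_int -12))) (reg:SI 29))
       (set (mem:SI (plus:SI (reg:SI 11) (const_int -8))) (reg:SI 30))
       (set (mem:SI (plus:SI (reg:SI 11) (const_int -4))) (reg:SI 31))])
*/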
26535 /* Emit code to store CR fields that need to be saved into REG. */
26537 static void
26538 rs6000_emit_move_from_cr (rtx reg)
26540 /* Only the ELFv2 ABI allows saving just the selected CR fields. */
26541 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26543 int i, cr_reg[8], count = 0;
26545 /* Collect CR fields that must be saved. */
26546 for (i = 0; i < 8; i++)
26547 if (save_reg_p (CR0_REGNO + i))
26548 cr_reg[count++] = i;
26550 /* If it's just a single one, use mfcrf. */
26551 if (count == 1)
26553 rtvec p = rtvec_alloc (1);
26554 rtvec r = rtvec_alloc (2);
26555 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26556 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26557 RTVEC_ELT (p, 0)
26558 = gen_rtx_SET (reg,
26559 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26561 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26562 return;
26565 /* ??? It might be better to handle count == 2 / 3 cases here
26566 as well, using logical operations to combine the values. */
26569 emit_insn (gen_movesi_from_cr (reg));
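/* Emitted code, roughly: under ELFv2 with a single field to save
   (say cr6) this is one single-field move such as "mfocrf 12,2"
   (mask 1 << (7 - 6)); in all other cases it is a full "mfcr 12"
   (register number illustrative).  */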
26572 /* Return whether the split-stack arg pointer (r12) is used. */
26574 static bool
26575 split_stack_arg_pointer_used_p (void)
26577 /* If the pseudo holding the arg pointer is no longer a pseudo,
26578 then the arg pointer is used. */
26579 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26580 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26581 || (REGNO (cfun->machine->split_stack_arg_pointer)
26582 < FIRST_PSEUDO_REGISTER)))
26583 return true;
26585 /* Unfortunately we also need to do some code scanning, since
26586 r12 may have been substituted for the pseudo. */
26587 rtx_insn *insn;
26588 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26589 FOR_BB_INSNS (bb, insn)
26590 if (NONDEBUG_INSN_P (insn))
26592 /* A call destroys r12. */
26593 if (CALL_P (insn))
26594 return false;
26596 df_ref use;
26597 FOR_EACH_INSN_USE (use, insn)
26599 rtx x = DF_REF_REG (use);
26600 if (REG_P (x) && REGNO (x) == 12)
26601 return true;
26603 df_ref def;
26604 FOR_EACH_INSN_DEF (def, insn)
26606 rtx x = DF_REF_REG (def);
26607 if (REG_P (x) && REGNO (x) == 12)
26608 return false;
26611 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26614 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26616 static bool
26617 rs6000_global_entry_point_needed_p (void)
26619 /* Only needed for the ELFv2 ABI. */
26620 if (DEFAULT_ABI != ABI_ELFv2)
26621 return false;
26623 /* With -msingle-pic-base, we assume the whole program shares the same
26624 TOC, so no global entry point prologues are needed anywhere. */
26625 if (TARGET_SINGLE_PIC_BASE)
26626 return false;
26628 /* Ensure we have a global entry point for thunks. ??? We could
26629 avoid that if the target routine doesn't need a global entry point,
26630 but we do not know whether this is the case at this point. */
26631 if (cfun->is_thunk)
26632 return true;
26634 /* For regular functions, rs6000_emit_prologue sets this flag if the
26635 routine ever uses the TOC pointer. */
26636 return cfun->machine->r2_setup_needed;
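/* When this returns true, the prologue gets the standard ELFv2
   global entry sequence, roughly:

   func:
     addis 2,12,.TOC.-func@ha
     addi 2,2,.TOC.-func@l
     .localentry func,.-func

   recomputing the TOC pointer from the entry address in r12.  */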
26639 /* Emit function prologue as insns. */
26641 void
26642 rs6000_emit_prologue (void)
26644 rs6000_stack_t *info = rs6000_stack_info ();
26645 machine_mode reg_mode = Pmode;
26646 int reg_size = TARGET_32BIT ? 4 : 8;
26647 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26648 rtx frame_reg_rtx = sp_reg_rtx;
26649 unsigned int cr_save_regno;
26650 rtx cr_save_rtx = NULL_RTX;
26651 rtx insn;
26652 int strategy;
26653 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26654 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26655 && call_used_regs[STATIC_CHAIN_REGNUM]);
26656 int using_split_stack = (flag_split_stack
26657 && (lookup_attribute ("no_split_stack",
26658 DECL_ATTRIBUTES (cfun->decl))
26659 == NULL));
26661 /* Offset to top of frame for frame_reg and sp respectively. */
26662 HOST_WIDE_INT frame_off = 0;
26663 HOST_WIDE_INT sp_off = 0;
26664 /* sp_adjust is the stack adjusting instruction, tracked so that the
26665 insn setting up the split-stack arg pointer can be emitted just
26666 prior to it, when r12 is not used here for other purposes. */
26667 rtx_insn *sp_adjust = 0;
26669 #if CHECKING_P
26670 /* Track and check usage of r0, r11, r12. */
26671 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26672 #define START_USE(R) do \
26674 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26675 reg_inuse |= 1 << (R); \
26676 } while (0)
26677 #define END_USE(R) do \
26679 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26680 reg_inuse &= ~(1 << (R)); \
26681 } while (0)
26682 #define NOT_INUSE(R) do \
26684 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26685 } while (0)
26686 #else
26687 #define START_USE(R) do {} while (0)
26688 #define END_USE(R) do {} while (0)
26689 #define NOT_INUSE(R) do {} while (0)
26690 #endif
26692 if (DEFAULT_ABI == ABI_ELFv2
26693 && !TARGET_SINGLE_PIC_BASE)
26695 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26697 /* With -mminimal-toc we may generate an extra use of r2 below. */
26698 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
26699 cfun->machine->r2_setup_needed = true;
26703 if (flag_stack_usage_info)
26704 current_function_static_stack_size = info->total_size;
26706 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26708 HOST_WIDE_INT size = info->total_size;
26710 if (crtl->is_leaf && !cfun->calls_alloca)
26712 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26713 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
26714 size - STACK_CHECK_PROTECT);
26716 else if (size > 0)
26717 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
26720 if (TARGET_FIX_AND_CONTINUE)
26722 /* gdb on darwin arranges to forward a function from the old
26723 address by modifying the first 5 instructions of the function
26724 to branch to the overriding function. This is necessary to
26725 permit function pointers that point to the old function to
26726 actually forward to the new function. */
26727 emit_insn (gen_nop ());
26728 emit_insn (gen_nop ());
26729 emit_insn (gen_nop ());
26730 emit_insn (gen_nop ());
26731 emit_insn (gen_nop ());
26734 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26736 reg_mode = V2SImode;
26737 reg_size = 8;
26740 /* Handle world saves specially here. */
26741 if (WORLD_SAVE_P (info))
26743 int i, j, sz;
26744 rtx treg;
26745 rtvec p;
26746 rtx reg0;
26748 /* save_world expects lr in r0. */
26749 reg0 = gen_rtx_REG (Pmode, 0);
26750 if (info->lr_save_p)
26752 insn = emit_move_insn (reg0,
26753 gen_rtx_REG (Pmode, LR_REGNO));
26754 RTX_FRAME_RELATED_P (insn) = 1;
26757 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26758 assumptions about the offsets of various bits of the stack
26759 frame. */
26760 gcc_assert (info->gp_save_offset == -220
26761 && info->fp_save_offset == -144
26762 && info->lr_save_offset == 8
26763 && info->cr_save_offset == 4
26764 && info->push_p
26765 && info->lr_save_p
26766 && (!crtl->calls_eh_return
26767 || info->ehrd_offset == -432)
26768 && info->vrsave_save_offset == -224
26769 && info->altivec_save_offset == -416);
26771 treg = gen_rtx_REG (SImode, 11);
26772 emit_move_insn (treg, GEN_INT (-info->total_size));
26774 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26775 in R11. It also clobbers R12, so beware! */
26777 /* Preserve CR2 for save_world prologues */
26778 sz = 5;
26779 sz += 32 - info->first_gp_reg_save;
26780 sz += 64 - info->first_fp_reg_save;
26781 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26782 p = rtvec_alloc (sz);
26783 j = 0;
26784 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26785 gen_rtx_REG (SImode,
26786 LR_REGNO));
26787 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26788 gen_rtx_SYMBOL_REF (Pmode,
26789 "*save_world"));
26790 /* We do floats first so that the instruction pattern matches
26791 properly. */
26792 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26793 RTVEC_ELT (p, j++)
26794 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26795 ? DFmode : SFmode,
26796 info->first_fp_reg_save + i),
26797 frame_reg_rtx,
26798 info->fp_save_offset + frame_off + 8 * i);
26799 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26800 RTVEC_ELT (p, j++)
26801 = gen_frame_store (gen_rtx_REG (V4SImode,
26802 info->first_altivec_reg_save + i),
26803 frame_reg_rtx,
26804 info->altivec_save_offset + frame_off + 16 * i);
26805 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26806 RTVEC_ELT (p, j++)
26807 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26808 frame_reg_rtx,
26809 info->gp_save_offset + frame_off + reg_size * i);
26811 /* CR register traditionally saved as CR2. */
26812 RTVEC_ELT (p, j++)
26813 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26814 frame_reg_rtx, info->cr_save_offset + frame_off);
26815 /* Explain the use of R0. */
26816 if (info->lr_save_p)
26817 RTVEC_ELT (p, j++)
26818 = gen_frame_store (reg0,
26819 frame_reg_rtx, info->lr_save_offset + frame_off);
26820 /* Explain what happens to the stack pointer. */
26822 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26823 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26826 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26827 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26828 treg, GEN_INT (-info->total_size));
26829 sp_off = frame_off = info->total_size;
26832 strategy = info->savres_strategy;
26834 /* For V.4, update the stack before we do any saving and set the back pointer. */
26835 if (! WORLD_SAVE_P (info)
26836 && info->push_p
26837 && (DEFAULT_ABI == ABI_V4
26838 || crtl->calls_eh_return))
26840 bool need_r11 = (TARGET_SPE
26841 ? (!(strategy & SAVE_INLINE_GPRS)
26842 && info->spe_64bit_regs_used == 0)
26843 : (!(strategy & SAVE_INLINE_FPRS)
26844 || !(strategy & SAVE_INLINE_GPRS)
26845 || !(strategy & SAVE_INLINE_VRS)));
26846 int ptr_regno = -1;
26847 rtx ptr_reg = NULL_RTX;
26848 int ptr_off = 0;
26850 if (info->total_size < 32767)
26851 frame_off = info->total_size;
26852 else if (need_r11)
26853 ptr_regno = 11;
26854 else if (info->cr_save_p
26855 || info->lr_save_p
26856 || info->first_fp_reg_save < 64
26857 || info->first_gp_reg_save < 32
26858 || info->altivec_size != 0
26859 || info->vrsave_size != 0
26860 || crtl->calls_eh_return)
26861 ptr_regno = 12;
26862 else
26864 /* The prologue won't be saving any regs so there is no need
26865 to set up a frame register to access any frame save area.
26866 We also won't be using frame_off anywhere below, but set
26867 the correct value anyway to protect against future
26868 changes to this function. */
26869 frame_off = info->total_size;
26871 if (ptr_regno != -1)
26873 /* Set up the frame offset to that needed by the first
26874 out-of-line save function. */
26875 START_USE (ptr_regno);
26876 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26877 frame_reg_rtx = ptr_reg;
26878 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26879 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26880 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26881 ptr_off = info->gp_save_offset + info->gp_size;
26882 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26883 ptr_off = info->altivec_save_offset + info->altivec_size;
26884 frame_off = -ptr_off;
26886 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26887 ptr_reg, ptr_off);
26888 if (REGNO (frame_reg_rtx) == 12)
26889 sp_adjust = 0;
26890 sp_off = info->total_size;
26891 if (frame_reg_rtx != sp_reg_rtx)
26892 rs6000_emit_stack_tie (frame_reg_rtx, false);
26895 /* If we use the link register, get it into r0. */
26896 if (!WORLD_SAVE_P (info) && info->lr_save_p)
26898 rtx addr, reg, mem;
26900 reg = gen_rtx_REG (Pmode, 0);
26901 START_USE (0);
26902 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26903 RTX_FRAME_RELATED_P (insn) = 1;
26905 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26906 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26908 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26909 GEN_INT (info->lr_save_offset + frame_off));
26910 mem = gen_rtx_MEM (Pmode, addr);
26911 /* This should not be of rs6000_sr_alias_set, because of
26912 __builtin_return_address. */
26914 insn = emit_move_insn (mem, reg);
26915 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26916 NULL_RTX, NULL_RTX);
26917 END_USE (0);
26921 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26922 r12 will be needed by the out-of-line gpr save. */
26923 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26924 && !(strategy & (SAVE_INLINE_GPRS
26925 | SAVE_NOINLINE_GPRS_SAVES_LR))
26926 ? 11 : 12);
26927 if (!WORLD_SAVE_P (info)
26928 && info->cr_save_p
26929 && REGNO (frame_reg_rtx) != cr_save_regno
26930 && !(using_static_chain_p && cr_save_regno == 11)
26931 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26933 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26934 START_USE (cr_save_regno);
26935 rs6000_emit_move_from_cr (cr_save_rtx);
26938 /* Do any required saving of fpr's. If only one or two to save, do
26939 it ourselves. Otherwise, call an out-of-line save function. */
26940 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26942 int i;
26943 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26944 if (save_reg_p (info->first_fp_reg_save + i))
26945 emit_frame_save (frame_reg_rtx,
26946 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26947 ? DFmode : SFmode),
26948 info->first_fp_reg_save + i,
26949 info->fp_save_offset + frame_off + 8 * i,
26950 sp_off - frame_off);
26952 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26954 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26955 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26956 unsigned ptr_regno = ptr_regno_for_savres (sel);
26957 rtx ptr_reg = frame_reg_rtx;
26959 if (REGNO (frame_reg_rtx) == ptr_regno)
26960 gcc_checking_assert (frame_off == 0);
26961 else
26963 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26964 NOT_INUSE (ptr_regno);
26965 emit_insn (gen_add3_insn (ptr_reg,
26966 frame_reg_rtx, GEN_INT (frame_off)));
26968 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26969 info->fp_save_offset,
26970 info->lr_save_offset,
26971 DFmode, sel);
26972 rs6000_frame_related (insn, ptr_reg, sp_off,
26973 NULL_RTX, NULL_RTX);
26974 if (lr)
26975 END_USE (0);
26978 /* Save GPRs. This is done as a PARALLEL if we are using
26979 the store-multiple instructions. */
26980 if (!WORLD_SAVE_P (info)
26981 && TARGET_SPE_ABI
26982 && info->spe_64bit_regs_used != 0
26983 && info->first_gp_reg_save != 32)
26985 int i;
26986 rtx spe_save_area_ptr;
26987 HOST_WIDE_INT save_off;
26988 int ool_adjust = 0;
26990 /* Determine whether we can address all of the registers that need
26991 to be saved with an offset from frame_reg_rtx that fits in
26992 the small const field for SPE memory instructions. */
26993 int spe_regs_addressable
26994 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
26995 + reg_size * (32 - info->first_gp_reg_save - 1))
26996 && (strategy & SAVE_INLINE_GPRS));
26998 if (spe_regs_addressable)
27000 spe_save_area_ptr = frame_reg_rtx;
27001 save_off = frame_off;
27003 else
27005 /* Make r11 point to the start of the SPE save area. We need
27006 to be careful here if r11 is holding the static chain. If
27007 it is, then temporarily save it in r0. */
27008 HOST_WIDE_INT offset;
27010 if (!(strategy & SAVE_INLINE_GPRS))
27011 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
27012 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
27013 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
27014 save_off = frame_off - offset;
27016 if (using_static_chain_p)
27018 rtx r0 = gen_rtx_REG (Pmode, 0);
27020 START_USE (0);
27021 gcc_assert (info->first_gp_reg_save > 11);
27023 emit_move_insn (r0, spe_save_area_ptr);
27025 else if (REGNO (frame_reg_rtx) != 11)
27026 START_USE (11);
27028 emit_insn (gen_addsi3 (spe_save_area_ptr,
27029 frame_reg_rtx, GEN_INT (offset)));
27030 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
27031 frame_off = -info->spe_gp_save_offset + ool_adjust;
27034 if ((strategy & SAVE_INLINE_GPRS))
27036 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27037 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27038 emit_frame_save (spe_save_area_ptr, reg_mode,
27039 info->first_gp_reg_save + i,
27040 (info->spe_gp_save_offset + save_off
27041 + reg_size * i),
27042 sp_off - save_off);
27044 else
27046 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
27047 info->spe_gp_save_offset + save_off,
27048 0, reg_mode,
27049 SAVRES_SAVE | SAVRES_GPR);
27051 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
27052 NULL_RTX, NULL_RTX);
27055 /* Move the static chain pointer back. */
27056 if (!spe_regs_addressable)
27058 if (using_static_chain_p)
27060 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
27061 END_USE (0);
27063 else if (REGNO (frame_reg_rtx) != 11)
27064 END_USE (11);
27067 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27069 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27070 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27071 unsigned ptr_regno = ptr_regno_for_savres (sel);
27072 rtx ptr_reg = frame_reg_rtx;
27073 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27074 int end_save = info->gp_save_offset + info->gp_size;
27075 int ptr_off;
27077 if (ptr_regno == 12)
27078 sp_adjust = 0;
27079 if (!ptr_set_up)
27080 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27082 /* Need to adjust r11 (r12) if we saved any FPRs. */
27083 if (end_save + frame_off != 0)
27085 rtx offset = GEN_INT (end_save + frame_off);
27087 if (ptr_set_up)
27088 frame_off = -end_save;
27089 else
27090 NOT_INUSE (ptr_regno);
27091 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27093 else if (!ptr_set_up)
27095 NOT_INUSE (ptr_regno);
27096 emit_move_insn (ptr_reg, frame_reg_rtx);
27098 ptr_off = -end_save;
27099 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27100 info->gp_save_offset + ptr_off,
27101 info->lr_save_offset + ptr_off,
27102 reg_mode, sel);
27103 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27104 NULL_RTX, NULL_RTX);
27105 if (lr)
27106 END_USE (0);
27108 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27110 rtvec p;
27111 int i;
27112 p = rtvec_alloc (32 - info->first_gp_reg_save);
27113 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27114 RTVEC_ELT (p, i)
27115 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27116 frame_reg_rtx,
27117 info->gp_save_offset + frame_off + reg_size * i);
27118 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27119 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27120 NULL_RTX, NULL_RTX);
27122 else if (!WORLD_SAVE_P (info))
27124 int i;
27125 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27126 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27127 emit_frame_save (frame_reg_rtx, reg_mode,
27128 info->first_gp_reg_save + i,
27129 info->gp_save_offset + frame_off + reg_size * i,
27130 sp_off - frame_off);
27133 if (crtl->calls_eh_return)
27135 unsigned int i;
27136 rtvec p;
27138 for (i = 0; ; ++i)
27140 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27141 if (regno == INVALID_REGNUM)
27142 break;
27145 p = rtvec_alloc (i);
27147 for (i = 0; ; ++i)
27149 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27150 if (regno == INVALID_REGNUM)
27151 break;
27153 insn
27154 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27155 sp_reg_rtx,
27156 info->ehrd_offset + sp_off + reg_size * (int) i);
27157 RTVEC_ELT (p, i) = insn;
27158 RTX_FRAME_RELATED_P (insn) = 1;
27161 insn = emit_insn (gen_blockage ());
27162 RTX_FRAME_RELATED_P (insn) = 1;
27163 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27166 /* In AIX ABI we need to make sure r2 is really saved. */
27167 if (TARGET_AIX && crtl->calls_eh_return)
27169 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27170 rtx save_insn, join_insn, note;
27171 long toc_restore_insn;
27173 tmp_reg = gen_rtx_REG (Pmode, 11);
27174 tmp_reg_si = gen_rtx_REG (SImode, 11);
27175 if (using_static_chain_p)
27177 START_USE (0);
27178 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27180 else
27181 START_USE (11);
27182 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27183 /* Peek at instruction to which this function returns. If it's
27184 restoring r2, then we know we've already saved r2. We can't
27185 unconditionally save r2 because the value we have will already
27186 be updated if we arrived at this function via a plt call or
27187 toc adjusting stub. */
27188 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27189 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27190 + RS6000_TOC_SAVE_SLOT);
27191 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27192 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27193 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27194 validate_condition_mode (EQ, CCUNSmode);
27195 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27196 emit_insn (gen_rtx_SET (compare_result,
27197 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27198 toc_save_done = gen_label_rtx ();
27199 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27200 gen_rtx_EQ (VOIDmode, compare_result,
27201 const0_rtx),
27202 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27203 pc_rtx);
27204 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27205 JUMP_LABEL (jump) = toc_save_done;
27206 LABEL_NUSES (toc_save_done) += 1;
27208 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27209 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27210 sp_off - frame_off);
27212 emit_label (toc_save_done);
27214 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27215 have a CFG that has different saves along different paths.
27216 Move the note to a dummy blockage insn, which describes that
27217 R2 is unconditionally saved after the label. */
27218 /* ??? An alternate representation might be a special insn pattern
27219 containing both the branch and the store. That might give the
27220 code that minimizes the number of DW_CFA_advance opcodes more
27221 freedom in placing the annotations. */
27222 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27223 if (note)
27224 remove_note (save_insn, note);
27225 else
27226 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27227 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27228 RTX_FRAME_RELATED_P (save_insn) = 0;
27230 join_insn = emit_insn (gen_blockage ());
27231 REG_NOTES (join_insn) = note;
27232 RTX_FRAME_RELATED_P (join_insn) = 1;
27234 if (using_static_chain_p)
27236 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27237 END_USE (0);
27239 else
27240 END_USE (11);
27243 /* Save CR if we use any that must be preserved. */
27244 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27246 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27247 GEN_INT (info->cr_save_offset + frame_off));
27248 rtx mem = gen_frame_mem (SImode, addr);
27250 /* If we didn't copy cr before, do so now using r0. */
27251 if (cr_save_rtx == NULL_RTX)
27253 START_USE (0);
27254 cr_save_rtx = gen_rtx_REG (SImode, 0);
27255 rs6000_emit_move_from_cr (cr_save_rtx);
27258 /* Saving CR requires a two-instruction sequence: one instruction
27259 to move the CR to a general-purpose register, and a second
27260 instruction that stores the GPR to memory.
27262 We do not emit any DWARF CFI records for the first of these,
27263 because we cannot properly represent the fact that CR is saved in
27264 a register. One reason is that we cannot express that multiple
27265 CR fields are saved; another reason is that on 64-bit, the size
27266 of the CR register in DWARF (4 bytes) differs from the size of
27267 a general-purpose register.
27269 This means if any intervening instruction were to clobber one of
27270 the call-saved CR fields, we'd have incorrect CFI. To prevent
27271 this from happening, we mark the store to memory as a use of
27272 those CR fields, which prevents any such instruction from being
27273 scheduled in between the two instructions. */
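/* Schematically, the sequence being protected looks like (register
   number and offset are illustrative only):
     mfcr r12          # copy all eight CR fields into a GPR
     stw  r12,8(r1)    # store the GPR into the CR save slot
   The PARALLEL built below attaches USEs of the call-saved CR fields
   to the store so the scheduler cannot move a CR-clobbering insn
   between the two.  */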
27274 rtx crsave_v[9];
27275 int n_crsave = 0;
27276 int i;
27278 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27279 for (i = 0; i < 8; i++)
27280 if (save_reg_p (CR0_REGNO + i))
27281 crsave_v[n_crsave++]
27282 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27284 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27285 gen_rtvec_v (n_crsave, crsave_v)));
27286 END_USE (REGNO (cr_save_rtx));
27288 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27289 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27290 so we need to construct a frame expression manually. */
27291 RTX_FRAME_RELATED_P (insn) = 1;
27293 /* Update address to be stack-pointer relative, like
27294 rs6000_frame_related would do. */
27295 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27296 GEN_INT (info->cr_save_offset + sp_off));
27297 mem = gen_frame_mem (SImode, addr);
27299 if (DEFAULT_ABI == ABI_ELFv2)
27301 /* In the ELFv2 ABI we generate separate CFI records for each
27302 CR field that was actually saved. They all point to the
27303 same 32-bit stack slot. */
27304 rtx crframe[8];
27305 int n_crframe = 0;
27307 for (i = 0; i < 8; i++)
27308 if (save_reg_p (CR0_REGNO + i))
27310 crframe[n_crframe]
27311 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27313 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27314 n_crframe++;
27317 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27318 gen_rtx_PARALLEL (VOIDmode,
27319 gen_rtvec_v (n_crframe, crframe)));
27321 else
27323 /* In other ABIs, by convention, we use a single CR regnum to
27324 represent the fact that all call-saved CR fields are saved.
27325 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27326 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27327 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27331 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27332 *separate* slots if the routine calls __builtin_eh_return, so
27333 that they can be independently restored by the unwinder. */
27334 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27336 int i, cr_off = info->ehcr_offset;
27337 rtx crsave;
27339 /* ??? We might get better performance by using multiple mfocrf
27340 instructions. */
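/* For reference, and as a sketch of the trade-off rather than a
   measured result: mfocrf copies one CR field selected by an FXM
   mask (e.g. "mfocrf rD,0x80" reads just CR0), while the single
   mfcr used below reads all eight fields at once.  */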
27341 crsave = gen_rtx_REG (SImode, 0);
27342 emit_insn (gen_movesi_from_cr (crsave));
27344 for (i = 0; i < 8; i++)
27345 if (!call_used_regs[CR0_REGNO + i])
27347 rtvec p = rtvec_alloc (2);
27348 RTVEC_ELT (p, 0)
27349 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27350 RTVEC_ELT (p, 1)
27351 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27353 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27355 RTX_FRAME_RELATED_P (insn) = 1;
27356 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27357 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27358 sp_reg_rtx, cr_off + sp_off));
27360 cr_off += reg_size;
27364 /* Update stack and set back pointer unless this is V.4,
27365 for which it was done previously. */
27366 if (!WORLD_SAVE_P (info) && info->push_p
27367 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27369 rtx ptr_reg = NULL;
27370 int ptr_off = 0;
27372 /* If saving altivec regs we need to be able to address all save
27373 locations using a 16-bit offset. */
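/* The 32767 tests below are the largest positive displacement of a
   D-form load/store, whose displacement is a signed 16-bit field;
   if a save location would need more, we switch to a dedicated
   pointer register instead.  */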
27374 if ((strategy & SAVE_INLINE_VRS) == 0
27375 || (info->altivec_size != 0
27376 && (info->altivec_save_offset + info->altivec_size - 16
27377 + info->total_size - frame_off) > 32767)
27378 || (info->vrsave_size != 0
27379 && (info->vrsave_save_offset
27380 + info->total_size - frame_off) > 32767))
27382 int sel = SAVRES_SAVE | SAVRES_VR;
27383 unsigned ptr_regno = ptr_regno_for_savres (sel);
27385 if (using_static_chain_p
27386 && ptr_regno == STATIC_CHAIN_REGNUM)
27387 ptr_regno = 12;
27388 if (REGNO (frame_reg_rtx) != ptr_regno)
27389 START_USE (ptr_regno);
27390 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27391 frame_reg_rtx = ptr_reg;
27392 ptr_off = info->altivec_save_offset + info->altivec_size;
27393 frame_off = -ptr_off;
27395 else if (REGNO (frame_reg_rtx) == 1)
27396 frame_off = info->total_size;
27397 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27398 ptr_reg, ptr_off);
27399 if (REGNO (frame_reg_rtx) == 12)
27400 sp_adjust = 0;
27401 sp_off = info->total_size;
27402 if (frame_reg_rtx != sp_reg_rtx)
27403 rs6000_emit_stack_tie (frame_reg_rtx, false);
27406 /* Set frame pointer, if needed. */
27407 if (frame_pointer_needed)
27409 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27410 sp_reg_rtx);
27411 RTX_FRAME_RELATED_P (insn) = 1;
27414 /* Save AltiVec registers if needed. Save here because the red zone does
27415 not always include AltiVec registers. */
27416 if (!WORLD_SAVE_P (info)
27417 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27419 int end_save = info->altivec_save_offset + info->altivec_size;
27420 int ptr_off;
27421 /* Oddly, the vector save/restore functions point r0 at the end
27422 of the save area, then use r11 or r12 to load offsets for
27423 [reg+reg] addressing. */
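/* Schematically, the out-of-line save ends up as something like
   (names and offsets are illustrative, not the exact emitted code):
     addi r0,r1,END           # r0 = end of the VR save area
     bl   <savevr routine>    # stores v20..v31 via [scratch+r0]
   where the scratch register is r11 or r12 as asserted below.  */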
27424 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27425 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27426 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27428 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27429 NOT_INUSE (0);
27430 if (scratch_regno == 12)
27431 sp_adjust = 0;
27432 if (end_save + frame_off != 0)
27434 rtx offset = GEN_INT (end_save + frame_off);
27436 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27438 else
27439 emit_move_insn (ptr_reg, frame_reg_rtx);
27441 ptr_off = -end_save;
27442 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27443 info->altivec_save_offset + ptr_off,
27444 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27445 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27446 NULL_RTX, NULL_RTX);
27447 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27449 /* The oddity mentioned above clobbered our frame reg. */
27450 emit_move_insn (frame_reg_rtx, ptr_reg);
27451 frame_off = ptr_off;
27454 else if (!WORLD_SAVE_P (info)
27455 && info->altivec_size != 0)
27457 int i;
27459 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27460 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27462 rtx areg, savereg, mem;
27463 HOST_WIDE_INT offset;
27465 offset = (info->altivec_save_offset + frame_off
27466 + 16 * (i - info->first_altivec_reg_save));
27468 savereg = gen_rtx_REG (V4SImode, i);
27470 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
27472 mem = gen_frame_mem (V4SImode,
27473 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27474 GEN_INT (offset)));
27475 insn = emit_insn (gen_rtx_SET (mem, savereg));
27476 areg = NULL_RTX;
27478 else
27480 NOT_INUSE (0);
27481 areg = gen_rtx_REG (Pmode, 0);
27482 emit_move_insn (areg, GEN_INT (offset));
27484 /* AltiVec addressing mode is [reg+reg]. */
27485 mem = gen_frame_mem (V4SImode,
27486 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27488 /* Rather than emitting a generic move, force use of the stvx
27489 instruction, which we always want on ISA 2.07 (power8) systems.
27490 In particular we don't want xxpermdi/stxvd2x for little
27491 endian. */
27492 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
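/* For reference, "stvx vS,rA,rB" stores to (rA|0)+rB with the low
   four address bits masked off, so the [reg+reg] address built
   above must already be 16-byte aligned.  */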
27495 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27496 areg, GEN_INT (offset));
27500 /* VRSAVE is a bit vector representing which AltiVec registers
27501 are used. The OS uses this to determine which vector
27502 registers to save on a context switch. We need to save
27503 VRSAVE on the stack frame, add whatever AltiVec registers we
27504 used in this function, and do the corresponding magic in the
27505 epilogue. */
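/* As a sketch of the layout (the ALTIVEC_REG_BIT macro is
   authoritative): VRSAVE is a 32-bit SPR with one bit per vector
   register, the most significant bit conventionally denoting v0.  */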
27507 if (!WORLD_SAVE_P (info)
27508 && info->vrsave_size != 0)
27510 rtx reg, vrsave;
27511 int offset;
27512 int save_regno;
27514 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
27515 be using r12 as frame_reg_rtx and r11 as the static chain
27516 pointer for nested functions. */
27517 save_regno = 12;
27518 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27519 && !using_static_chain_p)
27520 save_regno = 11;
27521 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27523 save_regno = 11;
27524 if (using_static_chain_p)
27525 save_regno = 0;
27528 NOT_INUSE (save_regno);
27529 reg = gen_rtx_REG (SImode, save_regno);
27530 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
27531 if (TARGET_MACHO)
27532 emit_insn (gen_get_vrsave_internal (reg));
27533 else
27534 emit_insn (gen_rtx_SET (reg, vrsave));
27536 /* Save VRSAVE. */
27537 offset = info->vrsave_save_offset + frame_off;
27538 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
27540 /* Include the registers in the mask. */
27541 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
27543 insn = emit_insn (generate_set_vrsave (reg, info, 0));
27546 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27547 if (!TARGET_SINGLE_PIC_BASE
27548 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27549 || (DEFAULT_ABI == ABI_V4
27550 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27551 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27553 /* If emit_load_toc_table will use the link register, we need to save
27554 it. We use R12 for this purpose because emit_load_toc_table
27555 can use register 0. This allows us to use a plain 'blr' to return
27556 from the procedure more often. */
27557 int save_LR_around_toc_setup = (TARGET_ELF
27558 && DEFAULT_ABI == ABI_V4
27559 && flag_pic
27560 && ! info->lr_save_p
27561 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27562 if (save_LR_around_toc_setup)
27564 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27565 rtx tmp = gen_rtx_REG (Pmode, 12);
27567 sp_adjust = 0;
27568 insn = emit_move_insn (tmp, lr);
27569 RTX_FRAME_RELATED_P (insn) = 1;
27571 rs6000_emit_load_toc_table (TRUE);
27573 insn = emit_move_insn (lr, tmp);
27574 add_reg_note (insn, REG_CFA_RESTORE, lr);
27575 RTX_FRAME_RELATED_P (insn) = 1;
27577 else
27578 rs6000_emit_load_toc_table (TRUE);
27581 #if TARGET_MACHO
27582 if (!TARGET_SINGLE_PIC_BASE
27583 && DEFAULT_ABI == ABI_DARWIN
27584 && flag_pic && crtl->uses_pic_offset_table)
27586 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27587 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27589 /* Save and restore LR locally around this call (in R0). */
27590 if (!info->lr_save_p)
27591 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27593 emit_insn (gen_load_macho_picbase (src));
27595 emit_move_insn (gen_rtx_REG (Pmode,
27596 RS6000_PIC_OFFSET_TABLE_REGNUM),
27597 lr);
27599 if (!info->lr_save_p)
27600 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27602 #endif
27604 /* If we need to, save the TOC register after doing the stack setup.
27605 Do not emit eh frame info for this save. The unwinder wants info,
27606 conceptually attached to instructions in this function, about
27607 register values in the caller of this function. This R2 may have
27608 already been changed from the value in the caller.
27609 We don't attempt to write accurate DWARF EH frame info for R2
27610 because code emitted by gcc for a (non-pointer) function call
27611 doesn't save and restore R2. Instead, R2 is managed out-of-line
27612 by a linker generated plt call stub when the function resides in
27613 a shared library. This behavior is costly to describe in DWARF,
27614 both in terms of the size of DWARF info and the time taken in the
27615 unwinder to interpret it. R2 changes, apart from the
27616 calls_eh_return case earlier in this function, are handled by
27617 linux-unwind.h frob_update_context. */
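/* Schematically, the store emitted below is just
     std r2,SLOT(r1)
   where SLOT is RS6000_TOC_SAVE_SLOT (for illustration: 24 under
   ELFv2 and 40 under ELFv1, per those ABIs' stack layouts).  */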
27618 if (rs6000_save_toc_in_prologue_p ())
27620 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27621 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27624 if (using_split_stack && split_stack_arg_pointer_used_p ())
27626 /* Set up the arg pointer (r12) for -fsplit-stack code. If
27627 __morestack was called, it left the arg pointer to the old
27628 stack in r29. Otherwise, the arg pointer is the top of the
27629 current frame. */
27630 cfun->machine->split_stack_argp_used = true;
27631 if (sp_adjust)
27633 rtx r12 = gen_rtx_REG (Pmode, 12);
27634 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
27635 emit_insn_before (set_r12, sp_adjust);
27637 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
27639 rtx r12 = gen_rtx_REG (Pmode, 12);
27640 if (frame_off == 0)
27641 emit_move_insn (r12, frame_reg_rtx);
27642 else
27643 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
27645 if (info->push_p)
27647 rtx r12 = gen_rtx_REG (Pmode, 12);
27648 rtx r29 = gen_rtx_REG (Pmode, 29);
27649 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27650 rtx not_more = gen_label_rtx ();
27651 rtx jump;
27653 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27654 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
27655 gen_rtx_LABEL_REF (VOIDmode, not_more),
27656 pc_rtx);
27657 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27658 JUMP_LABEL (jump) = not_more;
27659 LABEL_NUSES (not_more) += 1;
27660 emit_move_insn (r12, r29);
27661 emit_label (not_more);
27666 /* Output .extern statements for the save/restore routines we use. */
27668 static void
27669 rs6000_output_savres_externs (FILE *file)
27671 rs6000_stack_t *info = rs6000_stack_info ();
27673 if (TARGET_DEBUG_STACK)
27674 debug_stack_info (info);
27676 /* Write .extern for any function we will call to save and restore
27677 fp values. */
27678 if (info->first_fp_reg_save < 64
27679 && !TARGET_MACHO
27680 && !TARGET_ELF)
27682 char *name;
27683 int regno = info->first_fp_reg_save - 32;
27685 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27687 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27688 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27689 name = rs6000_savres_routine_name (info, regno, sel);
27690 fprintf (file, "\t.extern %s\n", name);
27692 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27694 bool lr = (info->savres_strategy
27695 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27696 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27697 name = rs6000_savres_routine_name (info, regno, sel);
27698 fprintf (file, "\t.extern %s\n", name);
27703 /* Write function prologue. */
27705 static void
27706 rs6000_output_function_prologue (FILE *file,
27707 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27709 if (!cfun->is_thunk)
27710 rs6000_output_savres_externs (file);
27712 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27713 immediately after the global entry point label. */
27714 if (rs6000_global_entry_point_needed_p ())
27716 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27718 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27720 if (TARGET_CMODEL != CMODEL_LARGE)
27722 /* In the small and medium code models, we assume the TOC is less
27723 than 2 GB away from the text section, so it can be computed via the
27724 following two-instruction sequence. */
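/* The sequence printed below comes out as (label numbers are
   illustrative):
     0:	addis 2,12,.TOC.-.LCF0@ha
     	addi 2,2,.TOC.-.LCF0@l
   i.e. r2 = r12 + (.TOC. - global entry point), relying on the
   ELFv2 requirement that r12 hold the entry address on entry.  */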
27725 char buf[256];
27727 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27728 fprintf (file, "0:\taddis 2,12,.TOC.-");
27729 assemble_name (file, buf);
27730 fprintf (file, "@ha\n");
27731 fprintf (file, "\taddi 2,2,.TOC.-");
27732 assemble_name (file, buf);
27733 fprintf (file, "@l\n");
27735 else
27737 /* In the large code model, we allow arbitrary offsets between the
27738 TOC and the text section, so we have to load the offset from
27739 memory. The data field is emitted directly before the global
27740 entry point in rs6000_elf_declare_function_name. */
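/* The sequence printed below comes out as (label numbers are
   illustrative):
     	ld 2,.LCL0-.LCF0(12)
     	add 2,2,12
   i.e. load the TOC offset stored before the global entry point,
   then add the entry address held in r12.  */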
27741 char buf[256];
27743 #ifdef HAVE_AS_ENTRY_MARKERS
27744 /* If supported by the linker, emit a marker relocation. If the
27745 total code size of the final executable or shared library
27746 happens to fit into 2 GB after all, the linker will replace
27747 this code sequence with the sequence for the small or medium
27748 code model. */
27749 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27750 #endif
27751 fprintf (file, "\tld 2,");
27752 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27753 assemble_name (file, buf);
27754 fprintf (file, "-");
27755 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27756 assemble_name (file, buf);
27757 fprintf (file, "(12)\n");
27758 fprintf (file, "\tadd 2,2,12\n");
27761 fputs ("\t.localentry\t", file);
27762 assemble_name (file, name);
27763 fputs (",.-", file);
27764 assemble_name (file, name);
27765 fputs ("\n", file);
27768 /* Output -mprofile-kernel code. This needs to be done here instead of
27769 in output_function_profile since it must go after the ELFv2 ABI
27770 local entry point. */
27771 if (TARGET_PROFILE_KERNEL && crtl->profile)
27773 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27774 gcc_assert (!TARGET_32BIT);
27776 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27778 /* In the ELFv2 ABI we have no compiler stack word. It must be
27779 the responsibility of _mcount to preserve the static chain
27780 register if required. */
27781 if (DEFAULT_ABI != ABI_ELFv2
27782 && cfun->static_chain_decl != NULL)
27784 asm_fprintf (file, "\tstd %s,24(%s)\n",
27785 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27786 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27787 asm_fprintf (file, "\tld %s,24(%s)\n",
27788 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27790 else
27791 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27794 rs6000_pic_labelno++;
27797 /* -mprofile-kernel code calls mcount before the function prolog,
27798 so a profiled leaf function should stay a leaf function. */
27799 static bool
27800 rs6000_keep_leaf_when_profiled ()
27802 return TARGET_PROFILE_KERNEL;
27805 /* Non-zero if vmx regs are restored before the frame pop, zero if
27806 we restore after the pop when possible. */
27807 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27809 /* Restoring cr is a two-step process: loading a reg from the frame
27810 save, then moving the reg to cr. For ABI_V4 we must let the
27811 unwinder know that the stack location is no longer valid at or
27812 before the stack deallocation, but we can't emit a cfa_restore for
27813 cr at the stack deallocation like we do for other registers.
27814 The trouble is that it is possible for the move to cr to be
27815 scheduled after the stack deallocation. So say exactly where cr
27816 is located on each of the two insns. */
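/* A sketch of the two insns in question (register number and offset
   are examples only):
     lwz   r12,8(r1)    # load the saved CR image from the frame
     mtcrf 0xff,r12     # move it back into the CR fields
   The REG_CFA_REGISTER note added below records that, between the
   two, the saved cr value lives in the GPR.  */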
27818 static rtx
27819 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27821 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27822 rtx reg = gen_rtx_REG (SImode, regno);
27823 rtx_insn *insn = emit_move_insn (reg, mem);
27825 if (!exit_func && DEFAULT_ABI == ABI_V4)
27827 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27828 rtx set = gen_rtx_SET (reg, cr);
27830 add_reg_note (insn, REG_CFA_REGISTER, set);
27831 RTX_FRAME_RELATED_P (insn) = 1;
27833 return reg;
27836 /* Reload CR from REG. */
27838 static void
27839 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27841 int count = 0;
27842 int i;
27844 if (using_mfcr_multiple)
27846 for (i = 0; i < 8; i++)
27847 if (save_reg_p (CR0_REGNO + i))
27848 count++;
27849 gcc_assert (count);
27852 if (using_mfcr_multiple && count > 1)
27854 rtx_insn *insn;
27855 rtvec p;
27856 int ndx;
27858 p = rtvec_alloc (count);
27860 ndx = 0;
27861 for (i = 0; i < 8; i++)
27862 if (save_reg_p (CR0_REGNO + i))
27864 rtvec r = rtvec_alloc (2);
27865 RTVEC_ELT (r, 0) = reg;
27866 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
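/* The mask just built, 1 << (7-i), follows the FXM field convention:
   in mtcrf/mtocrf's 8-bit mask the most significant bit selects CR0,
   so field i is bit 7-i.  */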
27867 RTVEC_ELT (p, ndx) =
27868 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27869 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27870 ndx++;
27872 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27873 gcc_assert (ndx == count);
27875 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27876 CR field separately. */
27877 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27879 for (i = 0; i < 8; i++)
27880 if (save_reg_p (CR0_REGNO + i))
27881 add_reg_note (insn, REG_CFA_RESTORE,
27882 gen_rtx_REG (SImode, CR0_REGNO + i));
27884 RTX_FRAME_RELATED_P (insn) = 1;
27887 else
27888 for (i = 0; i < 8; i++)
27889 if (save_reg_p (CR0_REGNO + i))
27891 rtx insn = emit_insn (gen_movsi_to_cr_one
27892 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27894 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27895 CR field separately, attached to the insn that in fact
27896 restores this particular CR field. */
27897 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27899 add_reg_note (insn, REG_CFA_RESTORE,
27900 gen_rtx_REG (SImode, CR0_REGNO + i));
27902 RTX_FRAME_RELATED_P (insn) = 1;
27906 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27907 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27908 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27910 rtx_insn *insn = get_last_insn ();
27911 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27913 add_reg_note (insn, REG_CFA_RESTORE, cr);
27914 RTX_FRAME_RELATED_P (insn) = 1;
27918 /* Like cr, the move to lr instruction can be scheduled after the
27919 stack deallocation, but unlike cr, its stack frame save is still
27920 valid. So we only need to emit the cfa_restore on the correct
27921 instruction. */
27923 static void
27924 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27926 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27927 rtx reg = gen_rtx_REG (Pmode, regno);
27929 emit_move_insn (reg, mem);
27932 static void
27933 restore_saved_lr (int regno, bool exit_func)
27935 rtx reg = gen_rtx_REG (Pmode, regno);
27936 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27937 rtx_insn *insn = emit_move_insn (lr, reg);
27939 if (!exit_func && flag_shrink_wrap)
27941 add_reg_note (insn, REG_CFA_RESTORE, lr);
27942 RTX_FRAME_RELATED_P (insn) = 1;
27946 static rtx
27947 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27949 if (DEFAULT_ABI == ABI_ELFv2)
27951 int i;
27952 for (i = 0; i < 8; i++)
27953 if (save_reg_p (CR0_REGNO + i))
27955 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27956 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27957 cfa_restores);
27960 else if (info->cr_save_p)
27961 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27962 gen_rtx_REG (SImode, CR2_REGNO),
27963 cfa_restores);
27965 if (info->lr_save_p)
27966 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27967 gen_rtx_REG (Pmode, LR_REGNO),
27968 cfa_restores);
27969 return cfa_restores;
27972 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
27973 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
27974 below the stack pointer that are not clobbered by signals. */
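/* Example (a sketch): on 64-bit AIX/ELF the red zone is 288 bytes,
   so a slot at offset -256 survives a signal while one at -320 may
   not; under V.4 any negative offset is fair game for a signal
   handler, hence the 0 below.  */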
27976 static inline bool
27977 offset_below_red_zone_p (HOST_WIDE_INT offset)
27979 return offset < (DEFAULT_ABI == ABI_V4
27980 ? 0
27981 : TARGET_32BIT ? -220 : -288);
27984 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27986 static void
27987 emit_cfa_restores (rtx cfa_restores)
27989 rtx_insn *insn = get_last_insn ();
27990 rtx *loc = &REG_NOTES (insn);
27992 while (*loc)
27993 loc = &XEXP (*loc, 1);
27994 *loc = cfa_restores;
27995 RTX_FRAME_RELATED_P (insn) = 1;
27998 /* Emit function epilogue as insns. */
28000 void
28001 rs6000_emit_epilogue (int sibcall)
28003 rs6000_stack_t *info;
28004 int restoring_GPRs_inline;
28005 int restoring_FPRs_inline;
28006 int using_load_multiple;
28007 int using_mtcr_multiple;
28008 int use_backchain_to_restore_sp;
28009 int restore_lr;
28010 int strategy;
28011 HOST_WIDE_INT frame_off = 0;
28012 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28013 rtx frame_reg_rtx = sp_reg_rtx;
28014 rtx cfa_restores = NULL_RTX;
28015 rtx insn;
28016 rtx cr_save_reg = NULL_RTX;
28017 machine_mode reg_mode = Pmode;
28018 int reg_size = TARGET_32BIT ? 4 : 8;
28019 int i;
28020 bool exit_func;
28021 unsigned ptr_regno;
28023 info = rs6000_stack_info ();
28025 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
28027 reg_mode = V2SImode;
28028 reg_size = 8;
28031 strategy = info->savres_strategy;
28032 using_load_multiple = strategy & REST_MULTIPLE;
28033 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28034 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28035 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
28036 || rs6000_cpu == PROCESSOR_PPC603
28037 || rs6000_cpu == PROCESSOR_PPC750
28038 || optimize_size);
28039 /* Restore via the backchain when we have a large frame, since this
28040 is more efficient than an addis, addi pair. The second condition
28041 here will not trigger at the moment; we don't actually need a
28042 frame pointer for alloca, but the generic parts of the compiler
28043 give us one anyway. */
28044 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28045 ? info->lr_save_offset
28046 : 0) > 32767
28047 || (cfun->calls_alloca
28048 && !frame_pointer_needed));
28049 restore_lr = (info->lr_save_p
28050 && (restoring_FPRs_inline
28051 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28052 && (restoring_GPRs_inline
28053 || info->first_fp_reg_save < 64));
28055 if (WORLD_SAVE_P (info))
28057 int i, j;
28058 char rname[30];
28059 const char *alloc_rname;
28060 rtvec p;
28062 /* eh_rest_world_r10 will return to the location saved in the LR
28063 stack slot (which is not likely to be our caller).
28064 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28065 rest_world is similar, except any R10 parameter is ignored.
28066 The exception-handling stuff that was here in 2.95 is no
28067 longer necessary. */
28069 p = rtvec_alloc (9
28071 + 32 - info->first_gp_reg_save
28072 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28073 + 63 + 1 - info->first_fp_reg_save);
28075 strcpy (rname, ((crtl->calls_eh_return) ?
28076 "*eh_rest_world_r10" : "*rest_world"));
28077 alloc_rname = ggc_strdup (rname);
28079 j = 0;
28080 RTVEC_ELT (p, j++) = ret_rtx;
28081 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
28082 gen_rtx_REG (Pmode,
28083 LR_REGNO));
28084 RTVEC_ELT (p, j++)
28085 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28086 /* The instruction pattern requires a clobber here;
28087 it is shared with the restVEC helper. */
28088 RTVEC_ELT (p, j++)
28089 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
28092 /* CR register traditionally saved as CR2. */
28093 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28094 RTVEC_ELT (p, j++)
28095 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28096 if (flag_shrink_wrap)
28098 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28099 gen_rtx_REG (Pmode, LR_REGNO),
28100 cfa_restores);
28101 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28105 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28107 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28108 RTVEC_ELT (p, j++)
28109 = gen_frame_load (reg,
28110 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28111 if (flag_shrink_wrap)
28112 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28114 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28116 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28117 RTVEC_ELT (p, j++)
28118 = gen_frame_load (reg,
28119 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28120 if (flag_shrink_wrap)
28121 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28123 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28125 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28126 ? DFmode : SFmode),
28127 info->first_fp_reg_save + i);
28128 RTVEC_ELT (p, j++)
28129 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28130 if (flag_shrink_wrap)
28131 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28133 RTVEC_ELT (p, j++)
28134 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28135 RTVEC_ELT (p, j++)
28136 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28137 RTVEC_ELT (p, j++)
28138 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28139 RTVEC_ELT (p, j++)
28140 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28141 RTVEC_ELT (p, j++)
28142 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28143 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28145 if (flag_shrink_wrap)
28147 REG_NOTES (insn) = cfa_restores;
28148 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28149 RTX_FRAME_RELATED_P (insn) = 1;
28151 return;
28154 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28155 if (info->push_p)
28156 frame_off = info->total_size;
28158 /* Restore AltiVec registers if we must do so before adjusting the
28159 stack. */
28160 if (info->altivec_size != 0
28161 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28162 || (DEFAULT_ABI != ABI_V4
28163 && offset_below_red_zone_p (info->altivec_save_offset))))
28165 int i;
28166 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28168 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28169 if (use_backchain_to_restore_sp)
28171 int frame_regno = 11;
28173 if ((strategy & REST_INLINE_VRS) == 0)
28175 /* Of r11 and r12, select the one not clobbered by an
28176 out-of-line restore function for the frame register. */
28177 frame_regno = 11 + 12 - scratch_regno;
28179 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28180 emit_move_insn (frame_reg_rtx,
28181 gen_rtx_MEM (Pmode, sp_reg_rtx));
28182 frame_off = 0;
28184 else if (frame_pointer_needed)
28185 frame_reg_rtx = hard_frame_pointer_rtx;
28187 if ((strategy & REST_INLINE_VRS) == 0)
28189 int end_save = info->altivec_save_offset + info->altivec_size;
28190 int ptr_off;
28191 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28192 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28194 if (end_save + frame_off != 0)
28196 rtx offset = GEN_INT (end_save + frame_off);
28198 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28200 else
28201 emit_move_insn (ptr_reg, frame_reg_rtx);
28203 ptr_off = -end_save;
28204 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28205 info->altivec_save_offset + ptr_off,
28206 0, V4SImode, SAVRES_VR);
28208 else
28210 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28211 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28213 rtx addr, areg, mem, insn;
28214 rtx reg = gen_rtx_REG (V4SImode, i);
28215 HOST_WIDE_INT offset
28216 = (info->altivec_save_offset + frame_off
28217 + 16 * (i - info->first_altivec_reg_save));
28219 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28221 mem = gen_frame_mem (V4SImode,
28222 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28223 GEN_INT (offset)));
28224 insn = gen_rtx_SET (reg, mem);
28226 else
28228 areg = gen_rtx_REG (Pmode, 0);
28229 emit_move_insn (areg, GEN_INT (offset));
28231 /* AltiVec addressing mode is [reg+reg]. */
28232 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28233 mem = gen_frame_mem (V4SImode, addr);
28235 /* Rather than emitting a generic move, force use of the
28236 lvx instruction, which we always want. In particular we
28237 don't want lxvd2x/xxpermdi for little endian. */
28238 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28241 (void) emit_insn (insn);
28245 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28246 if (((strategy & REST_INLINE_VRS) == 0
28247 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28248 && (flag_shrink_wrap
28249 || (offset_below_red_zone_p
28250 (info->altivec_save_offset
28251 + 16 * (i - info->first_altivec_reg_save)))))
28253 rtx reg = gen_rtx_REG (V4SImode, i);
28254 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28258 /* Restore VRSAVE if we must do so before adjusting the stack. */
28259 if (info->vrsave_size != 0
28260 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28261 || (DEFAULT_ABI != ABI_V4
28262 && offset_below_red_zone_p (info->vrsave_save_offset))))
28264 rtx reg;
28266 if (frame_reg_rtx == sp_reg_rtx)
28268 if (use_backchain_to_restore_sp)
28270 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28271 emit_move_insn (frame_reg_rtx,
28272 gen_rtx_MEM (Pmode, sp_reg_rtx));
28273 frame_off = 0;
28275 else if (frame_pointer_needed)
28276 frame_reg_rtx = hard_frame_pointer_rtx;
28279 reg = gen_rtx_REG (SImode, 12);
28280 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28281 info->vrsave_save_offset + frame_off));
28283 emit_insn (generate_set_vrsave (reg, info, 1));
28286 insn = NULL_RTX;
28287 /* If we have a large stack frame, restore the old stack pointer
28288 using the backchain. */
28289 if (use_backchain_to_restore_sp)
28291 if (frame_reg_rtx == sp_reg_rtx)
28293 /* Under V.4, don't reset the stack pointer until after we're done
28294 loading the saved registers. */
28295 if (DEFAULT_ABI == ABI_V4)
28296 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28298 insn = emit_move_insn (frame_reg_rtx,
28299 gen_rtx_MEM (Pmode, sp_reg_rtx));
28300 frame_off = 0;
28302 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28303 && DEFAULT_ABI == ABI_V4)
28304 /* frame_reg_rtx has been set up by the altivec restore. */
28306 else
28308 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28309 frame_reg_rtx = sp_reg_rtx;
28312 /* If we have a frame pointer, we can restore the old stack pointer
28313 from it. */
28314 else if (frame_pointer_needed)
28316 frame_reg_rtx = sp_reg_rtx;
28317 if (DEFAULT_ABI == ABI_V4)
28318 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28319 /* Prevent reordering memory accesses against stack pointer restore. */
28320 else if (cfun->calls_alloca
28321 || offset_below_red_zone_p (-info->total_size))
28322 rs6000_emit_stack_tie (frame_reg_rtx, true);
28324 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28325 GEN_INT (info->total_size)));
28326 frame_off = 0;
28328 else if (info->push_p
28329 && DEFAULT_ABI != ABI_V4
28330 && !crtl->calls_eh_return)
28332 /* Prevent reordering memory accesses against stack pointer restore. */
28333 if (cfun->calls_alloca
28334 || offset_below_red_zone_p (-info->total_size))
28335 rs6000_emit_stack_tie (frame_reg_rtx, false);
28336 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28337 GEN_INT (info->total_size)));
28338 frame_off = 0;
28340 if (insn && frame_reg_rtx == sp_reg_rtx)
28342 if (cfa_restores)
28344 REG_NOTES (insn) = cfa_restores;
28345 cfa_restores = NULL_RTX;
28347 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28348 RTX_FRAME_RELATED_P (insn) = 1;
28351 /* Restore AltiVec registers if we have not done so already. */
28352 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28353 && info->altivec_size != 0
28354 && (DEFAULT_ABI == ABI_V4
28355 || !offset_below_red_zone_p (info->altivec_save_offset)))
28357 int i;
28359 if ((strategy & REST_INLINE_VRS) == 0)
28361 int end_save = info->altivec_save_offset + info->altivec_size;
28362 int ptr_off;
28363 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28364 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28365 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28367 if (end_save + frame_off != 0)
28369 rtx offset = GEN_INT (end_save + frame_off);
28371 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28373 else
28374 emit_move_insn (ptr_reg, frame_reg_rtx);
28376 ptr_off = -end_save;
28377 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28378 info->altivec_save_offset + ptr_off,
28379 0, V4SImode, SAVRES_VR);
28380 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28382 /* Frame reg was clobbered by the out-of-line save. Restore it
28383 from ptr_reg, and if we are calling an out-of-line gpr or
28384 fpr restore, set up the correct pointer and offset. */
28385 unsigned newptr_regno = 1;
28386 if (!restoring_GPRs_inline)
28388 bool lr = info->gp_save_offset + info->gp_size == 0;
28389 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28390 newptr_regno = ptr_regno_for_savres (sel);
28391 end_save = info->gp_save_offset + info->gp_size;
28393 else if (!restoring_FPRs_inline)
28395 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28396 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28397 newptr_regno = ptr_regno_for_savres (sel);
28398 end_save = info->fp_save_offset + info->fp_size;
28401 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28402 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28404 if (end_save + ptr_off != 0)
28406 rtx offset = GEN_INT (end_save + ptr_off);
28408 frame_off = -end_save;
28409 if (TARGET_32BIT)
28410 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28411 ptr_reg, offset));
28412 else
28413 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28414 ptr_reg, offset));
28416 else
28418 frame_off = ptr_off;
28419 emit_move_insn (frame_reg_rtx, ptr_reg);
28423 else
28425 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28426 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28428 rtx addr, areg, mem, insn;
28429 rtx reg = gen_rtx_REG (V4SImode, i);
28430 HOST_WIDE_INT offset
28431 = (info->altivec_save_offset + frame_off
28432 + 16 * (i - info->first_altivec_reg_save));
28434 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28436 mem = gen_frame_mem (V4SImode,
28437 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28438 GEN_INT (offset)));
28439 insn = gen_rtx_SET (reg, mem);
28441 else
28443 areg = gen_rtx_REG (Pmode, 0);
28444 emit_move_insn (areg, GEN_INT (offset));
28446 /* AltiVec addressing mode is [reg+reg]. */
28447 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28448 mem = gen_frame_mem (V4SImode, addr);
28450 /* Rather than emitting a generic move, force use of the
28451 lvx instruction, which we always want. In particular we
28452 don't want lxvd2x/xxpermdi for little endian. */
28453 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28456 (void) emit_insn (insn);
28460 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28461 if (((strategy & REST_INLINE_VRS) == 0
28462 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28463 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28465 rtx reg = gen_rtx_REG (V4SImode, i);
28466 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28470 /* Restore VRSAVE if we have not done so already. */
28471 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28472 && info->vrsave_size != 0
28473 && (DEFAULT_ABI == ABI_V4
28474 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28476 rtx reg;
28478 reg = gen_rtx_REG (SImode, 12);
28479 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28480 info->vrsave_save_offset + frame_off));
28482 emit_insn (generate_set_vrsave (reg, info, 1));
28485 /* If we exit by an out-of-line restore function on ABI_V4 then that
28486 function will deallocate the stack, so we don't need to worry
28487 about the unwinder restoring cr from an invalid stack frame
28488 location. */
28489 exit_func = (!restoring_FPRs_inline
28490 || (!restoring_GPRs_inline
28491 && info->first_fp_reg_save == 64));
28493 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28494 *separate* slots if the routine calls __builtin_eh_return, so
28495 that they can be independently restored by the unwinder. */
28496 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28498 int i, cr_off = info->ehcr_offset;
28500 for (i = 0; i < 8; i++)
28501 if (!call_used_regs[CR0_REGNO + i])
28503 rtx reg = gen_rtx_REG (SImode, 0);
28504 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28505 cr_off + frame_off));
28507 insn = emit_insn (gen_movsi_to_cr_one
28508 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28510 if (!exit_func && flag_shrink_wrap)
28512 add_reg_note (insn, REG_CFA_RESTORE,
28513 gen_rtx_REG (SImode, CR0_REGNO + i));
28515 RTX_FRAME_RELATED_P (insn) = 1;
28518 cr_off += reg_size;
28522 /* Get the old lr if we saved it. If we are restoring registers
28523 out-of-line, then the out-of-line routines can do this for us. */
28524 if (restore_lr && restoring_GPRs_inline)
28525 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28527 /* Get the old cr if we saved it. */
28528 if (info->cr_save_p)
28530 unsigned cr_save_regno = 12;
28532 if (!restoring_GPRs_inline)
28534 /* Ensure we don't use the register used by the out-of-line
28535 gpr register restore below. */
28536 bool lr = info->gp_save_offset + info->gp_size == 0;
28537 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28538 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28540 if (gpr_ptr_regno == 12)
28541 cr_save_regno = 11;
28542 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28544 else if (REGNO (frame_reg_rtx) == 12)
28545 cr_save_regno = 11;
28547 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28548 info->cr_save_offset + frame_off,
28549 exit_func);
28552 /* Set LR here to try to overlap restores below. */
28553 if (restore_lr && restoring_GPRs_inline)
28554 restore_saved_lr (0, exit_func);
28556 /* Load exception handler data registers, if needed. */
28557 if (crtl->calls_eh_return)
28559 unsigned int i, regno;
28561 if (TARGET_AIX)
28563 rtx reg = gen_rtx_REG (reg_mode, 2);
28564 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28565 frame_off + RS6000_TOC_SAVE_SLOT));
28568 for (i = 0; ; ++i)
28570 rtx mem;
28572 regno = EH_RETURN_DATA_REGNO (i);
28573 if (regno == INVALID_REGNUM)
28574 break;
28576 /* Note: possible use of r0 here to address SPE regs. */
28577 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28578 info->ehrd_offset + frame_off
28579 + reg_size * (int) i);
28581 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28585 /* Restore GPRs. This is done as a PARALLEL if we are using
28586 the load-multiple instructions. */
28587 if (TARGET_SPE_ABI
28588 && info->spe_64bit_regs_used
28589 && info->first_gp_reg_save != 32)
28591 /* Determine whether we can address all of the registers that need
28592 to be saved with an offset from frame_reg_rtx that fits in
28593 the small const field for SPE memory instructions. */
28594 int spe_regs_addressable
28595 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
28596 + reg_size * (32 - info->first_gp_reg_save - 1))
28597 && restoring_GPRs_inline);
28599 if (!spe_regs_addressable)
28601 int ool_adjust = 0;
28602 rtx old_frame_reg_rtx = frame_reg_rtx;
28603 /* Make r11 point to the start of the SPE save area. We worried about
28604 not clobbering it when we were saving registers in the prologue.
28605 There's no need to worry here because the static chain is passed
28606 anew to every function. */
28608 if (!restoring_GPRs_inline)
28609 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
28610 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28611 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
28612 GEN_INT (info->spe_gp_save_offset
28613 + frame_off
28614 - ool_adjust)));
28615 /* Keep the invariant that frame_reg_rtx + frame_off points
28616 at the top of the stack frame. */
28617 frame_off = -info->spe_gp_save_offset + ool_adjust;
28620 if (restoring_GPRs_inline)
28622 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
28624 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28625 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28627 rtx offset, addr, mem, reg;
28629 /* We're doing all this to ensure that the immediate offset
28630 fits into the immediate field of 'evldd'. */
28631 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
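/* A sketch of the constraint being asserted: evldd encodes its
   displacement in a 5-bit field scaled by 8, so only the offsets
   0, 8, ..., 248 are directly addressable, which is what
   SPE_CONST_OFFSET_OK checks.  */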
28633 offset = GEN_INT (spe_offset + reg_size * i);
28634 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
28635 mem = gen_rtx_MEM (V2SImode, addr);
28636 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28638 emit_move_insn (reg, mem);
28641 else
28642 rs6000_emit_savres_rtx (info, frame_reg_rtx,
28643 info->spe_gp_save_offset + frame_off,
28644 info->lr_save_offset + frame_off,
28645 reg_mode,
28646 SAVRES_GPR | SAVRES_LR);
28648 else if (!restoring_GPRs_inline)
28650 /* We are jumping to an out-of-line function. */
28651 rtx ptr_reg;
28652 int end_save = info->gp_save_offset + info->gp_size;
28653 bool can_use_exit = end_save == 0;
28654 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28655 int ptr_off;
28657 /* Emit stack reset code if we need it. */
28658 ptr_regno = ptr_regno_for_savres (sel);
28659 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28660 if (can_use_exit)
28661 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
28662 else if (end_save + frame_off != 0)
28663 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28664 GEN_INT (end_save + frame_off)));
28665 else if (REGNO (frame_reg_rtx) != ptr_regno)
28666 emit_move_insn (ptr_reg, frame_reg_rtx);
28667 if (REGNO (frame_reg_rtx) == ptr_regno)
28668 frame_off = -end_save;
28670 if (can_use_exit && info->cr_save_p)
28671 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28673 ptr_off = -end_save;
28674 rs6000_emit_savres_rtx (info, ptr_reg,
28675 info->gp_save_offset + ptr_off,
28676 info->lr_save_offset + ptr_off,
28677 reg_mode, sel);
28679 else if (using_load_multiple)
28681 rtvec p;
28682 p = rtvec_alloc (32 - info->first_gp_reg_save);
28683 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28684 RTVEC_ELT (p, i)
28685 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28686 frame_reg_rtx,
28687 info->gp_save_offset + frame_off + reg_size * i);
28688 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28690 else
28692 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28693 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
28694 emit_insn (gen_frame_load
28695 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28696 frame_reg_rtx,
28697 info->gp_save_offset + frame_off + reg_size * i));
28700 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28702 /* If the frame pointer was used then we can't delay emitting
28703 a REG_CFA_DEF_CFA note. This must happen on the insn that
28704 restores the frame pointer, r31. We may have already emitted
28705 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28706 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28707 be harmless if emitted. */
28708 if (frame_pointer_needed)
28710 insn = get_last_insn ();
28711 add_reg_note (insn, REG_CFA_DEF_CFA,
28712 plus_constant (Pmode, frame_reg_rtx, frame_off));
28713 RTX_FRAME_RELATED_P (insn) = 1;
28716 /* Set up cfa_restores. We always need these when
28717 shrink-wrapping. If not shrink-wrapping then we only need
28718 the cfa_restore when the stack location is no longer valid.
28719 The cfa_restores must be emitted on or before the insn that
28720 invalidates the stack, and of course must not be emitted
28721 before the insn that actually does the restore. The latter
28722 is why it is a bad idea to emit the cfa_restores as a group
28723 on the last instruction here that actually does a restore:
28724 that insn may be reordered with respect to others doing
28725 restores. */
28726 if (flag_shrink_wrap
28727 && !restoring_GPRs_inline
28728 && info->first_fp_reg_save == 64)
28729 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28731 for (i = info->first_gp_reg_save; i < 32; i++)
28732 if (!restoring_GPRs_inline
28733 || using_load_multiple
28734 || rs6000_reg_live_or_pic_offset_p (i))
28736 rtx reg = gen_rtx_REG (reg_mode, i);
28738 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28742 if (!restoring_GPRs_inline
28743 && info->first_fp_reg_save == 64)
28745 /* We are jumping to an out-of-line function. */
28746 if (cfa_restores)
28747 emit_cfa_restores (cfa_restores);
28748 return;
28751 if (restore_lr && !restoring_GPRs_inline)
28753 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28754 restore_saved_lr (0, exit_func);
28757 /* Restore fpr's if we need to do it without calling a function. */
28758 if (restoring_FPRs_inline)
28759 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28760 if (save_reg_p (info->first_fp_reg_save + i))
28762 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28763 ? DFmode : SFmode),
28764 info->first_fp_reg_save + i);
28765 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28766 info->fp_save_offset + frame_off + 8 * i));
28767 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28768 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28771 /* If we saved cr, restore it here. Just those that were used. */
28772 if (info->cr_save_p)
28773 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28775 /* If this is V.4, unwind the stack pointer after all of the loads
28776 have been done, or set up r11 if we are restoring fp out of line. */
28777 ptr_regno = 1;
28778 if (!restoring_FPRs_inline)
28780 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28781 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28782 ptr_regno = ptr_regno_for_savres (sel);
28785 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
28786 if (REGNO (frame_reg_rtx) == ptr_regno)
28787 frame_off = 0;
28789 if (insn && restoring_FPRs_inline)
28791 if (cfa_restores)
28793 REG_NOTES (insn) = cfa_restores;
28794 cfa_restores = NULL_RTX;
28796 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28797 RTX_FRAME_RELATED_P (insn) = 1;
28800 if (crtl->calls_eh_return)
28802 rtx sa = EH_RETURN_STACKADJ_RTX;
28803 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28806 if (!sibcall)
28808 rtvec p;
28809 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28810 if (! restoring_FPRs_inline)
28812 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
28813 RTVEC_ELT (p, 0) = ret_rtx;
28815 else
28817 if (cfa_restores)
28819 /* We can't hang the cfa_restores off a simple return,
28820 since the shrink-wrap code sometimes uses an existing
28821 return. This means there might be a path from
28822 pre-prologue code to this return, and dwarf2cfi code
28823 wants the eh_frame unwinder state to be the same on
28824 all paths to any point. So we need to emit the
28825 cfa_restores before the return. For -m64 we really
28826 don't need epilogue cfa_restores at all, except for
28827 this irritating dwarf2cfi-with-shrink-wrap
28828 requirement; the stack red zone means eh_frame info
28829 from the prologue telling the unwinder to restore
28830 from the stack is perfectly good right to the end of
28831 the function. */
28832 emit_insn (gen_blockage ());
28833 emit_cfa_restores (cfa_restores);
28834 cfa_restores = NULL_RTX;
28836 p = rtvec_alloc (2);
28837 RTVEC_ELT (p, 0) = simple_return_rtx;
28840 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
28841 ? gen_rtx_USE (VOIDmode,
28842 gen_rtx_REG (Pmode, LR_REGNO))
28843 : gen_rtx_CLOBBER (VOIDmode,
28844 gen_rtx_REG (Pmode, LR_REGNO)));
28846 /* If we have to restore more than two FP registers, branch to the
28847 restore function. It will return to our caller. */
28848 if (! restoring_FPRs_inline)
28850 int i;
28851 int reg;
28852 rtx sym;
28854 if (flag_shrink_wrap)
28855 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28857 sym = rs6000_savres_routine_sym (info,
28858 SAVRES_FPR | (lr ? SAVRES_LR : 0));
28859 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
28860 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28861 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28863 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28865 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28867 RTVEC_ELT (p, i + 4)
28868 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28869 if (flag_shrink_wrap)
28870 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28871 cfa_restores);
28875 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28878 if (cfa_restores)
28880 if (sibcall)
28881 /* Ensure the cfa_restores are hung off an insn that won't
28882 be reordered above other restores. */
28883 emit_insn (gen_blockage ());
28885 emit_cfa_restores (cfa_restores);
28889 /* Write function epilogue. */
28891 static void
28892 rs6000_output_function_epilogue (FILE *file,
28893 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28895 #if TARGET_MACHO
28896 macho_branch_islands ();
28897 /* Mach-O doesn't support labels at the end of objects, so if
28898 it looks like we might want one, insert a NOP. */
28900 rtx_insn *insn = get_last_insn ();
28901 rtx_insn *deleted_debug_label = NULL;
28902 while (insn
28903 && NOTE_P (insn)
28904 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28906 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
28907 a nop; instead set their CODE_LABEL_NUMBER to -1, since
28908 otherwise there would be code generation differences
28909 between -g and -g0. */
28910 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28911 deleted_debug_label = insn;
28912 insn = PREV_INSN (insn);
28914 if (insn
28915 && (LABEL_P (insn)
28916 || (NOTE_P (insn)
28917 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
28918 fputs ("\tnop\n", file);
28919 else if (deleted_debug_label)
28920 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28921 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28922 CODE_LABEL_NUMBER (insn) = -1;
28924 #endif
28926 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28927 on its format.
28929 We don't output a traceback table if -finhibit-size-directive was
28930 used. The documentation for -finhibit-size-directive reads
28931 ``don't output a @code{.size} assembler directive, or anything
28932 else that would cause trouble if the function is split in the
28933 middle, and the two halves are placed at locations far apart in
28934 memory.'' The traceback table has this property, since it
28935 includes the offset from the start of the function to the
28936 traceback table itself.
28938 System V.4 PowerPC (and the embedded ABI derived from it) uses a
28939 different traceback table. */
28940 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28941 && ! flag_inhibit_size_directive
28942 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28944 const char *fname = NULL;
28945 const char *language_string = lang_hooks.name;
28946 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28947 int i;
28948 int optional_tbtab;
28949 rs6000_stack_t *info = rs6000_stack_info ();
28951 if (rs6000_traceback == traceback_full)
28952 optional_tbtab = 1;
28953 else if (rs6000_traceback == traceback_part)
28954 optional_tbtab = 0;
28955 else
28956 optional_tbtab = !optimize_size && !TARGET_ELF;
28958 if (optional_tbtab)
28960 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28961 while (*fname == '.') /* V.4 encodes . in the name */
28962 fname++;
28964 /* Need label immediately before tbtab, so we can compute
28965 its offset from the function start. */
28966 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28967 ASM_OUTPUT_LABEL (file, fname);
28970 /* The .tbtab pseudo-op can only be used for the first eight
28971 expressions, since it can't handle the possibly
28972 variable-length fields that follow. However, if you omit the
28973 optional fields, the assembler outputs zeros for all optional
28974 fields anyway, giving each variable-length field its minimum
28975 length (as defined in sys/debug.h). Thus we cannot use the
28976 .tbtab pseudo-op at all. */
28978 /* An all-zero word flags the start of the tbtab, for debuggers
28979 that have to find it by searching forward from the entry
28980 point or from the current pc. */
28981 fputs ("\t.long 0\n", file);
28983 /* Tbtab format type. Use format type 0. */
28984 fputs ("\t.byte 0,", file);
28986 /* Language type. Unfortunately, there does not seem to be any
28987 official way to discover the language being compiled, so we
28988 use language_string.
28989 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28990 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28991 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28992 either, so for now use 0. */
28993 if (lang_GNU_C ()
28994 || ! strcmp (language_string, "GNU GIMPLE")
28995 || ! strcmp (language_string, "GNU Go")
28996 || ! strcmp (language_string, "libgccjit"))
28997 i = 0;
28998 else if (! strcmp (language_string, "GNU F77")
28999 || lang_GNU_Fortran ())
29000 i = 1;
29001 else if (! strcmp (language_string, "GNU Pascal"))
29002 i = 2;
29003 else if (! strcmp (language_string, "GNU Ada"))
29004 i = 3;
29005 else if (lang_GNU_CXX ()
29006 || ! strcmp (language_string, "GNU Objective-C++"))
29007 i = 9;
29008 else if (! strcmp (language_string, "GNU Java"))
29009 i = 13;
29010 else if (! strcmp (language_string, "GNU Objective-C"))
29011 i = 14;
29012 else
29013 gcc_unreachable ();
29014 fprintf (file, "%d,", i);
29016 /* 8 single bit fields: global linkage (not set for C extern linkage,
29017 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29018 from start of procedure stored in tbtab, internal function, function
29019 has controlled storage, function has no toc, function uses fp,
29020 function logs/aborts fp operations. */
29021 /* Assume that fp operations are used if any fp reg must be saved. */
29022 fprintf (file, "%d,",
29023 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
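/* For example (illustrative values, not mandated by sys/debug.h):
   a full traceback table (optional_tbtab == 1) for a function whose
   FP saves start at f30 (first_fp_reg_save == 62) gets
   (1 << 5) | (1 << 1) == 34 for this byte.  */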
29025 /* 6 bitfields: function is interrupt handler, name present in
29026 proc table, function calls alloca, on condition directives
29027 (controls stack walks, 3 bits), saves condition reg, saves
29028 link reg. */
29029 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29030 set up as a frame pointer, even when there is no alloca call. */
29031 fprintf (file, "%d,",
29032 ((optional_tbtab << 6)
29033 | ((optional_tbtab & frame_pointer_needed) << 5)
29034 | (info->cr_save_p << 1)
29035 | (info->lr_save_p)));
29037 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29038 (6 bits). */
29039 fprintf (file, "%d,",
29040 (info->push_p << 7) | (64 - info->first_fp_reg_save));
29042 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29043 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29045 if (optional_tbtab)
29047 /* Compute the parameter info from the function decl argument
29048 list. */
29049 tree decl;
29050 int next_parm_info_bit = 31;
29052 for (decl = DECL_ARGUMENTS (current_function_decl);
29053 decl; decl = DECL_CHAIN (decl))
29055 rtx parameter = DECL_INCOMING_RTL (decl);
29056 machine_mode mode = GET_MODE (parameter);
29058 if (GET_CODE (parameter) == REG)
29060 if (SCALAR_FLOAT_MODE_P (mode))
29062 int bits;
29064 float_parms++;
29066 switch (mode)
29068 case SFmode:
29069 case SDmode:
29070 bits = 0x2;
29071 break;
29073 case DFmode:
29074 case DDmode:
29075 case TFmode:
29076 case TDmode:
29077 case IFmode:
29078 case KFmode:
29079 bits = 0x3;
29080 break;
29082 default:
29083 gcc_unreachable ();
29086 /* If only one bit will fit, don't OR in this entry. */
29087 if (next_parm_info_bit > 0)
29088 parm_info |= (bits << (next_parm_info_bit - 1));
29089 next_parm_info_bit -= 2;
29091 else
29093 fixed_parms += ((GET_MODE_SIZE (mode)
29094 + (UNITS_PER_WORD - 1))
29095 / UNITS_PER_WORD);
29096 next_parm_info_bit -= 1;
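/* Worked example for the loop above (assuming a 64-bit target):
   for f (int i, double d), the int takes one word (fixed_parms == 1,
   one zero bit of parm_info) and the double lands in an FP register
   (float_parms == 1, two bits 0b11), so parm_info == 0x60000000 and
   next_parm_info_bit is left at 28.  */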
29102 /* Number of fixed point parameters. */
29103 /* This is actually the number of words of fixed point parameters; thus
29104 an 8 byte struct counts as 2, and the maximum value is 8. */
29105 fprintf (file, "%d,", fixed_parms);
29107 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29108 all on stack. */
29109 /* This is actually the number of fp registers that hold parameters;
29110 and thus the maximum value is 13. */
29111 /* Set parameters on stack bit if parameters are not in their original
29112 registers, regardless of whether they are on the stack? Xlc
29113 seems to set the bit when not optimizing. */
29114 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29116 if (! optional_tbtab)
29117 return;
29119 /* Optional fields follow. Some are variable length. */
29121 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
29122 11 double float. */
29123 /* There is an entry for each parameter in a register, in the order that
29124 they occur in the parameter list. Any intervening arguments on the
29125 stack are ignored. If the list overflows a long (max possible length
29126 34 bits) then completely leave off all elements that don't fit. */
29127 /* Only emit this long if there was at least one parameter. */
29128 if (fixed_parms || float_parms)
29129 fprintf (file, "\t.long %d\n", parm_info);
29131 /* Offset from start of code to tb table. */
29132 fputs ("\t.long ", file);
29133 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29134 RS6000_OUTPUT_BASENAME (file, fname);
29135 putc ('-', file);
29136 rs6000_output_function_entry (file, fname);
29137 putc ('\n', file);
29139 /* Interrupt handler mask. */
29140 /* Omit this long, since we never set the interrupt handler bit
29141 above. */
29143 /* Number of CTL (controlled storage) anchors. */
29144 /* Omit this long, since the has_ctl bit is never set above. */
29146 /* Displacement into stack of each CTL anchor. */
29147 /* Omit this list of longs, because there are no CTL anchors. */
29149 /* Length of function name. */
29150 if (*fname == '*')
29151 ++fname;
29152 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29154 /* Function name. */
29155 assemble_string (fname, strlen (fname));
29157 /* Register for alloca automatic storage; this is always reg 31.
29158 Only emit this if the alloca bit was set above. */
29159 if (frame_pointer_needed)
29160 fputs ("\t.byte 31\n", file);
29162 fputs ("\t.align 2\n", file);
29165 /* Arrange to define .LCTOC1 label, if not already done. */
29166 if (need_toc_init)
29168 need_toc_init = 0;
29169 if (!toc_initialized)
29171 switch_to_section (toc_section);
29172 switch_to_section (current_function_section ());
29177 /* -fsplit-stack support. */
29179 /* A SYMBOL_REF for __morestack. */
29180 static GTY(()) rtx morestack_ref;
29182 static rtx
29183 gen_add3_const (rtx rt, rtx ra, long c)
29185 if (TARGET_64BIT)
29186 return gen_adddi3 (rt, ra, GEN_INT (c));
29187 else
29188 return gen_addsi3 (rt, ra, GEN_INT (c));
29191 /* Emit -fsplit-stack prologue, which goes before the regular function
29192 prologue (at local entry point in the case of ELFv2). */
29194 void
29195 rs6000_expand_split_stack_prologue (void)
29197 rs6000_stack_t *info = rs6000_stack_info ();
29198 unsigned HOST_WIDE_INT allocate;
29199 long alloc_hi, alloc_lo;
29200 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29201 rtx_insn *insn;
29203 gcc_assert (flag_split_stack && reload_completed);
29205 if (!info->push_p)
29206 return;
29208 if (global_regs[29])
29210 error ("-fsplit-stack uses register r29");
29211 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29212 "conflicts with %qD", global_regs_decl[29]);
29215 allocate = info->total_size;
29216 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29218 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29219 return;
29221 if (morestack_ref == NULL_RTX)
29223 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29224 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29225 | SYMBOL_FLAG_FUNCTION);
29228 r0 = gen_rtx_REG (Pmode, 0);
29229 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29230 r12 = gen_rtx_REG (Pmode, 12);
29231 emit_insn (gen_load_split_stack_limit (r0));
29232 /* Always emit two insns here to calculate the requested stack,
29233 so that the linker can edit them when adjusting size for calling
29234 non-split-stack code. */
29235 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29236 alloc_lo = -allocate - alloc_hi;
29237 if (alloc_hi != 0)
29239 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29240 if (alloc_lo != 0)
29241 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29242 else
29243 emit_insn (gen_nop ());
29245 else
29247 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29248 emit_insn (gen_nop ());
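/* Illustrative arithmetic: for allocate == 0x12345, alloc_hi is
   (-0x12345 + 0x8000) & ~0xffff == -0x10000 and alloc_lo == -0x2345,
   so the two insns above amount to
       addis r12,r1,-1
       addi  r12,r12,-9029
   leaving r12 == r1 - allocate, the prospective new stack pointer.  */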
29251 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29252 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29253 ok_label = gen_label_rtx ();
29254 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29255 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29256 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29257 pc_rtx);
29258 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29259 JUMP_LABEL (jump) = ok_label;
29260 /* Mark the jump as very likely to be taken. */
29261 add_int_reg_note (jump, REG_BR_PROB,
29262 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
29264 lr = gen_rtx_REG (Pmode, LR_REGNO);
29265 insn = emit_move_insn (r0, lr);
29266 RTX_FRAME_RELATED_P (insn) = 1;
29267 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29268 RTX_FRAME_RELATED_P (insn) = 1;
29270 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29271 const0_rtx, const0_rtx));
29272 call_fusage = NULL_RTX;
29273 use_reg (&call_fusage, r12);
29274 /* Say the call uses r0, even though it doesn't, to stop regrename
29275 from twiddling with the insns saving lr, trashing args for cfun.
29276 The insns restoring lr are similarly protected by making
29277 split_stack_return use r0. */
29278 use_reg (&call_fusage, r0);
29279 add_function_usage_to (insn, call_fusage);
29280 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29281 insn = emit_move_insn (lr, r0);
29282 add_reg_note (insn, REG_CFA_RESTORE, lr);
29283 RTX_FRAME_RELATED_P (insn) = 1;
29284 emit_insn (gen_split_stack_return ());
29286 emit_label (ok_label);
29287 LABEL_NUSES (ok_label) = 1;
29290 /* Return the internal arg pointer used for function incoming
29291 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29292 to copy it to a pseudo in order for it to be preserved over calls
29293 and suchlike. We'd really like to use a pseudo here for the
29294 internal arg pointer but data-flow analysis is not prepared to
29295 accept pseudos as live at the beginning of a function. */
29297 static rtx
29298 rs6000_internal_arg_pointer (void)
29300 if (flag_split_stack
29301 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29302 == NULL))
29305 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29307 rtx pat;
29309 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29310 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29312 /* Put the pseudo initialization right after the note at the
29313 beginning of the function. */
29314 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29315 gen_rtx_REG (Pmode, 12));
29316 push_topmost_sequence ();
29317 emit_insn_after (pat, get_insns ());
29318 pop_topmost_sequence ();
29320 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29321 FIRST_PARM_OFFSET (current_function_decl));
29323 return virtual_incoming_args_rtx;
29326 /* We may have to tell the dataflow pass that the split stack prologue
29327 is initializing a register. */
29329 static void
29330 rs6000_live_on_entry (bitmap regs)
29332 if (flag_split_stack)
29333 bitmap_set_bit (regs, 12);
29336 /* Emit -fsplit-stack dynamic stack allocation space check. */
29338 void
29339 rs6000_split_stack_space_check (rtx size, rtx label)
29341 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29342 rtx limit = gen_reg_rtx (Pmode);
29343 rtx requested = gen_reg_rtx (Pmode);
29344 rtx cmp = gen_reg_rtx (CCUNSmode);
29345 rtx jump;
29347 emit_insn (gen_load_split_stack_limit (limit));
29348 if (CONST_INT_P (size))
29349 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29350 else
29352 size = force_reg (Pmode, size);
29353 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29355 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29356 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29357 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29358 gen_rtx_LABEL_REF (VOIDmode, label),
29359 pc_rtx);
29360 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29361 JUMP_LABEL (jump) = label;
29364 /* A C compound statement that outputs the assembler code for a thunk
29365 function, used to implement C++ virtual function calls with
29366 multiple inheritance. The thunk acts as a wrapper around a virtual
29367 function, adjusting the implicit object parameter before handing
29368 control off to the real function.
29370 First, emit code to add the integer DELTA to the location that
29371 contains the incoming first argument. Assume that this argument
29372 contains a pointer, and is the one used to pass the `this' pointer
29373 in C++. This is the incoming argument *before* the function
29374 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29375 values of all other incoming arguments.
29377 After the addition, emit code to jump to FUNCTION, which is a
29378 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29379 not touch the return address. Hence returning from FUNCTION will
29380 return to whoever called the current `thunk'.
29382 The effect must be as if FUNCTION had been called directly with the
29383 adjusted first argument. This macro is responsible for emitting
29384 all of the code for a thunk function; output_function_prologue()
29385 and output_function_epilogue() are not invoked.
29387 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29388 been extracted from it.) It might possibly be useful on some
29389 targets, but probably not.
29391 If you do not define this macro, the target-independent code in the
29392 C++ frontend will generate a less efficient heavyweight thunk that
29393 calls FUNCTION instead of jumping to it. The generic approach does
29394 not support varargs. */
29396 static void
29397 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29398 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29399 tree function)
29401 rtx this_rtx, funexp;
29402 rtx_insn *insn;
29404 reload_completed = 1;
29405 epilogue_completed = 1;
29407 /* Mark the end of the (empty) prologue. */
29408 emit_note (NOTE_INSN_PROLOGUE_END);
29410 /* Find the "this" pointer. If the function returns a structure,
29411 the structure return pointer is in r3. */
29412 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29413 this_rtx = gen_rtx_REG (Pmode, 4);
29414 else
29415 this_rtx = gen_rtx_REG (Pmode, 3);
29417 /* Apply the constant offset, if required. */
29418 if (delta)
29419 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29421 /* Apply the offset from the vtable, if required. */
29422 if (vcall_offset)
29424 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29425 rtx tmp = gen_rtx_REG (Pmode, 12);
29427 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29428 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29430 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29431 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29433 else
29435 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29437 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29439 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
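/* A sketch of the code a 64-bit thunk ends up with, assuming
   delta == 8 and a small vcall_offset of 16 (mnemonics differ
   for -m32):
       addi r3,r3,8        # this += delta
       ld   r12,0(r3)      # load the vtable pointer
       ld   r12,16(r12)    # load the this-adjustment
       add  r3,r3,r12      # this += adjustment
       b    <function>     # sibcall, emitted below  */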
29442 /* Generate a tail call to the target function. */
29443 if (!TREE_USED (function))
29445 assemble_external (function);
29446 TREE_USED (function) = 1;
29448 funexp = XEXP (DECL_RTL (function), 0);
29449 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29451 #if TARGET_MACHO
29452 if (MACHOPIC_INDIRECT)
29453 funexp = machopic_indirect_call_target (funexp);
29454 #endif
29456 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29457 generate sibcall RTL explicitly. */
29458 insn = emit_call_insn (
29459 gen_rtx_PARALLEL (VOIDmode,
29460 gen_rtvec (4,
29461 gen_rtx_CALL (VOIDmode,
29462 funexp, const0_rtx),
29463 gen_rtx_USE (VOIDmode, const0_rtx),
29464 gen_rtx_USE (VOIDmode,
29465 gen_rtx_REG (SImode,
29466 LR_REGNO)),
29467 simple_return_rtx)));
29468 SIBLING_CALL_P (insn) = 1;
29469 emit_barrier ();
29471 /* Run just enough of rest_of_compilation to get the insns emitted.
29472 There's not really enough bulk here to make other passes such as
29473 instruction scheduling worthwhile. Note that use_thunk calls
29474 assemble_start_function and assemble_end_function. */
29475 insn = get_insns ();
29476 shorten_branches (insn);
29477 final_start_function (insn, file, 1);
29478 final (insn, file, 1);
29479 final_end_function ();
29481 reload_completed = 0;
29482 epilogue_completed = 0;
29485 /* A quick summary of the various types of 'constant-pool tables'
29486 under PowerPC:
29488 Target          Flags           Name             One table per
29489 AIX             (none)          AIX TOC          object file
29490 AIX             -mfull-toc      AIX TOC          object file
29491 AIX             -mminimal-toc   AIX minimal TOC  translation unit
29492 SVR4/EABI       (none)          SVR4 SDATA       object file
29493 SVR4/EABI       -fpic           SVR4 pic         object file
29494 SVR4/EABI       -fPIC           SVR4 PIC         translation unit
29495 SVR4/EABI       -mrelocatable   EABI TOC         function
29496 SVR4/EABI       -maix           AIX TOC          object file
29497 SVR4/EABI       -maix -mminimal-toc
29498                                 AIX minimal TOC  translation unit
29500 Name               Reg.  Set by  entries  contains:
29501                                  made by  addrs?  fp?     sum?
29503 AIX TOC            2     crt0    as       Y       option  option
29504 AIX minimal TOC    30    prolog  gcc      Y       Y       option
29505 SVR4 SDATA         13    crt0    gcc      N       Y       N
29506 SVR4 pic           30    prolog  ld       Y       not yet N
29507 SVR4 PIC           30    prolog  gcc      Y       option  option
29508 EABI TOC           30    prolog  gcc      Y       option  option
29512 /* Hash functions for the hash table. */
29514 static unsigned
29515 rs6000_hash_constant (rtx k)
29517 enum rtx_code code = GET_CODE (k);
29518 machine_mode mode = GET_MODE (k);
29519 unsigned result = (code << 3) ^ mode;
29520 const char *format;
29521 int flen, fidx;
29523 format = GET_RTX_FORMAT (code);
29524 flen = strlen (format);
29525 fidx = 0;
29527 switch (code)
29529 case LABEL_REF:
29530 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29532 case CONST_WIDE_INT:
29534 int i;
29535 flen = CONST_WIDE_INT_NUNITS (k);
29536 for (i = 0; i < flen; i++)
29537 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29538 return result;
29541 case CONST_DOUBLE:
29542 if (mode != VOIDmode)
29543 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29544 flen = 2;
29545 break;
29547 case CODE_LABEL:
29548 fidx = 3;
29549 break;
29551 default:
29552 break;
29555 for (; fidx < flen; fidx++)
29556 switch (format[fidx])
29558 case 's':
29560 unsigned i, len;
29561 const char *str = XSTR (k, fidx);
29562 len = strlen (str);
29563 result = result * 613 + len;
29564 for (i = 0; i < len; i++)
29565 result = result * 613 + (unsigned) str[i];
29566 break;
29568 case 'u':
29569 case 'e':
29570 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29571 break;
29572 case 'i':
29573 case 'n':
29574 result = result * 613 + (unsigned) XINT (k, fidx);
29575 break;
29576 case 'w':
29577 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29578 result = result * 613 + (unsigned) XWINT (k, fidx);
29579 else
29581 size_t i;
29582 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29583 result = result * 613 + (unsigned) (XWINT (k, fidx)
29584 >> CHAR_BIT * i);
29586 break;
29587 case '0':
29588 break;
29589 default:
29590 gcc_unreachable ();
29593 return result;
29596 hashval_t
29597 toc_hasher::hash (toc_hash_struct *thc)
29599 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29602 /* Compare H1 and H2 for equivalence. */
29604 bool
29605 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29607 rtx r1 = h1->key;
29608 rtx r2 = h2->key;
29610 if (h1->key_mode != h2->key_mode)
29611 return 0;
29613 return rtx_equal_p (r1, r2);
29616 /* These are the names given by the C++ front-end to vtables and
29617 vtable-like objects. Ideally, this logic should not be here;
29618 instead, there should be some programmatic way of inquiring as
29619 to whether or not an object is a vtable. */
29621 #define VTABLE_NAME_P(NAME) \
29622 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29623 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29624 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29625 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29626 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
29628 #ifdef NO_DOLLAR_IN_LABEL
29629 /* Return a GGC-allocated character string translating dollar signs in
29630 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29632 const char *
29633 rs6000_xcoff_strip_dollar (const char *name)
29635 char *strip, *p;
29636 const char *q;
29637 size_t len;
29639 q = (const char *) strchr (name, '$');
29641 if (q == 0 || q == name)
29642 return name;
29644 len = strlen (name);
29645 strip = XALLOCAVEC (char, len + 1);
29646 strcpy (strip, name);
29647 p = strip + (q - name);
29648 while (p)
29650 *p = '_';
29651 p = strchr (p + 1, '$');
29654 return ggc_alloc_string (strip, len);
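/* For example, rs6000_xcoff_strip_dollar ("f$g$h") returns "f_g_h";
   a name whose first character is '$' (q == name) is returned
   unchanged.  */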
29656 #endif
29658 void
29659 rs6000_output_symbol_ref (FILE *file, rtx x)
29661 /* Currently C++ toc references to vtables can be emitted before it
29662 is decided whether the vtable is public or private. If this is
29663 the case, then the linker will eventually complain that there is
29664 a reference to an unknown section. Thus, for vtables only,
29665 we emit the TOC reference to reference the symbol and not the
29666 section. */
29667 const char *name = XSTR (x, 0);
29669 tree decl = SYMBOL_REF_DECL (x);
29670 if (decl /* sync condition with assemble_external () */
29671 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
29672 && (TREE_CODE (decl) == VAR_DECL
29673 || TREE_CODE (decl) == FUNCTION_DECL)
29674 && name[strlen (name) - 1] != ']')
29676 name = concat (name,
29677 (TREE_CODE (decl) == FUNCTION_DECL
29678 ? "[DS]" : "[UA]"),
29679 NULL);
29680 XSTR (x, 0) = name;
29683 if (VTABLE_NAME_P (name))
29685 RS6000_OUTPUT_BASENAME (file, name);
29687 else
29688 assemble_name (file, name);
29691 /* Output a TOC entry. We derive the entry name from what is being
29692 written. */
29694 void
29695 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29697 char buf[256];
29698 const char *name = buf;
29699 rtx base = x;
29700 HOST_WIDE_INT offset = 0;
29702 gcc_assert (!TARGET_NO_TOC);
29704 /* When the linker won't eliminate them, don't output duplicate
29705 TOC entries (this happens on AIX if there is any kind of TOC,
29706 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29707 CODE_LABELs. */
29708 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29710 struct toc_hash_struct *h;
29712 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29713 time because GGC is not initialized at that point. */
29714 if (toc_hash_table == NULL)
29715 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29717 h = ggc_alloc<toc_hash_struct> ();
29718 h->key = x;
29719 h->key_mode = mode;
29720 h->labelno = labelno;
29722 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29723 if (*found == NULL)
29724 *found = h;
29725 else /* This is indeed a duplicate.
29726 Set this label equal to that label. */
29728 fputs ("\t.set ", file);
29729 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29730 fprintf (file, "%d,", labelno);
29731 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29732 fprintf (file, "%d\n", ((*found)->labelno));
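/* On XCOFF this emits something like (label numbers illustrative)
       .set LC..5,LC..2
   so the duplicate label becomes an alias of the TOC entry that was
   already emitted for the earlier label.  */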
29734 #ifdef HAVE_AS_TLS
29735 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29736 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29737 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29739 fputs ("\t.set ", file);
29740 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29741 fprintf (file, "%d,", labelno);
29742 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29743 fprintf (file, "%d\n", ((*found)->labelno));
29745 #endif
29746 return;
29750 /* If we're going to put a double constant in the TOC, make sure it's
29751 aligned properly when strict alignment is on. */
29752 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29753 && STRICT_ALIGNMENT
29754 && GET_MODE_BITSIZE (mode) >= 64
29755 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
29756 ASM_OUTPUT_ALIGN (file, 3);
29759 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29761 /* Handle FP constants specially. Note that if we have a minimal
29762 TOC, things we put here aren't actually in the TOC, so we can allow
29763 FP constants. */
29764 if (GET_CODE (x) == CONST_DOUBLE &&
29765 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29766 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29768 long k[4];
29770 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29771 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29772 else
29773 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29775 if (TARGET_64BIT)
29777 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29778 fputs (DOUBLE_INT_ASM_OP, file);
29779 else
29780 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29781 k[0] & 0xffffffff, k[1] & 0xffffffff,
29782 k[2] & 0xffffffff, k[3] & 0xffffffff);
29783 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29784 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29785 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29786 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29787 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29788 return;
29790 else
29792 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29793 fputs ("\t.long ", file);
29794 else
29795 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29796 k[0] & 0xffffffff, k[1] & 0xffffffff,
29797 k[2] & 0xffffffff, k[3] & 0xffffffff);
29798 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29799 k[0] & 0xffffffff, k[1] & 0xffffffff,
29800 k[2] & 0xffffffff, k[3] & 0xffffffff);
29801 return;
29804 else if (GET_CODE (x) == CONST_DOUBLE &&
29805 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29807 long k[2];
29809 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29810 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29811 else
29812 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29814 if (TARGET_64BIT)
29816 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29817 fputs (DOUBLE_INT_ASM_OP, file);
29818 else
29819 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29820 k[0] & 0xffffffff, k[1] & 0xffffffff);
29821 fprintf (file, "0x%lx%08lx\n",
29822 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29823 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29824 return;
29826 else
29828 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29829 fputs ("\t.long ", file);
29830 else
29831 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29832 k[0] & 0xffffffff, k[1] & 0xffffffff);
29833 fprintf (file, "0x%lx,0x%lx\n",
29834 k[0] & 0xffffffff, k[1] & 0xffffffff);
29835 return;
29838 else if (GET_CODE (x) == CONST_DOUBLE &&
29839 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29841 long l;
29843 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29844 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29845 else
29846 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29848 if (TARGET_64BIT)
29850 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29851 fputs (DOUBLE_INT_ASM_OP, file);
29852 else
29853 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29854 if (WORDS_BIG_ENDIAN)
29855 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29856 else
29857 fprintf (file, "0x%lx\n", l & 0xffffffff);
29858 return;
29860 else
29862 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29863 fputs ("\t.long ", file);
29864 else
29865 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29866 fprintf (file, "0x%lx\n", l & 0xffffffff);
29867 return;
29870 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29872 unsigned HOST_WIDE_INT low;
29873 HOST_WIDE_INT high;
29875 low = INTVAL (x) & 0xffffffff;
29876 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29878 /* TOC entries are always Pmode-sized, so when big-endian
29879 smaller integer constants in the TOC need to be padded.
29880 (This is still a win over putting the constants in
29881 a separate constant pool, because then we'd have
29882 to have both a TOC entry _and_ the actual constant.)
29884 For a 32-bit target, CONST_INT values are loaded and shifted
29885 entirely within `low' and can be stored in one TOC entry. */
29887 /* It would be easy to make this work, but it doesn't now. */
29888 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29890 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29892 low |= high << 32;
29893 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29894 high = (HOST_WIDE_INT) low >> 32;
29895 low &= 0xffffffff;
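/* Illustrative case: an SImode constant 5 in the TOC of a 64-bit
   big-endian target.  POINTER_SIZE (64) exceeds the 32-bit mode, so
   the shuffle above leaves high == 5 and low == 0, and the entry is
   emitted below as the doubleword 0x500000000.  */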
29898 if (TARGET_64BIT)
29900 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29901 fputs (DOUBLE_INT_ASM_OP, file);
29902 else
29903 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29904 (long) high & 0xffffffff, (long) low & 0xffffffff);
29905 fprintf (file, "0x%lx%08lx\n",
29906 (long) high & 0xffffffff, (long) low & 0xffffffff);
29907 return;
29909 else
29911 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29913 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29914 fputs ("\t.long ", file);
29915 else
29916 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29917 (long) high & 0xffffffff, (long) low & 0xffffffff);
29918 fprintf (file, "0x%lx,0x%lx\n",
29919 (long) high & 0xffffffff, (long) low & 0xffffffff);
29921 else
29923 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29924 fputs ("\t.long ", file);
29925 else
29926 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29927 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29929 return;
29933 if (GET_CODE (x) == CONST)
29935 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29936 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29938 base = XEXP (XEXP (x, 0), 0);
29939 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29942 switch (GET_CODE (base))
29944 case SYMBOL_REF:
29945 name = XSTR (base, 0);
29946 break;
29948 case LABEL_REF:
29949 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29950 CODE_LABEL_NUMBER (XEXP (base, 0)));
29951 break;
29953 case CODE_LABEL:
29954 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29955 break;
29957 default:
29958 gcc_unreachable ();
29961 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29962 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29963 else
29965 fputs ("\t.tc ", file);
29966 RS6000_OUTPUT_BASENAME (file, name);
29968 if (offset < 0)
29969 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29970 else if (offset)
29971 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29973 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29974 after other TOC symbols, reducing overflow of small TOC access
29975 to [TC] symbols. */
29976 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29977 ? "[TE]," : "[TC],", file);
29980 /* Currently C++ toc references to vtables can be emitted before it
29981 is decided whether the vtable is public or private. If this is
29982 the case, then the linker will eventually complain that there is
29983 a TOC reference to an unknown section. Thus, for vtables only,
29984 we emit the TOC reference to reference the symbol and not the
29985 section. */
29986 if (VTABLE_NAME_P (name))
29988 RS6000_OUTPUT_BASENAME (file, name);
29989 if (offset < 0)
29990 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29991 else if (offset > 0)
29992 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29994 else
29995 output_addr_const (file, x);
29997 #if HAVE_AS_TLS
29998 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
30000 switch (SYMBOL_REF_TLS_MODEL (base))
30002 case 0:
30003 break;
30004 case TLS_MODEL_LOCAL_EXEC:
30005 fputs ("@le", file);
30006 break;
30007 case TLS_MODEL_INITIAL_EXEC:
30008 fputs ("@ie", file);
30009 break;
30010 /* Use global-dynamic for local-dynamic. */
30011 case TLS_MODEL_GLOBAL_DYNAMIC:
30012 case TLS_MODEL_LOCAL_DYNAMIC:
30013 putc ('\n', file);
30014 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30015 fputs ("\t.tc .", file);
30016 RS6000_OUTPUT_BASENAME (file, name);
30017 fputs ("[TC],", file);
30018 output_addr_const (file, x);
30019 fputs ("@m", file);
30020 break;
30021 default:
30022 gcc_unreachable ();
30025 #endif
30027 putc ('\n', file);
30030 /* Output an assembler pseudo-op to write an ASCII string of N characters
30031 starting at P to FILE.
30033 On the RS/6000, we have to do this using the .byte operation and
30034 write out special characters outside the quoted string.
30035 Also, the assembler is broken; very long strings are truncated,
30036 so we must artificially break them up early. */
30038 void
30039 output_ascii (FILE *file, const char *p, int n)
30041 char c;
30042 int i, count_string;
30043 const char *for_string = "\t.byte \"";
30044 const char *for_decimal = "\t.byte ";
30045 const char *to_close = NULL;
30047 count_string = 0;
30048 for (i = 0; i < n; i++)
30050 c = *p++;
30051 if (c >= ' ' && c < 0177)
30053 if (for_string)
30054 fputs (for_string, file);
30055 putc (c, file);
30057 /* Write two quotes to get one. */
30058 if (c == '"')
30060 putc (c, file);
30061 ++count_string;
30064 for_string = NULL;
30065 for_decimal = "\"\n\t.byte ";
30066 to_close = "\"\n";
30067 ++count_string;
30069 if (count_string >= 512)
30071 fputs (to_close, file);
30073 for_string = "\t.byte \"";
30074 for_decimal = "\t.byte ";
30075 to_close = NULL;
30076 count_string = 0;
30079 else
30081 if (for_decimal)
30082 fputs (for_decimal, file);
30083 fprintf (file, "%d", c);
30085 for_string = "\n\t.byte \"";
30086 for_decimal = ", ";
30087 to_close = "\n";
30088 count_string = 0;
30092 /* Now close the string if we have written one. Then end the line. */
30093 if (to_close)
30094 fputs (to_close, file);
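/* For instance, output_ascii (file, "ok\n", 3) emits
       .byte "ok"
       .byte 10
   printable characters accumulate in quoted runs (with '"' doubled),
   and everything else falls back to decimal .byte values.  */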
30097 /* Generate a unique section name for FILENAME for a section type
30098 represented by SECTION_DESC. Output goes into BUF.
30100 SECTION_DESC can be any string, as long as it is different for each
30101 possible section type.
30103 We name the section in the same manner as xlc. The name begins with an
30104 underscore followed by the filename (after stripping any leading directory
30105 names) with everything from the last period onward replaced by the string
30106 SECTION_DESC. If FILENAME does not contain a period, SECTION_DESC is
30107 appended to the end of the name. */
30109 void
30110 rs6000_gen_section_name (char **buf, const char *filename,
30111 const char *section_desc)
30113 const char *q, *after_last_slash, *last_period = 0;
30114 char *p;
30115 int len;
30117 after_last_slash = filename;
30118 for (q = filename; *q; q++)
30120 if (*q == '/')
30121 after_last_slash = q + 1;
30122 else if (*q == '.')
30123 last_period = q;
30126 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30127 *buf = (char *) xmalloc (len);
30129 p = *buf;
30130 *p++ = '_';
30132 for (q = after_last_slash; *q; q++)
30134 if (q == last_period)
30136 strcpy (p, section_desc);
30137 p += strlen (section_desc);
30138 break;
30141 else if (ISALNUM (*q))
30142 *p++ = *q;
30145 if (last_period == 0)
30146 strcpy (p, section_desc);
30147 else
30148 *p = '\0';
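/* For example, rs6000_gen_section_name (&buf, "lib/foo.c", "bss")
   should produce "_foobss": the directory prefix is dropped and
   everything from the last period onward is replaced by "bss".  */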
30151 /* Emit profile function. */
30153 void
30154 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30156 /* Non-standard profiling for kernels, which just saves LR then calls
30157 _mcount without worrying about arg saves. The idea is to change
30158 the function prologue as little as possible as it isn't easy to
30159 account for arg save/restore code added just for _mcount. */
30160 if (TARGET_PROFILE_KERNEL)
30161 return;
30163 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30165 #ifndef NO_PROFILE_COUNTERS
30166 # define NO_PROFILE_COUNTERS 0
30167 #endif
30168 if (NO_PROFILE_COUNTERS)
30169 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30170 LCT_NORMAL, VOIDmode, 0);
30171 else
30173 char buf[30];
30174 const char *label_name;
30175 rtx fun;
30177 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30178 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30179 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30181 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30182 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
30185 else if (DEFAULT_ABI == ABI_DARWIN)
30187 const char *mcount_name = RS6000_MCOUNT;
30188 int caller_addr_regno = LR_REGNO;
30190 /* Be conservative and always set this, at least for now. */
30191 crtl->uses_pic_offset_table = 1;
30193 #if TARGET_MACHO
30194 /* For PIC code, set up a stub and collect the caller's address
30195 from r0, which is where the prologue puts it. */
30196 if (MACHOPIC_INDIRECT
30197 && crtl->uses_pic_offset_table)
30198 caller_addr_regno = 0;
30199 #endif
30200 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30201 LCT_NORMAL, VOIDmode, 1,
30202 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30206 /* Write function profiler code. */
30208 void
30209 output_function_profiler (FILE *file, int labelno)
30211 char buf[100];
30213 switch (DEFAULT_ABI)
30215 default:
30216 gcc_unreachable ();
30218 case ABI_V4:
30219 if (!TARGET_32BIT)
30221 warning (0, "no profiling of 64-bit code for this ABI");
30222 return;
30224 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30225 fprintf (file, "\tmflr %s\n", reg_names[0]);
30226 if (NO_PROFILE_COUNTERS)
30228 asm_fprintf (file, "\tstw %s,4(%s)\n",
30229 reg_names[0], reg_names[1]);
30231 else if (TARGET_SECURE_PLT && flag_pic)
30233 if (TARGET_LINK_STACK)
30235 char name[32];
30236 get_ppc476_thunk_name (name);
30237 asm_fprintf (file, "\tbl %s\n", name);
30239 else
30240 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30241 asm_fprintf (file, "\tstw %s,4(%s)\n",
30242 reg_names[0], reg_names[1]);
30243 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30244 asm_fprintf (file, "\taddis %s,%s,",
30245 reg_names[12], reg_names[12]);
30246 assemble_name (file, buf);
30247 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30248 assemble_name (file, buf);
30249 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30251 else if (flag_pic == 1)
30253 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30254 asm_fprintf (file, "\tstw %s,4(%s)\n",
30255 reg_names[0], reg_names[1]);
30256 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30257 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30258 assemble_name (file, buf);
30259 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30261 else if (flag_pic > 1)
30263 asm_fprintf (file, "\tstw %s,4(%s)\n",
30264 reg_names[0], reg_names[1]);
30265 /* Now, we need to get the address of the label. */
30266 if (TARGET_LINK_STACK)
30268 char name[32];
30269 get_ppc476_thunk_name (name);
30270 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30271 assemble_name (file, buf);
30272 fputs ("-.\n1:", file);
30273 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30274 asm_fprintf (file, "\taddi %s,%s,4\n",
30275 reg_names[11], reg_names[11]);
30277 else
30279 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30280 assemble_name (file, buf);
30281 fputs ("-.\n1:", file);
30282 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30284 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30285 reg_names[0], reg_names[11]);
30286 asm_fprintf (file, "\tadd %s,%s,%s\n",
30287 reg_names[0], reg_names[0], reg_names[11]);
30289 else
30291 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30292 assemble_name (file, buf);
30293 fputs ("@ha\n", file);
30294 asm_fprintf (file, "\tstw %s,4(%s)\n",
30295 reg_names[0], reg_names[1]);
30296 asm_fprintf (file, "\tla %s,", reg_names[0]);
30297 assemble_name (file, buf);
30298 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30301 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30302 fprintf (file, "\tbl %s%s\n",
30303 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30304 break;
30306 case ABI_AIX:
30307 case ABI_ELFv2:
30308 case ABI_DARWIN:
30309 /* Don't do anything, done in output_profile_hook (). */
30310 break;
30316 /* The following variable value is the last issued insn. */
30318 static rtx_insn *last_scheduled_insn;
30320 /* The following variable helps to balance issuing of load and
30321 store instructions. */
30323 static int load_store_pendulum;
30325 /* The following variable helps pair divide insns during scheduling. */
30326 static int divide_cnt;
30327 /* The following variable helps pair and alternate vector and vector load
30328 insns during scheduling. */
30329 static int vec_load_pendulum;
30332 /* Power4 load update and store update instructions are cracked into a
30333 load or store and an integer insn which are executed in the same cycle.
30334 Branches have their own dispatch slot which does not count against the
30335 GCC issue rate, but it changes the program flow so there are no other
30336 instructions to issue in this cycle. */
30338 static int
30339 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30341 last_scheduled_insn = insn;
30342 if (GET_CODE (PATTERN (insn)) == USE
30343 || GET_CODE (PATTERN (insn)) == CLOBBER)
30345 cached_can_issue_more = more;
30346 return cached_can_issue_more;
30349 if (insn_terminates_group_p (insn, current_group))
30351 cached_can_issue_more = 0;
30352 return cached_can_issue_more;
30355 /* If the insn has no reservation, leave the issue count unchanged. */
30356 if (recog_memoized (insn) < 0)
30357 return more;
30359 if (rs6000_sched_groups)
30361 if (is_microcoded_insn (insn))
30362 cached_can_issue_more = 0;
30363 else if (is_cracked_insn (insn))
30364 cached_can_issue_more = more > 2 ? more - 2 : 0;
30365 else
30366 cached_can_issue_more = more - 1;
30368 return cached_can_issue_more;
30371 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
30372 return 0;
30374 cached_can_issue_more = more - 1;
30375 return cached_can_issue_more;
30378 static int
30379 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30381 int r = rs6000_variable_issue_1 (insn, more);
30382 if (verbose)
30383 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30384 return r;
30387 /* Adjust the cost of a scheduling dependency. Return the new cost of
30388 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30390 static int
30391 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30392 unsigned int)
30394 enum attr_type attr_type;
30396 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30397 return cost;
30399 switch (dep_type)
30401 case REG_DEP_TRUE:
30403 /* Data dependency; DEP_INSN writes a register that INSN reads
30404 some cycles later. */
30406 /* Separate a load from a narrower, dependent store. */
30407 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
30408 && GET_CODE (PATTERN (insn)) == SET
30409 && GET_CODE (PATTERN (dep_insn)) == SET
30410 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30411 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30412 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30413 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30414 return cost + 14;
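/* The 14 above is a tuning value rather than an architectural
   constant: a wide load fed by a narrower store to the same location
   defeats store forwarding on these processors, so keep such pairs
   well apart.  */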
30416 attr_type = get_attr_type (insn);
30418 switch (attr_type)
30420 case TYPE_JMPREG:
30421 /* Tell the first scheduling pass about the latency between
30422 a mtctr and bctr (and mtlr and br/blr). The first
30423 scheduling pass will not know about this latency since
30424 the mtctr instruction, which has the latency associated
30425 to it, will be generated by reload. */
30426 return 4;
30427 case TYPE_BRANCH:
30428 /* Leave some extra cycles between a compare and its
30429 dependent branch, to inhibit expensive mispredicts. */
30430 if ((rs6000_cpu_attr == CPU_PPC603
30431 || rs6000_cpu_attr == CPU_PPC604
30432 || rs6000_cpu_attr == CPU_PPC604E
30433 || rs6000_cpu_attr == CPU_PPC620
30434 || rs6000_cpu_attr == CPU_PPC630
30435 || rs6000_cpu_attr == CPU_PPC750
30436 || rs6000_cpu_attr == CPU_PPC7400
30437 || rs6000_cpu_attr == CPU_PPC7450
30438 || rs6000_cpu_attr == CPU_PPCE5500
30439 || rs6000_cpu_attr == CPU_PPCE6500
30440 || rs6000_cpu_attr == CPU_POWER4
30441 || rs6000_cpu_attr == CPU_POWER5
30442 || rs6000_cpu_attr == CPU_POWER7
30443 || rs6000_cpu_attr == CPU_POWER8
30444 || rs6000_cpu_attr == CPU_POWER9
30445 || rs6000_cpu_attr == CPU_CELL)
30446 && recog_memoized (dep_insn)
30447 && (INSN_CODE (dep_insn) >= 0))
30449 switch (get_attr_type (dep_insn))
30451 case TYPE_CMP:
30452 case TYPE_FPCOMPARE:
30453 case TYPE_CR_LOGICAL:
30454 case TYPE_DELAYED_CR:
30455 return cost + 2;
30456 case TYPE_EXTS:
30457 case TYPE_MUL:
30458 if (get_attr_dot (dep_insn) == DOT_YES)
30459 return cost + 2;
30460 else
30461 break;
30462 case TYPE_SHIFT:
30463 if (get_attr_dot (dep_insn) == DOT_YES
30464 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30465 return cost + 2;
30466 else
30467 break;
30468 default:
30469 break;
30471 break;
30473 case TYPE_STORE:
30474 case TYPE_FPSTORE:
30475 if ((rs6000_cpu == PROCESSOR_POWER6)
30476 && recog_memoized (dep_insn)
30477 && (INSN_CODE (dep_insn) >= 0))
30480 if (GET_CODE (PATTERN (insn)) != SET)
30481 /* If this happens, we have to extend this to schedule
30482 optimally. Return default for now. */
30483 return cost;
30485 /* Adjust the cost for the case where the value written
30486 by a fixed point operation is used as the address
30487 gen value on a store. */
30488 switch (get_attr_type (dep_insn))
30490 case TYPE_LOAD:
30491 case TYPE_CNTLZ:
30493 if (! store_data_bypass_p (dep_insn, insn))
30494 return get_attr_sign_extend (dep_insn)
30495 == SIGN_EXTEND_YES ? 6 : 4;
30496 break;
30498 case TYPE_SHIFT:
30500 if (! store_data_bypass_p (dep_insn, insn))
30501 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30502 6 : 3;
30503 break;
30505 case TYPE_INTEGER:
30506 case TYPE_ADD:
30507 case TYPE_LOGICAL:
30508 case TYPE_EXTS:
30509 case TYPE_INSERT:
30511 if (! store_data_bypass_p (dep_insn, insn))
30512 return 3;
30513 break;
30515 case TYPE_STORE:
30516 case TYPE_FPLOAD:
30517 case TYPE_FPSTORE:
30519 if (get_attr_update (dep_insn) == UPDATE_YES
30520 && ! store_data_bypass_p (dep_insn, insn))
30521 return 3;
30522 break;
30524 case TYPE_MUL:
30526 if (! store_data_bypass_p (dep_insn, insn))
30527 return 17;
30528 break;
30530 case TYPE_DIV:
30532 if (! store_data_bypass_p (dep_insn, insn))
30533 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30534 break;
30536 default:
30537 break;
30540 break;
30542 case TYPE_LOAD:
30543 if ((rs6000_cpu == PROCESSOR_POWER6)
30544 && recog_memoized (dep_insn)
30545 && (INSN_CODE (dep_insn) >= 0))
30548 /* Adjust the cost for the case where the value written
30549 by a fixed point instruction is used within the address
30550 gen portion of a subsequent load(u)(x). */
30551 switch (get_attr_type (dep_insn))
30553 case TYPE_LOAD:
30554 case TYPE_CNTLZ:
30556 if (set_to_load_agen (dep_insn, insn))
30557 return get_attr_sign_extend (dep_insn)
30558 == SIGN_EXTEND_YES ? 6 : 4;
30559 break;
30561 case TYPE_SHIFT:
30563 if (set_to_load_agen (dep_insn, insn))
30564 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30565 6 : 3;
30566 break;
30568 case TYPE_INTEGER:
30569 case TYPE_ADD:
30570 case TYPE_LOGICAL:
30571 case TYPE_EXTS:
30572 case TYPE_INSERT:
30574 if (set_to_load_agen (dep_insn, insn))
30575 return 3;
30576 break;
30578 case TYPE_STORE:
30579 case TYPE_FPLOAD:
30580 case TYPE_FPSTORE:
30582 if (get_attr_update (dep_insn) == UPDATE_YES
30583 && set_to_load_agen (dep_insn, insn))
30584 return 3;
30585 break;
30587 case TYPE_MUL:
30589 if (set_to_load_agen (dep_insn, insn))
30590 return 17;
30591 break;
30593 case TYPE_DIV:
30595 if (set_to_load_agen (dep_insn, insn))
30596 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30597 break;
30599 default:
30600 break;
30603 break;
30605 case TYPE_FPLOAD:
30606 if ((rs6000_cpu == PROCESSOR_POWER6)
30607 && get_attr_update (insn) == UPDATE_NO
30608 && recog_memoized (dep_insn)
30609 && (INSN_CODE (dep_insn) >= 0)
30610 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30611 return 2;
30613 default:
30614 break;
30617 /* Fall out to return default cost. */
30619 break;
30621 case REG_DEP_OUTPUT:
30622 /* Output dependency; DEP_INSN writes a register that INSN writes some
30623 cycles later. */
30624 if ((rs6000_cpu == PROCESSOR_POWER6)
30625 && recog_memoized (dep_insn)
30626 && (INSN_CODE (dep_insn) >= 0))
30628 attr_type = get_attr_type (insn);
30630 switch (attr_type)
30632 case TYPE_FP:
30633 case TYPE_FPSIMPLE:
30634 if (get_attr_type (dep_insn) == TYPE_FP
30635 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30636 return 1;
30637 break;
30638 case TYPE_FPLOAD:
30639 if (get_attr_update (insn) == UPDATE_NO
30640 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30641 return 2;
30642 break;
30643 default:
30644 break;
30647 /* Fall through, no cost for output dependency. */
30649 case REG_DEP_ANTI:
30650 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30651 cycles later. */
30652 return 0;
30654 default:
30655 gcc_unreachable ();
30658 return cost;
30661 /* Debug version of rs6000_adjust_cost. */
30663 static int
30664 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30665 int cost, unsigned int dw)
30667 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30669 if (ret != cost)
30671 const char *dep;
30673 switch (dep_type)
30675 default: dep = "unknown dependency"; break;
30676 case REG_DEP_TRUE: dep = "data dependency"; break;
30677 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30678 case REG_DEP_ANTI: dep = "anti dependency"; break;
30681 fprintf (stderr,
30682 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30683 "%s, insn:\n", ret, cost, dep);
30685 debug_rtx (insn);
30688 return ret;
30691 /* The function returns true if INSN is microcoded.
30692 Return false otherwise. */
30694 static bool
30695 is_microcoded_insn (rtx_insn *insn)
30697 if (!insn || !NONDEBUG_INSN_P (insn)
30698 || GET_CODE (PATTERN (insn)) == USE
30699 || GET_CODE (PATTERN (insn)) == CLOBBER)
30700 return false;
30702 if (rs6000_cpu_attr == CPU_CELL)
30703 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30705 if (rs6000_sched_groups
30706 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30708 enum attr_type type = get_attr_type (insn);
30709 if ((type == TYPE_LOAD
30710 && get_attr_update (insn) == UPDATE_YES
30711 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30712 || ((type == TYPE_LOAD || type == TYPE_STORE)
30713 && get_attr_update (insn) == UPDATE_YES
30714 && get_attr_indexed (insn) == INDEXED_YES)
30715 || type == TYPE_MFCR)
30716 return true;
30719 return false;
30722 /* The function returns true if INSN is cracked into 2 instructions
30723 by the processor (and therefore occupies 2 issue slots). */
30725 static bool
30726 is_cracked_insn (rtx_insn *insn)
30728 if (!insn || !NONDEBUG_INSN_P (insn)
30729 || GET_CODE (PATTERN (insn)) == USE
30730 || GET_CODE (PATTERN (insn)) == CLOBBER)
30731 return false;
30733 if (rs6000_sched_groups
30734 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30736 enum attr_type type = get_attr_type (insn);
30737 if ((type == TYPE_LOAD
30738 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30739 && get_attr_update (insn) == UPDATE_NO)
30740 || (type == TYPE_LOAD
30741 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30742 && get_attr_update (insn) == UPDATE_YES
30743 && get_attr_indexed (insn) == INDEXED_NO)
30744 || (type == TYPE_STORE
30745 && get_attr_update (insn) == UPDATE_YES
30746 && get_attr_indexed (insn) == INDEXED_NO)
30747 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30748 && get_attr_update (insn) == UPDATE_YES)
30749 || type == TYPE_DELAYED_CR
30750 || (type == TYPE_EXTS
30751 && get_attr_dot (insn) == DOT_YES)
30752 || (type == TYPE_SHIFT
30753 && get_attr_dot (insn) == DOT_YES
30754 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30755 || (type == TYPE_MUL
30756 && get_attr_dot (insn) == DOT_YES)
30757 || type == TYPE_DIV
30758 || (type == TYPE_INSERT
30759 && get_attr_size (insn) == SIZE_32))
30760 return true;
30763 return false;
30766 /* The function returns true if INSN can be issued only from
30767 the branch slot. */
30769 static bool
30770 is_branch_slot_insn (rtx_insn *insn)
30772 if (!insn || !NONDEBUG_INSN_P (insn)
30773 || GET_CODE (PATTERN (insn)) == USE
30774 || GET_CODE (PATTERN (insn)) == CLOBBER)
30775 return false;
30777 if (rs6000_sched_groups)
30779 enum attr_type type = get_attr_type (insn);
30780 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30781 return true;
30782 return false;
30785 return false;
30788 /* The function returns true if OUT_INSN sets a value that is
30789 used in the address generation computation of IN_INSN. */
30790 static bool
30791 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30793 rtx out_set, in_set;
30795 /* For performance reasons, only handle the simple case where
30796 both insns are a single_set. */
30797 out_set = single_set (out_insn);
30798 if (out_set)
30800 in_set = single_set (in_insn);
30801 if (in_set)
30802 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30805 return false;
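/* For example, given
       addi r9,r9,64     (OUT_INSN)
       lwz  r0,0(r9)     (IN_INSN)
   the addi result feeds the load's address, so this returns true.  */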
30808 /* Try to determine base/offset/size parts of the given MEM.
30809 Return true if successful, false if the values cannot
30810 all be determined.
30812 This function only looks for REG or REG+CONST address forms.
30813 REG+REG address form will return false. */
30815 static bool
30816 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30817 HOST_WIDE_INT *size)
30819 rtx addr_rtx;
30820 if (MEM_SIZE_KNOWN_P (mem))
30821 *size = MEM_SIZE (mem);
30822 else
30823 return false;
30825 addr_rtx = (XEXP (mem, 0));
30826 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30827 addr_rtx = XEXP (addr_rtx, 1);
30829 *offset = 0;
30830 while (GET_CODE (addr_rtx) == PLUS
30831 && CONST_INT_P (XEXP (addr_rtx, 1)))
30833 *offset += INTVAL (XEXP (addr_rtx, 1));
30834 addr_rtx = XEXP (addr_rtx, 0);
30836 if (!REG_P (addr_rtx))
30837 return false;
30839 *base = addr_rtx;
30840 return true;
30843 /* The function returns true if the target storage location of
30844 MEM1 is adjacent to the target storage location of MEM2. */
30847 static bool
30848 adjacent_mem_locations (rtx mem1, rtx mem2)
30850 rtx reg1, reg2;
30851 HOST_WIDE_INT off1, size1, off2, size2;
30853 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30854 && get_memref_parts (mem2, &reg2, &off2, &size2))
30855 return ((REGNO (reg1) == REGNO (reg2))
30856 && ((off1 + size1 == off2)
30857 || (off2 + size2 == off1)));
30859 return false;
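/* E.g. two 8-byte stores at 8(r1) and 16(r1) are adjacent by this
   test; the same pair would not satisfy mem_locations_overlap
   below.  */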
30862 /* This function returns true if it can be determined that the two MEM
30863 locations overlap by at least 1 byte based on base reg/offset/size. */
30865 static bool
30866 mem_locations_overlap (rtx mem1, rtx mem2)
30868 rtx reg1, reg2;
30869 HOST_WIDE_INT off1, size1, off2, size2;
30871 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30872 && get_memref_parts (mem2, &reg2, &off2, &size2))
30873 return ((REGNO (reg1) == REGNO (reg2))
30874 && (((off1 <= off2) && (off1 + size1 > off2))
30875 || ((off2 <= off1) && (off2 + size2 > off1))));
30877 return false;
30880 /* A C statement (sans semicolon) to update the integer scheduling
30881 priority INSN_PRIORITY (INSN). Increase the priority to execute
30882 INSN earlier; reduce the priority to execute INSN later. Do not
30883 define this macro if you do not need to adjust the scheduling
30884 priorities of insns. */
30886 static int
30887 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30889 rtx load_mem, str_mem;
30890 /* On machines (like the 750) which have asymmetric integer units,
30891 where one integer unit can do multiplies and divides and the other
30892 can't, reduce the priority of multiply/divide insns so that other
30893 integer operations are scheduled ahead of them. */
30895 #if 0
30896 if (! INSN_P (insn))
30897 return priority;
30899 if (GET_CODE (PATTERN (insn)) == USE)
30900 return priority;
30902 switch (rs6000_cpu_attr) {
30903 case CPU_PPC750:
30904 switch (get_attr_type (insn))
30906 default:
30907 break;
30909 case TYPE_MUL:
30910 case TYPE_DIV:
30911 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30912 priority, priority);
30913 if (priority >= 0 && priority < 0x01000000)
30914 priority >>= 3;
30915 break;
30918 #endif
30920 if (insn_must_be_first_in_group (insn)
30921 && reload_completed
30922 && current_sched_info->sched_max_insns_priority
30923 && rs6000_sched_restricted_insns_priority)
30926 /* Prioritize insns that can be dispatched only in the first
30927 dispatch slot. */
30928 if (rs6000_sched_restricted_insns_priority == 1)
30929 /* Attach highest priority to insn. This means that in
30930 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30931 precede 'priority' (critical path) considerations. */
30932 return current_sched_info->sched_max_insns_priority;
30933 else if (rs6000_sched_restricted_insns_priority == 2)
30934 /* Increase priority of insn by a minimal amount. This means that in
30935 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30936 considerations precede dispatch-slot restriction considerations. */
30937 return (priority + 1);
30940 if (rs6000_cpu == PROCESSOR_POWER6
30941 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30942 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30943 /* Attach highest priority to insn if the scheduler has just issued two
30944 stores and this instruction is a load, or two loads and this instruction
30945 is a store. Power6 wants loads and stores scheduled alternately
30946 when possible */
30947 return current_sched_info->sched_max_insns_priority;
30949 return priority;
30952 /* Return true if the instruction is nonpipelined on the Cell. */
30953 static bool
30954 is_nonpipeline_insn (rtx_insn *insn)
30956 enum attr_type type;
30957 if (!insn || !NONDEBUG_INSN_P (insn)
30958 || GET_CODE (PATTERN (insn)) == USE
30959 || GET_CODE (PATTERN (insn)) == CLOBBER)
30960 return false;
30962 type = get_attr_type (insn);
30963 if (type == TYPE_MUL
30964 || type == TYPE_DIV
30965 || type == TYPE_SDIV
30966 || type == TYPE_DDIV
30967 || type == TYPE_SSQRT
30968 || type == TYPE_DSQRT
30969 || type == TYPE_MFCR
30970 || type == TYPE_MFCRF
30971 || type == TYPE_MFJMPR)
30973 return true;
30975 return false;
30979 /* Return how many instructions the machine can issue per cycle. */
30981 static int
30982 rs6000_issue_rate (void)
30984 /* Unless scheduling for register pressure, use issue rate of 1 for
30985 first scheduling pass to decrease degradation. */
30986 if (!reload_completed && !flag_sched_pressure)
30987 return 1;
30989 switch (rs6000_cpu_attr) {
30990 case CPU_RS64A:
30991 case CPU_PPC601: /* ? */
30992 case CPU_PPC7450:
30993 return 3;
30994 case CPU_PPC440:
30995 case CPU_PPC603:
30996 case CPU_PPC750:
30997 case CPU_PPC7400:
30998 case CPU_PPC8540:
30999 case CPU_PPC8548:
31000 case CPU_CELL:
31001 case CPU_PPCE300C2:
31002 case CPU_PPCE300C3:
31003 case CPU_PPCE500MC:
31004 case CPU_PPCE500MC64:
31005 case CPU_PPCE5500:
31006 case CPU_PPCE6500:
31007 case CPU_TITAN:
31008 return 2;
31009 case CPU_PPC476:
31010 case CPU_PPC604:
31011 case CPU_PPC604E:
31012 case CPU_PPC620:
31013 case CPU_PPC630:
31014 return 4;
31015 case CPU_POWER4:
31016 case CPU_POWER5:
31017 case CPU_POWER6:
31018 case CPU_POWER7:
31019 return 5;
31020 case CPU_POWER8:
31021 return 7;
31022 case CPU_POWER9:
31023 return 6;
31024 default:
31025 return 1;
31029 /* Return how many instructions to look ahead for better insn
31030 scheduling. */
31032 static int
31033 rs6000_use_sched_lookahead (void)
31035 switch (rs6000_cpu_attr)
31037 case CPU_PPC8540:
31038 case CPU_PPC8548:
31039 return 4;
31041 case CPU_CELL:
31042 return (reload_completed ? 8 : 0);
31044 default:
31045 return 0;
31049 /* We are choosing an insn from the ready queue. Return zero if INSN can be
31050 chosen. */
31051 static int
31052 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31054 if (ready_index == 0)
31055 return 0;
31057 if (rs6000_cpu_attr != CPU_CELL)
31058 return 0;
31060 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31062 if (!reload_completed
31063 || is_nonpipeline_insn (insn)
31064 || is_microcoded_insn (insn))
31065 return 1;
31067 return 0;
31070 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31071 and return true. */
31073 static bool
31074 find_mem_ref (rtx pat, rtx *mem_ref)
31076 const char * fmt;
31077 int i, j;
31079 /* stack_tie does not produce any real memory traffic. */
31080 if (tie_operand (pat, VOIDmode))
31081 return false;
31083 if (GET_CODE (pat) == MEM)
31085 *mem_ref = pat;
31086 return true;
31089 /* Recursively process the pattern. */
31090 fmt = GET_RTX_FORMAT (GET_CODE (pat));
31092 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
31094 if (fmt[i] == 'e')
31096 if (find_mem_ref (XEXP (pat, i), mem_ref))
31097 return true;
31099 else if (fmt[i] == 'E')
31100 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31102 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31103 return true;
31107 return false;
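/* Editor's aside (not part of rs6000.c): find_mem_ref above is a
   depth-first search over an insn pattern, recursing into the 'e'
   (sub-expression) and 'E' (vector) operands until a MEM is found.
   A standalone sketch of the same shape on a hypothetical toy node
   type, shown only to make the recursion concrete.  */
#include <stdbool.h>
#include <stddef.h>

struct node
{
  bool is_mem;            /* stands in for GET_CODE (pat) == MEM */
  struct node *ops[2];    /* stands in for the 'e' operands */
};

static bool
find_mem (struct node *pat, struct node **mem_ref)
{
  if (pat == NULL)
    return false;
  if (pat->is_mem)
    {
      *mem_ref = pat;
      return true;
    }
  /* Recursively process the operands; the first match wins.  */
  for (int i = 0; i < 2; i++)
    if (find_mem (pat->ops[i], mem_ref))
      return true;
  return false;
}

int
main (void)
{
  struct node mem = { true, { NULL, NULL } };
  struct node set = { false, { NULL, &mem } };
  struct node *found = NULL;
  return find_mem (&set, &found) && found == &mem ? 0 : 1;
}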
31110 /* Determine if PAT is a PATTERN of a load insn. */
31112 static bool
31113 is_load_insn1 (rtx pat, rtx *load_mem)
31115 if (!pat)
31116 return false;
31118 if (GET_CODE (pat) == SET)
31119 return find_mem_ref (SET_SRC (pat), load_mem);
31121 if (GET_CODE (pat) == PARALLEL)
31123 int i;
31125 for (i = 0; i < XVECLEN (pat, 0); i++)
31126 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31127 return true;
31130 return false;
31133 /* Determine if INSN loads from memory. */
31135 static bool
31136 is_load_insn (rtx insn, rtx *load_mem)
31138 if (!insn || !INSN_P (insn))
31139 return false;
31141 if (CALL_P (insn))
31142 return false;
31144 return is_load_insn1 (PATTERN (insn), load_mem);
31147 /* Determine if PAT is a PATTERN of a store insn. */
31149 static bool
31150 is_store_insn1 (rtx pat, rtx *str_mem)
31152 if (!pat)
31153 return false;
31155 if (GET_CODE (pat) == SET)
31156 return find_mem_ref (SET_DEST (pat), str_mem);
31158 if (GET_CODE (pat) == PARALLEL)
31160 int i;
31162 for (i = 0; i < XVECLEN (pat, 0); i++)
31163 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31164 return true;
31167 return false;
31170 /* Determine if INSN stores to memory. */
31172 static bool
31173 is_store_insn (rtx insn, rtx *str_mem)
31175 if (!insn || !INSN_P (insn))
31176 return false;
31178 return is_store_insn1 (PATTERN (insn), str_mem);
31181 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31183 static bool
31184 is_power9_pairable_vec_type (enum attr_type type)
31186 switch (type)
31188 case TYPE_VECSIMPLE:
31189 case TYPE_VECCOMPLEX:
31190 case TYPE_VECDIV:
31191 case TYPE_VECCMP:
31192 case TYPE_VECPERM:
31193 case TYPE_VECFLOAT:
31194 case TYPE_VECFDIV:
31195 case TYPE_VECDOUBLE:
31196 return true;
31197 default:
31198 break;
31200 return false;
31203 /* Returns whether the dependence between INSN and NEXT is considered
31204 costly by the given target. */
31206 static bool
31207 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31209 rtx insn;
31210 rtx next;
31211 rtx load_mem, str_mem;
31213 /* If the flag is not enabled - no dependence is considered costly;
31214 allow all dependent insns in the same group.
31215 This is the most aggressive option. */
31216 if (rs6000_sched_costly_dep == no_dep_costly)
31217 return false;
31219 /* If the flag is set to 1 - a dependence is always considered costly;
31220 do not allow dependent instructions in the same group.
31221 This is the most conservative option. */
31222 if (rs6000_sched_costly_dep == all_deps_costly)
31223 return true;
31225 insn = DEP_PRO (dep);
31226 next = DEP_CON (dep);
31228 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31229 && is_load_insn (next, &load_mem)
31230 && is_store_insn (insn, &str_mem))
31231 /* Prevent load after store in the same group. */
31232 return true;
31234 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31235 && is_load_insn (next, &load_mem)
31236 && is_store_insn (insn, &str_mem)
31237 && DEP_TYPE (dep) == REG_DEP_TRUE
31238 && mem_locations_overlap(str_mem, load_mem))
31239 /* Prevent load after store in the same group if it is a true
31240 dependence. */
31241 return true;
31243 /* The flag is set to X; dependences with latency >= X are considered costly,
31244 and will not be scheduled in the same group. */
31245 if (rs6000_sched_costly_dep <= max_dep_latency
31246 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31247 return true;
31249 return false;
31252 /* Return the next insn after INSN that is found before TAIL is reached,
31253 skipping any "non-active" insns - insns that will not actually occupy
31254 an issue slot. Return NULL_RTX if such an insn is not found. */
31256 static rtx_insn *
31257 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31259 if (insn == NULL_RTX || insn == tail)
31260 return NULL;
31262 while (1)
31264 insn = NEXT_INSN (insn);
31265 if (insn == NULL_RTX || insn == tail)
31266 return NULL;
31268 if (CALL_P (insn)
31269 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31270 || (NONJUMP_INSN_P (insn)
31271 && GET_CODE (PATTERN (insn)) != USE
31272 && GET_CODE (PATTERN (insn)) != CLOBBER
31273 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31274 break;
31276 return insn;
31279 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31281 static int
31282 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31284 int pos;
31285 int i;
31286 rtx_insn *tmp;
31287 enum attr_type type;
31289 type = get_attr_type (last_scheduled_insn);
31291 /* Try to issue fixed point divides back-to-back in pairs so they will be
31292 routed to separate execution units and execute in parallel. */
31293 if (type == TYPE_DIV && divide_cnt == 0)
31295 /* First divide has been scheduled. */
31296 divide_cnt = 1;
31298 /* Scan the ready list looking for another divide, if found move it
31299 to the end of the list so it is chosen next. */
31300 pos = lastpos;
31301 while (pos >= 0)
31303 if (recog_memoized (ready[pos]) >= 0
31304 && get_attr_type (ready[pos]) == TYPE_DIV)
31306 tmp = ready[pos];
31307 for (i = pos; i < lastpos; i++)
31308 ready[i] = ready[i + 1];
31309 ready[lastpos] = tmp;
31310 break;
31312 pos--;
31315 else
31317 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31318 divide_cnt = 0;
31320 /* Power9 can execute 2 vector operations and 2 vector loads in a single
31321 cycle. So try to pair up and alternate groups of vector and vector
31322 load instructions.
31324 To aid this formation, a counter is maintained to keep track of
31325 vec/vecload insns issued. The value of vec_load_pendulum maintains
31326 the current state with the following values:
31328 0 : Initial state, no vec/vecload group has been started.
31330 -1 : 1 vector load has been issued and another has been found on
31331 the ready list and moved to the end.
31333 -2 : 2 vector loads have been issued and a vector operation has
31334 been found and moved to the end of the ready list.
31336 -3 : 2 vector loads and a vector insn have been issued and a
31337 vector operation has been found and moved to the end of the
31338 ready list.
31340 1 : 1 vector insn has been issued and another has been found and
31341 moved to the end of the ready list.
31343 2 : 2 vector insns have been issued and a vector load has been
31344 found and moved to the end of the ready list.
31346 3 : 2 vector insns and a vector load have been issued and another
31347 vector load has been found and moved to the end of the ready
31348 list. */
31349 if (type == TYPE_VECLOAD)
31351 /* Issued a vecload. */
31352 if (vec_load_pendulum == 0)
31354 /* We issued a single vecload, look for another and move it to
31355 the end of the ready list so it will be scheduled next.
31356 Set pendulum if found. */
31357 pos = lastpos;
31358 while (pos >= 0)
31360 if (recog_memoized (ready[pos]) >= 0
31361 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31363 tmp = ready[pos];
31364 for (i = pos; i < lastpos; i++)
31365 ready[i] = ready[i + 1];
31366 ready[lastpos] = tmp;
31367 vec_load_pendulum = -1;
31368 return cached_can_issue_more;
31370 pos--;
31373 else if (vec_load_pendulum == -1)
31375 /* This is the second vecload we've issued, search the ready
31376 list for a vector operation so we can try to schedule a
31377 pair of those next. If found move to the end of the ready
31378 list so it is scheduled next and set the pendulum. */
31379 pos = lastpos;
31380 while (pos >= 0)
31382 if (recog_memoized (ready[pos]) >= 0
31383 && is_power9_pairable_vec_type (
31384 get_attr_type (ready[pos])))
31386 tmp = ready[pos];
31387 for (i = pos; i < lastpos; i++)
31388 ready[i] = ready[i + 1];
31389 ready[lastpos] = tmp;
31390 vec_load_pendulum = -2;
31391 return cached_can_issue_more;
31393 pos--;
31396 else if (vec_load_pendulum == 2)
31398 /* Two vector ops have been issued and we've just issued a
31399 vecload, look for another vecload and move to end of ready
31400 list if found. */
31401 pos = lastpos;
31402 while (pos >= 0)
31404 if (recog_memoized (ready[pos]) >= 0
31405 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31407 tmp = ready[pos];
31408 for (i = pos; i < lastpos; i++)
31409 ready[i] = ready[i + 1];
31410 ready[lastpos] = tmp;
31411 /* Set pendulum so that next vecload will be seen as
31412 finishing a group, not start of one. */
31413 vec_load_pendulum = 3;
31414 return cached_can_issue_more;
31416 pos--;
31420 else if (is_power9_pairable_vec_type (type))
31422 /* Issued a vector operation. */
31423 if (vec_load_pendulum == 0)
31424 /* We issued a single vec op, look for another and move it
31425 to the end of the ready list so it will be scheduled next.
31426 Set pendulum if found. */
31428 pos = lastpos;
31429 while (pos >= 0)
31431 if (recog_memoized (ready[pos]) >= 0
31432 && is_power9_pairable_vec_type (
31433 get_attr_type (ready[pos])))
31435 tmp = ready[pos];
31436 for (i = pos; i < lastpos; i++)
31437 ready[i] = ready[i + 1];
31438 ready[lastpos] = tmp;
31439 vec_load_pendulum = 1;
31440 return cached_can_issue_more;
31442 pos--;
31445 else if (vec_load_pendulum == 1)
31447 /* This is the second vec op we've issued, search the ready
31448 list for a vecload operation so we can try to schedule a
31449 pair of those next. If found move to the end of the ready
31450 list so it is scheduled next and set the pendulum. */
31451 pos = lastpos;
31452 while (pos >= 0)
31454 if (recog_memoized (ready[pos]) >= 0
31455 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31457 tmp = ready[pos];
31458 for (i = pos; i < lastpos; i++)
31459 ready[i] = ready[i + 1];
31460 ready[lastpos] = tmp;
31461 vec_load_pendulum = 2;
31462 return cached_can_issue_more;
31464 pos--;
31467 else if (vec_load_pendulum == -2)
31469 /* Two vecload ops have been issued and we've just issued a
31470 vec op, look for another vec op and move to end of ready
31471 list if found. */
31472 pos = lastpos;
31473 while (pos >= 0)
31475 if (recog_memoized (ready[pos]) >= 0
31476 && is_power9_pairable_vec_type (
31477 get_attr_type (ready[pos])))
31479 tmp = ready[pos];
31480 for (i = pos; i < lastpos; i++)
31481 ready[i] = ready[i + 1];
31482 ready[lastpos] = tmp;
31483 /* Set pendulum so that next vec op will be seen as
31484 finishing a group, not start of one. */
31485 vec_load_pendulum = -3;
31486 return cached_can_issue_more;
31488 pos--;
31493 /* We've either finished a vec/vecload group, couldn't find an insn to
31494 continue the current group, or the last insn had nothing to do with
31495 a group. In any case, reset the pendulum. */
31496 vec_load_pendulum = 0;
31499 return cached_can_issue_more;
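/* Editor's aside (not part of rs6000.c): power9_sched_reorder2 above
   repeats one idiom several times: pull the insn at POS out of the
   ready list and rotate it to index LASTPOS (the end), where the
   scheduler will pick it next.  A standalone sketch of that rotation
   on an int array; the helper name is hypothetical.  */
#include <stdio.h>

static void
rotate_to_back (int *ready, int pos, int lastpos)
{
  int tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];   /* shift the tail down by one */
  ready[lastpos] = tmp;        /* chosen element goes to the end */
}

int
main (void)
{
  int ready[4] = { 10, 11, 12, 13 };
  rotate_to_back (ready, 1, 3);
  for (int i = 0; i < 4; i++)
    printf ("%d ", ready[i]);  /* prints: 10 12 13 11 */
  printf ("\n");
  return 0;
}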
31502 /* We are about to begin issuing insns for this clock cycle. */
31504 static int
31505 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31506 rtx_insn **ready ATTRIBUTE_UNUSED,
31507 int *pn_ready ATTRIBUTE_UNUSED,
31508 int clock_var ATTRIBUTE_UNUSED)
31510 int n_ready = *pn_ready;
31512 if (sched_verbose)
31513 fprintf (dump, "// rs6000_sched_reorder :\n");
31515 /* Reorder the ready list, if the second to last ready insn
31516 is a nonpipeline insn.
31517 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
31519 if (is_nonpipeline_insn (ready[n_ready - 1])
31520 && (recog_memoized (ready[n_ready - 2]) > 0))
31521 /* Simply swap first two insns. */
31522 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31525 if (rs6000_cpu == PROCESSOR_POWER6)
31526 load_store_pendulum = 0;
31528 return rs6000_issue_rate ();
31531 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31533 static int
31534 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31535 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31537 if (sched_verbose)
31538 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31540 /* For Power6, we need to handle some special cases to try and keep the
31541 store queue from overflowing and triggering expensive flushes.
31543 This code monitors how load and store instructions are being issued
31544 and skews the ready list one way or the other to increase the likelihood
31545 that a desired instruction is issued at the proper time.
31547 A couple of things are done. First, we maintain a "load_store_pendulum"
31548 to track the current state of load/store issue.
31550 - If the pendulum is at zero, then no loads or stores have been
31551 issued in the current cycle so we do nothing.
31553 - If the pendulum is 1, then a single load has been issued in this
31554 cycle and we attempt to locate another load in the ready list to
31555 issue with it.
31557 - If the pendulum is -2, then two stores have already been
31558 issued in this cycle, so we increase the priority of the first load
31559 in the ready list to increase its likelihood of being chosen first
31560 in the next cycle.
31562 - If the pendulum is -1, then a single store has been issued in this
31563 cycle and we attempt to locate another store in the ready list to
31564 issue with it, preferring a store to an adjacent memory location to
31565 facilitate store pairing in the store queue.
31567 - If the pendulum is 2, then two loads have already been
31568 issued in this cycle, so we increase the priority of the first store
31569 in the ready list to increase its likelihood of being chosen first
31570 in the next cycle.
31572 - If the pendulum < -2 or > 2, then do nothing.
31574 Note: This code covers the most common scenarios. There exist non
31575 load/store instructions which make use of the LSU and which
31576 would need to be accounted for to strictly model the behavior
31577 of the machine. Those instructions are currently unaccounted
31578 for to help minimize compile time overhead of this code. */
31580 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
31582 int pos;
31583 int i;
31584 rtx_insn *tmp;
31585 rtx load_mem, str_mem;
31587 if (is_store_insn (last_scheduled_insn, &str_mem))
31588 /* Issuing a store, swing the load_store_pendulum to the left */
31589 load_store_pendulum--;
31590 else if (is_load_insn (last_scheduled_insn, &load_mem))
31591 /* Issuing a load, swing the load_store_pendulum to the right */
31592 load_store_pendulum++;
31593 else
31594 return cached_can_issue_more;
31596 /* If the pendulum is balanced, or there is only one instruction on
31597 the ready list, then all is well, so return. */
31598 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31599 return cached_can_issue_more;
31601 if (load_store_pendulum == 1)
31603 /* A load has been issued in this cycle. Scan the ready list
31604 for another load to issue with it */
31605 pos = *pn_ready-1;
31607 while (pos >= 0)
31609 if (is_load_insn (ready[pos], &load_mem))
31611 /* Found a load. Move it to the head of the ready list,
31612 and adjust its priority so that it is more likely to
31613 stay there */
31614 tmp = ready[pos];
31615 for (i=pos; i<*pn_ready-1; i++)
31616 ready[i] = ready[i + 1];
31617 ready[*pn_ready-1] = tmp;
31619 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31620 INSN_PRIORITY (tmp)++;
31621 break;
31623 pos--;
31626 else if (load_store_pendulum == -2)
31628 /* Two stores have been issued in this cycle. Increase the
31629 priority of the first load in the ready list to favor it for
31630 issuing in the next cycle. */
31631 pos = *pn_ready-1;
31633 while (pos >= 0)
31635 if (is_load_insn (ready[pos], &load_mem)
31636 && !sel_sched_p ()
31637 && INSN_PRIORITY_KNOWN (ready[pos]))
31639 INSN_PRIORITY (ready[pos])++;
31641 /* Adjust the pendulum to account for the fact that a load
31642 was found and increased in priority. This is to prevent
31643 increasing the priority of multiple loads */
31644 load_store_pendulum--;
31646 break;
31648 pos--;
31651 else if (load_store_pendulum == -1)
31653 /* A store has been issued in this cycle. Scan the ready list for
31654 another store to issue with it, preferring a store to an adjacent
31655 memory location */
31656 int first_store_pos = -1;
31658 pos = *pn_ready-1;
31660 while (pos >= 0)
31662 if (is_store_insn (ready[pos], &str_mem))
31664 rtx str_mem2;
31665 /* Maintain the index of the first store found on the
31666 list */
31667 if (first_store_pos == -1)
31668 first_store_pos = pos;
31670 if (is_store_insn (last_scheduled_insn, &str_mem2)
31671 && adjacent_mem_locations (str_mem, str_mem2))
31673 /* Found an adjacent store. Move it to the head of the
31674 ready list, and adjust its priority so that it is
31675 more likely to stay there */
31676 tmp = ready[pos];
31677 for (i=pos; i<*pn_ready-1; i++)
31678 ready[i] = ready[i + 1];
31679 ready[*pn_ready-1] = tmp;
31681 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31682 INSN_PRIORITY (tmp)++;
31684 first_store_pos = -1;
31686 break;
31689 pos--;
31692 if (first_store_pos >= 0)
31694 /* An adjacent store wasn't found, but a non-adjacent store was,
31695 so move the non-adjacent store to the front of the ready
31696 list, and adjust its priority so that it is more likely to
31697 stay there. */
31698 tmp = ready[first_store_pos];
31699 for (i=first_store_pos; i<*pn_ready-1; i++)
31700 ready[i] = ready[i + 1];
31701 ready[*pn_ready-1] = tmp;
31702 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31703 INSN_PRIORITY (tmp)++;
31706 else if (load_store_pendulum == 2)
31708 /* Two loads have been issued in this cycle. Increase the priority
31709 of the first store in the ready list to favor it for issuing in
31710 the next cycle. */
31711 pos = *pn_ready-1;
31713 while (pos >= 0)
31715 if (is_store_insn (ready[pos], &str_mem)
31716 && !sel_sched_p ()
31717 && INSN_PRIORITY_KNOWN (ready[pos]))
31719 INSN_PRIORITY (ready[pos])++;
31721 /* Adjust the pendulum to account for the fact that a store
31722 was found and increased in priority. This is to prevent
31723 increasing the priority of multiple stores */
31724 load_store_pendulum++;
31726 break;
31728 pos--;
31733 /* Do Power9 dependent reordering if necessary. */
31734 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
31735 && recog_memoized (last_scheduled_insn) >= 0)
31736 return power9_sched_reorder2 (ready, *pn_ready - 1);
31738 return cached_can_issue_more;
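/* Editor's aside (not part of rs6000.c): the Power6 load/store pendulum
   documented above counts loads issued in the current cycle as +1 and
   stores as -1.  A minimal standalone model of just that bookkeeping;
   the enum and function names are hypothetical.  */
#include <stdio.h>

enum lsu_kind { LSU_OTHER, LSU_LOAD, LSU_STORE };

static int
pendulum_step (int pendulum, enum lsu_kind kind)
{
  if (kind == LSU_STORE)
    return pendulum - 1;   /* swing left: a store was issued */
  if (kind == LSU_LOAD)
    return pendulum + 1;   /* swing right: a load was issued */
  return pendulum;         /* non-LSU insn: no change */
}

int
main (void)
{
  /* store, store, load: pendulum goes 0 -> -1 -> -2 -> -1; at -2 the
     code above starts favoring a load for the next cycle.  */
  enum lsu_kind seq[3] = { LSU_STORE, LSU_STORE, LSU_LOAD };
  int p = 0;
  for (int i = 0; i < 3; i++)
    {
      p = pendulum_step (p, seq[i]);
      printf ("pendulum = %d\n", p);
    }
  return 0;
}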
31741 /* Return whether the presence of INSN causes a dispatch group termination
31742 of group WHICH_GROUP.
31744 If WHICH_GROUP == current_group, this function will return true if INSN
31745 causes the termination of the current group (i.e., the dispatch group to
31746 which INSN belongs). This means that INSN will be the last insn in the
31747 group it belongs to.
31749 If WHICH_GROUP == previous_group, this function will return true if INSN
31750 causes the termination of the previous group (i.e., the dispatch group that
31751 precedes the group to which INSN belongs). This means that INSN will be
31752 the first insn in the group it belongs to. */
31754 static bool
31755 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31757 bool first, last;
31759 if (! insn)
31760 return false;
31762 first = insn_must_be_first_in_group (insn);
31763 last = insn_must_be_last_in_group (insn);
31765 if (first && last)
31766 return true;
31768 if (which_group == current_group)
31769 return last;
31770 else if (which_group == previous_group)
31771 return first;
31773 return false;
31777 static bool
31778 insn_must_be_first_in_group (rtx_insn *insn)
31780 enum attr_type type;
31782 if (!insn
31783 || NOTE_P (insn)
31784 || DEBUG_INSN_P (insn)
31785 || GET_CODE (PATTERN (insn)) == USE
31786 || GET_CODE (PATTERN (insn)) == CLOBBER)
31787 return false;
31789 switch (rs6000_cpu)
31791 case PROCESSOR_POWER5:
31792 if (is_cracked_insn (insn))
31793 return true;
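/* FALLTHRU (editor's note): the POWER5 case falls through so the
   POWER4 checks below also apply to it.  */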
31794 case PROCESSOR_POWER4:
31795 if (is_microcoded_insn (insn))
31796 return true;
31798 if (!rs6000_sched_groups)
31799 return false;
31801 type = get_attr_type (insn);
31803 switch (type)
31805 case TYPE_MFCR:
31806 case TYPE_MFCRF:
31807 case TYPE_MTCR:
31808 case TYPE_DELAYED_CR:
31809 case TYPE_CR_LOGICAL:
31810 case TYPE_MTJMPR:
31811 case TYPE_MFJMPR:
31812 case TYPE_DIV:
31813 case TYPE_LOAD_L:
31814 case TYPE_STORE_C:
31815 case TYPE_ISYNC:
31816 case TYPE_SYNC:
31817 return true;
31818 default:
31819 break;
31821 break;
31822 case PROCESSOR_POWER6:
31823 type = get_attr_type (insn);
31825 switch (type)
31827 case TYPE_EXTS:
31828 case TYPE_CNTLZ:
31829 case TYPE_TRAP:
31830 case TYPE_MUL:
31831 case TYPE_INSERT:
31832 case TYPE_FPCOMPARE:
31833 case TYPE_MFCR:
31834 case TYPE_MTCR:
31835 case TYPE_MFJMPR:
31836 case TYPE_MTJMPR:
31837 case TYPE_ISYNC:
31838 case TYPE_SYNC:
31839 case TYPE_LOAD_L:
31840 case TYPE_STORE_C:
31841 return true;
31842 case TYPE_SHIFT:
31843 if (get_attr_dot (insn) == DOT_NO
31844 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31845 return true;
31846 else
31847 break;
31848 case TYPE_DIV:
31849 if (get_attr_size (insn) == SIZE_32)
31850 return true;
31851 else
31852 break;
31853 case TYPE_LOAD:
31854 case TYPE_STORE:
31855 case TYPE_FPLOAD:
31856 case TYPE_FPSTORE:
31857 if (get_attr_update (insn) == UPDATE_YES)
31858 return true;
31859 else
31860 break;
31861 default:
31862 break;
31864 break;
31865 case PROCESSOR_POWER7:
31866 type = get_attr_type (insn);
31868 switch (type)
31870 case TYPE_CR_LOGICAL:
31871 case TYPE_MFCR:
31872 case TYPE_MFCRF:
31873 case TYPE_MTCR:
31874 case TYPE_DIV:
31875 case TYPE_ISYNC:
31876 case TYPE_LOAD_L:
31877 case TYPE_STORE_C:
31878 case TYPE_MFJMPR:
31879 case TYPE_MTJMPR:
31880 return true;
31881 case TYPE_MUL:
31882 case TYPE_SHIFT:
31883 case TYPE_EXTS:
31884 if (get_attr_dot (insn) == DOT_YES)
31885 return true;
31886 else
31887 break;
31888 case TYPE_LOAD:
31889 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31890 || get_attr_update (insn) == UPDATE_YES)
31891 return true;
31892 else
31893 break;
31894 case TYPE_STORE:
31895 case TYPE_FPLOAD:
31896 case TYPE_FPSTORE:
31897 if (get_attr_update (insn) == UPDATE_YES)
31898 return true;
31899 else
31900 break;
31901 default:
31902 break;
31904 break;
31905 case PROCESSOR_POWER8:
31906 type = get_attr_type (insn);
31908 switch (type)
31910 case TYPE_CR_LOGICAL:
31911 case TYPE_DELAYED_CR:
31912 case TYPE_MFCR:
31913 case TYPE_MFCRF:
31914 case TYPE_MTCR:
31915 case TYPE_SYNC:
31916 case TYPE_ISYNC:
31917 case TYPE_LOAD_L:
31918 case TYPE_STORE_C:
31919 case TYPE_VECSTORE:
31920 case TYPE_MFJMPR:
31921 case TYPE_MTJMPR:
31922 return true;
31923 case TYPE_SHIFT:
31924 case TYPE_EXTS:
31925 case TYPE_MUL:
31926 if (get_attr_dot (insn) == DOT_YES)
31927 return true;
31928 else
31929 break;
31930 case TYPE_LOAD:
31931 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31932 || get_attr_update (insn) == UPDATE_YES)
31933 return true;
31934 else
31935 break;
31936 case TYPE_STORE:
31937 if (get_attr_update (insn) == UPDATE_YES
31938 && get_attr_indexed (insn) == INDEXED_YES)
31939 return true;
31940 else
31941 break;
31942 default:
31943 break;
31945 break;
31946 default:
31947 break;
31950 return false;
31953 static bool
31954 insn_must_be_last_in_group (rtx_insn *insn)
31956 enum attr_type type;
31958 if (!insn
31959 || NOTE_P (insn)
31960 || DEBUG_INSN_P (insn)
31961 || GET_CODE (PATTERN (insn)) == USE
31962 || GET_CODE (PATTERN (insn)) == CLOBBER)
31963 return false;
31965 switch (rs6000_cpu) {
31966 case PROCESSOR_POWER4:
31967 case PROCESSOR_POWER5:
31968 if (is_microcoded_insn (insn))
31969 return true;
31971 if (is_branch_slot_insn (insn))
31972 return true;
31974 break;
31975 case PROCESSOR_POWER6:
31976 type = get_attr_type (insn);
31978 switch (type)
31980 case TYPE_EXTS:
31981 case TYPE_CNTLZ:
31982 case TYPE_TRAP:
31983 case TYPE_MUL:
31984 case TYPE_FPCOMPARE:
31985 case TYPE_MFCR:
31986 case TYPE_MTCR:
31987 case TYPE_MFJMPR:
31988 case TYPE_MTJMPR:
31989 case TYPE_ISYNC:
31990 case TYPE_SYNC:
31991 case TYPE_LOAD_L:
31992 case TYPE_STORE_C:
31993 return true;
31994 case TYPE_SHIFT:
31995 if (get_attr_dot (insn) == DOT_NO
31996 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31997 return true;
31998 else
31999 break;
32000 case TYPE_DIV:
32001 if (get_attr_size (insn) == SIZE_32)
32002 return true;
32003 else
32004 break;
32005 default:
32006 break;
32008 break;
32009 case PROCESSOR_POWER7:
32010 type = get_attr_type (insn);
32012 switch (type)
32014 case TYPE_ISYNC:
32015 case TYPE_SYNC:
32016 case TYPE_LOAD_L:
32017 case TYPE_STORE_C:
32018 return true;
32019 case TYPE_LOAD:
32020 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32021 && get_attr_update (insn) == UPDATE_YES)
32022 return true;
32023 else
32024 break;
32025 case TYPE_STORE:
32026 if (get_attr_update (insn) == UPDATE_YES
32027 && get_attr_indexed (insn) == INDEXED_YES)
32028 return true;
32029 else
32030 break;
32031 default:
32032 break;
32034 break;
32035 case PROCESSOR_POWER8:
32036 type = get_attr_type (insn);
32038 switch (type)
32040 case TYPE_MFCR:
32041 case TYPE_MTCR:
32042 case TYPE_ISYNC:
32043 case TYPE_SYNC:
32044 case TYPE_LOAD_L:
32045 case TYPE_STORE_C:
32046 return true;
32047 case TYPE_LOAD:
32048 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32049 && get_attr_update (insn) == UPDATE_YES)
32050 return true;
32051 else
32052 break;
32053 case TYPE_STORE:
32054 if (get_attr_update (insn) == UPDATE_YES
32055 && get_attr_indexed (insn) == INDEXED_YES)
32056 return true;
32057 else
32058 break;
32059 default:
32060 break;
32062 break;
32063 default:
32064 break;
32067 return false;
32070 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32071 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32073 static bool
32074 is_costly_group (rtx *group_insns, rtx next_insn)
32076 int i;
32077 int issue_rate = rs6000_issue_rate ();
32079 for (i = 0; i < issue_rate; i++)
32081 sd_iterator_def sd_it;
32082 dep_t dep;
32083 rtx insn = group_insns[i];
32085 if (!insn)
32086 continue;
32088 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32090 rtx next = DEP_CON (dep);
32092 if (next == next_insn
32093 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32094 return true;
32098 return false;
32101 /* Utility of the function redefine_groups.
32102 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32103 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32104 to keep it "far" (in a separate group) from GROUP_INSNS, following
32105 one of the following schemes, depending on the value of the flag
32106 -minsert_sched_nops = X:
32107 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32108 in order to force NEXT_INSN into a separate group.
32109 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32110 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32111 insertion (has a group just ended, how many vacant issue slots remain in the
32112 last group, and how many dispatch groups were encountered so far). */
32114 static int
32115 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32116 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32117 int *group_count)
32119 rtx nop;
32120 bool force;
32121 int issue_rate = rs6000_issue_rate ();
32122 bool end = *group_end;
32123 int i;
32125 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32126 return can_issue_more;
32128 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32129 return can_issue_more;
32131 force = is_costly_group (group_insns, next_insn);
32132 if (!force)
32133 return can_issue_more;
32135 if (sched_verbose > 6)
32136 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
32137 *group_count ,can_issue_more);
32139 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32141 if (*group_end)
32142 can_issue_more = 0;
32144 /* Since only a branch can be issued in the last issue_slot, it is
32145 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32146 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32147 in this case the last nop will start a new group and the branch
32148 will be forced to the new group. */
32149 if (can_issue_more && !is_branch_slot_insn (next_insn))
32150 can_issue_more--;
32152 /* Do we have a special group ending nop? */
32153 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
32154 || rs6000_cpu_attr == CPU_POWER8)
32156 nop = gen_group_ending_nop ();
32157 emit_insn_before (nop, next_insn);
32158 can_issue_more = 0;
32160 else
32161 while (can_issue_more > 0)
32163 nop = gen_nop ();
32164 emit_insn_before (nop, next_insn);
32165 can_issue_more--;
32168 *group_end = true;
32169 return 0;
32172 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32174 int n_nops = rs6000_sched_insert_nops;
32176 /* Nops can't be issued from the branch slot, so the effective
32177 issue_rate for nops is 'issue_rate - 1'. */
32178 if (can_issue_more == 0)
32179 can_issue_more = issue_rate;
32180 can_issue_more--;
32181 if (can_issue_more == 0)
32183 can_issue_more = issue_rate - 1;
32184 (*group_count)++;
32185 end = true;
32186 for (i = 0; i < issue_rate; i++)
32188 group_insns[i] = 0;
32192 while (n_nops > 0)
32194 nop = gen_nop ();
32195 emit_insn_before (nop, next_insn);
32196 if (can_issue_more == issue_rate - 1) /* new group begins */
32197 end = false;
32198 can_issue_more--;
32199 if (can_issue_more == 0)
32201 can_issue_more = issue_rate - 1;
32202 (*group_count)++;
32203 end = true;
32204 for (i = 0; i < issue_rate; i++)
32206 group_insns[i] = 0;
32209 n_nops--;
32212 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32213 can_issue_more++;
32215 /* Is next_insn going to start a new group? */
32216 *group_end
32217 = (end
32218 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32219 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32220 || (can_issue_more < issue_rate &&
32221 insn_terminates_group_p (next_insn, previous_group)));
32222 if (*group_end && end)
32223 (*group_count)--;
32225 if (sched_verbose > 6)
32226 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32227 *group_count, can_issue_more);
32228 return can_issue_more;
32231 return can_issue_more;
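/* Editor's aside (not part of rs6000.c): for -minsert-sched-nops=N
   (N below sched_finish_regroup_exact) the loop above issues N nops at
   an effective rate of issue_rate - 1, bumping the group count whenever
   the slots run out.  A standalone model of just that arithmetic; the
   function name is hypothetical.  */
#include <stdio.h>

static int
groups_created_by_nops (int n_nops, int issue_rate, int can_issue_more)
{
  int groups = 0;
  if (can_issue_more == 0)
    can_issue_more = issue_rate;
  can_issue_more--;            /* nops can't go in the branch slot */
  if (can_issue_more == 0)
    {
      can_issue_more = issue_rate - 1;
      groups++;
    }
  while (n_nops-- > 0)
    {
      can_issue_more--;
      if (can_issue_more == 0)
        {
          can_issue_more = issue_rate - 1;
          groups++;
        }
    }
  return groups;
}

int
main (void)
{
  /* 5 nops at issue rate 5 exhaust the 4 non-branch slots once,
     so exactly one fresh group boundary is created: prints 1.  */
  printf ("%d\n", groups_created_by_nops (5, 5, 5));
  return 0;
}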
32234 /* This function tries to synch the dispatch groups that the compiler "sees"
32235 with the dispatch groups that the processor dispatcher is expected to
32236 form in practice. It tries to achieve this synchronization by forcing the
32237 estimated processor grouping on the compiler (as opposed to the function
32238 'pad_groups' which tries to force the scheduler's grouping on the processor).
32240 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32241 examines the (estimated) dispatch groups that will be formed by the processor
32242 dispatcher. It marks these group boundaries to reflect the estimated
32243 processor grouping, overriding the grouping that the scheduler had marked.
32244 Depending on the value of the flag '-minsert-sched-nops' this function can
32245 force certain insns into separate groups or force a certain distance between
32246 them by inserting nops, for example, if there exists a "costly dependence"
32247 between the insns.
32249 The function estimates the group boundaries that the processor will form as
32250 follows: It keeps track of how many vacant issue slots are available after
32251 each insn. A subsequent insn will start a new group if one of the following
32252 4 cases applies:
32253 - no more vacant issue slots remain in the current dispatch group.
32254 - only the last issue slot, which is the branch slot, is vacant, but the next
32255 insn is not a branch.
32256 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32257 which means that a cracked insn (which occupies two issue slots) can't be
32258 issued in this group.
32259 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
32260 start a new group. */
32262 static int
32263 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32264 rtx_insn *tail)
32266 rtx_insn *insn, *next_insn;
32267 int issue_rate;
32268 int can_issue_more;
32269 int slot, i;
32270 bool group_end;
32271 int group_count = 0;
32272 rtx *group_insns;
32274 /* Initialize. */
32275 issue_rate = rs6000_issue_rate ();
32276 group_insns = XALLOCAVEC (rtx, issue_rate);
32277 for (i = 0; i < issue_rate; i++)
32279 group_insns[i] = 0;
32281 can_issue_more = issue_rate;
32282 slot = 0;
32283 insn = get_next_active_insn (prev_head_insn, tail);
32284 group_end = false;
32286 while (insn != NULL_RTX)
32288 slot = (issue_rate - can_issue_more);
32289 group_insns[slot] = insn;
32290 can_issue_more =
32291 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32292 if (insn_terminates_group_p (insn, current_group))
32293 can_issue_more = 0;
32295 next_insn = get_next_active_insn (insn, tail);
32296 if (next_insn == NULL_RTX)
32297 return group_count + 1;
32299 /* Is next_insn going to start a new group? */
32300 group_end
32301 = (can_issue_more == 0
32302 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32303 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32304 || (can_issue_more < issue_rate &&
32305 insn_terminates_group_p (next_insn, previous_group)));
32307 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32308 next_insn, &group_end, can_issue_more,
32309 &group_count);
32311 if (group_end)
32313 group_count++;
32314 can_issue_more = 0;
32315 for (i = 0; i < issue_rate; i++)
32317 group_insns[i] = 0;
32321 if (GET_MODE (next_insn) == TImode && can_issue_more)
32322 PUT_MODE (next_insn, VOIDmode);
32323 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32324 PUT_MODE (next_insn, TImode);
32326 insn = next_insn;
32327 if (can_issue_more == 0)
32328 can_issue_more = issue_rate;
32329 } /* while */
32331 return group_count;
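/* Editor's aside (not part of rs6000.c): the "next insn starts a new
   group" test used in redefine_groups above (and in force_new_group),
   restated as a standalone predicate over plain flags.  Parameter names
   are hypothetical, introduced only for this sketch.  */
#include <stdbool.h>
#include <stdio.h>

static bool
starts_new_group (int can_issue_more, int issue_rate,
                  bool next_is_branch, bool next_is_cracked,
                  bool next_must_be_first)
{
  return can_issue_more == 0                          /* no slots left */
         || (can_issue_more == 1 && !next_is_branch)  /* only branch slot */
         || (can_issue_more <= 2 && next_is_cracked)  /* cracked insn can't fit */
         || (can_issue_more < issue_rate && next_must_be_first);
}

int
main (void)
{
  /* One vacant slot and the next insn is not a branch: new group.  */
  printf ("%d\n", starts_new_group (1, 5, false, false, false)); /* 1 */
  return 0;
}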
32334 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32335 dispatch group boundaries that the scheduler had marked. Pad with nops
32336 any dispatch groups which have vacant issue slots, in order to force the
32337 scheduler's grouping on the processor dispatcher. The function
32338 returns the number of dispatch groups found. */
32340 static int
32341 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32342 rtx_insn *tail)
32344 rtx_insn *insn, *next_insn;
32345 rtx nop;
32346 int issue_rate;
32347 int can_issue_more;
32348 int group_end;
32349 int group_count = 0;
32351 /* Initialize issue_rate. */
32352 issue_rate = rs6000_issue_rate ();
32353 can_issue_more = issue_rate;
32355 insn = get_next_active_insn (prev_head_insn, tail);
32356 next_insn = get_next_active_insn (insn, tail);
32358 while (insn != NULL_RTX)
32360 can_issue_more =
32361 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32363 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32365 if (next_insn == NULL_RTX)
32366 break;
32368 if (group_end)
32370 /* If the scheduler had marked group termination at this location
32371 (between insn and next_insn), and neither insn nor next_insn will
32372 force group termination, pad the group with nops to force group
32373 termination. */
32374 if (can_issue_more
32375 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32376 && !insn_terminates_group_p (insn, current_group)
32377 && !insn_terminates_group_p (next_insn, previous_group))
32379 if (!is_branch_slot_insn (next_insn))
32380 can_issue_more--;
32382 while (can_issue_more)
32384 nop = gen_nop ();
32385 emit_insn_before (nop, next_insn);
32386 can_issue_more--;
32390 can_issue_more = issue_rate;
32391 group_count++;
32394 insn = next_insn;
32395 next_insn = get_next_active_insn (insn, tail);
32398 return group_count;
32401 /* We're beginning a new block. Initialize data structures as necessary. */
32403 static void
32404 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32405 int sched_verbose ATTRIBUTE_UNUSED,
32406 int max_ready ATTRIBUTE_UNUSED)
32408 last_scheduled_insn = NULL;
32409 load_store_pendulum = 0;
32410 divide_cnt = 0;
32411 vec_load_pendulum = 0;
32414 /* The following function is called at the end of scheduling BB.
32415 After reload, it inserts nops to enforce insn group bundling. */
32417 static void
32418 rs6000_sched_finish (FILE *dump, int sched_verbose)
32420 int n_groups;
32422 if (sched_verbose)
32423 fprintf (dump, "=== Finishing schedule.\n");
32425 if (reload_completed && rs6000_sched_groups)
32427 /* Do not run the sched_finish hook when selective scheduling is enabled. */
32428 if (sel_sched_p ())
32429 return;
32431 if (rs6000_sched_insert_nops == sched_finish_none)
32432 return;
32434 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32435 n_groups = pad_groups (dump, sched_verbose,
32436 current_sched_info->prev_head,
32437 current_sched_info->next_tail);
32438 else
32439 n_groups = redefine_groups (dump, sched_verbose,
32440 current_sched_info->prev_head,
32441 current_sched_info->next_tail);
32443 if (sched_verbose >= 6)
32445 fprintf (dump, "ngroups = %d\n", n_groups);
32446 print_rtl (dump, current_sched_info->prev_head);
32447 fprintf (dump, "Done finish_sched\n");
32452 struct rs6000_sched_context
32454 short cached_can_issue_more;
32455 rtx_insn *last_scheduled_insn;
32456 int load_store_pendulum;
32457 int divide_cnt;
32458 int vec_load_pendulum;
32461 typedef struct rs6000_sched_context rs6000_sched_context_def;
32462 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32464 /* Allocate store for new scheduling context. */
32465 static void *
32466 rs6000_alloc_sched_context (void)
32468 return xmalloc (sizeof (rs6000_sched_context_def));
32471 /* If CLEAN_P is true, initialize _SC with clean data;
32472 otherwise, initialize it from the global context. */
32473 static void
32474 rs6000_init_sched_context (void *_sc, bool clean_p)
32476 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32478 if (clean_p)
32480 sc->cached_can_issue_more = 0;
32481 sc->last_scheduled_insn = NULL;
32482 sc->load_store_pendulum = 0;
32483 sc->divide_cnt = 0;
32484 sc->vec_load_pendulum = 0;
32486 else
32488 sc->cached_can_issue_more = cached_can_issue_more;
32489 sc->last_scheduled_insn = last_scheduled_insn;
32490 sc->load_store_pendulum = load_store_pendulum;
32491 sc->divide_cnt = divide_cnt;
32492 sc->vec_load_pendulum = vec_load_pendulum;
32496 /* Sets the global scheduling context to the one pointed to by _SC. */
32497 static void
32498 rs6000_set_sched_context (void *_sc)
32500 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32502 gcc_assert (sc != NULL);
32504 cached_can_issue_more = sc->cached_can_issue_more;
32505 last_scheduled_insn = sc->last_scheduled_insn;
32506 load_store_pendulum = sc->load_store_pendulum;
32507 divide_cnt = sc->divide_cnt;
32508 vec_load_pendulum = sc->vec_load_pendulum;
32511 /* Free _SC. */
32512 static void
32513 rs6000_free_sched_context (void *_sc)
32515 gcc_assert (_sc != NULL);
32517 free (_sc);
32521 /* Length in units of the trampoline for entering a nested function. */
32523 int
32524 rs6000_trampoline_size (void)
32526 int ret = 0;
32528 switch (DEFAULT_ABI)
32530 default:
32531 gcc_unreachable ();
32533 case ABI_AIX:
32534 ret = (TARGET_32BIT) ? 12 : 24;
32535 break;
32537 case ABI_ELFv2:
32538 gcc_assert (!TARGET_32BIT);
32539 ret = 32;
32540 break;
32542 case ABI_DARWIN:
32543 case ABI_V4:
32544 ret = (TARGET_32BIT) ? 40 : 48;
32545 break;
32548 return ret;
32551 /* Emit RTL insns to initialize the variable parts of a trampoline.
32552 FNADDR is an RTX for the address of the function's pure code.
32553 CXT is an RTX for the static chain value for the function. */
32555 static void
32556 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32558 int regsize = (TARGET_32BIT) ? 4 : 8;
32559 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32560 rtx ctx_reg = force_reg (Pmode, cxt);
32561 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32563 switch (DEFAULT_ABI)
32565 default:
32566 gcc_unreachable ();
32568 /* Under AIX, just build the 3 word function descriptor */
32569 case ABI_AIX:
32571 rtx fnmem, fn_reg, toc_reg;
32573 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32574 error ("You cannot take the address of a nested function if you use "
32575 "the -mno-pointers-to-nested-functions option.");
32577 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32578 fn_reg = gen_reg_rtx (Pmode);
32579 toc_reg = gen_reg_rtx (Pmode);
32581 /* Macro to shorten the code expansions below. */
32582 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32584 m_tramp = replace_equiv_address (m_tramp, addr);
32586 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32587 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32588 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32589 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32590 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32592 # undef MEM_PLUS
32594 break;
32596 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32597 case ABI_ELFv2:
32598 case ABI_DARWIN:
32599 case ABI_V4:
32600 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32601 LCT_NORMAL, VOIDmode, 4,
32602 addr, Pmode,
32603 GEN_INT (rs6000_trampoline_size ()), SImode,
32604 fnaddr, Pmode,
32605 ctx_reg, Pmode);
32606 break;
32611 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32612 identifier as an argument, so the front end shouldn't look it up. */
32614 static bool
32615 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32617 return is_attribute_p ("altivec", attr_id);
32620 /* Handle the "altivec" attribute. The attribute may have
32621 arguments as follows:
32623 __attribute__((altivec(vector__)))
32624 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32625 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32627 and may appear more than once (e.g., 'vector bool char') in a
32628 given declaration. */
32630 static tree
32631 rs6000_handle_altivec_attribute (tree *node,
32632 tree name ATTRIBUTE_UNUSED,
32633 tree args,
32634 int flags ATTRIBUTE_UNUSED,
32635 bool *no_add_attrs)
32637 tree type = *node, result = NULL_TREE;
32638 machine_mode mode;
32639 int unsigned_p;
32640 char altivec_type
32641 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32642 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32643 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32644 : '?');
32646 while (POINTER_TYPE_P (type)
32647 || TREE_CODE (type) == FUNCTION_TYPE
32648 || TREE_CODE (type) == METHOD_TYPE
32649 || TREE_CODE (type) == ARRAY_TYPE)
32650 type = TREE_TYPE (type);
32652 mode = TYPE_MODE (type);
32654 /* Check for invalid AltiVec type qualifiers. */
32655 if (type == long_double_type_node)
32656 error ("use of %<long double%> in AltiVec types is invalid");
32657 else if (type == boolean_type_node)
32658 error ("use of boolean types in AltiVec types is invalid");
32659 else if (TREE_CODE (type) == COMPLEX_TYPE)
32660 error ("use of %<complex%> in AltiVec types is invalid");
32661 else if (DECIMAL_FLOAT_MODE_P (mode))
32662 error ("use of decimal floating point types in AltiVec types is invalid");
32663 else if (!TARGET_VSX)
32665 if (type == long_unsigned_type_node || type == long_integer_type_node)
32667 if (TARGET_64BIT)
32668 error ("use of %<long%> in AltiVec types is invalid for "
32669 "64-bit code without -mvsx");
32670 else if (rs6000_warn_altivec_long)
32671 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32672 "use %<int%>");
32674 else if (type == long_long_unsigned_type_node
32675 || type == long_long_integer_type_node)
32676 error ("use of %<long long%> in AltiVec types is invalid without "
32677 "-mvsx");
32678 else if (type == double_type_node)
32679 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
32682 switch (altivec_type)
32684 case 'v':
32685 unsigned_p = TYPE_UNSIGNED (type);
32686 switch (mode)
32688 case TImode:
32689 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32690 break;
32691 case DImode:
32692 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32693 break;
32694 case SImode:
32695 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32696 break;
32697 case HImode:
32698 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32699 break;
32700 case QImode:
32701 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32702 break;
32703 case SFmode: result = V4SF_type_node; break;
32704 case DFmode: result = V2DF_type_node; break;
32705 /* If the user says 'vector int bool', we may be handed the 'bool'
32706 attribute _before_ the 'vector' attribute, and so select the
32707 proper type in the 'b' case below. */
32708 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
32709 case V2DImode: case V2DFmode:
32710 result = type;
32711 default: break;
32713 break;
32714 case 'b':
32715 switch (mode)
32717 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
32718 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
32719 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
32720 case QImode: case V16QImode: result = bool_V16QI_type_node;
32721 default: break;
32723 break;
32724 case 'p':
32725 switch (mode)
32727 case V8HImode: result = pixel_V8HI_type_node;
32728 default: break;
32730 default: break;
32733 /* Propagate qualifiers attached to the element type
32734 onto the vector type. */
32735 if (result && result != type && TYPE_QUALS (type))
32736 result = build_qualified_type (result, TYPE_QUALS (type));
32738 *no_add_attrs = true; /* No need to hang on to the attribute. */
32740 if (result)
32741 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32743 return NULL_TREE;
32746 /* AltiVec defines four built-in scalar types that serve as vector
32747 elements; we must teach the compiler how to mangle them. */
32749 static const char *
32750 rs6000_mangle_type (const_tree type)
32752 type = TYPE_MAIN_VARIANT (type);
32754 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32755 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32756 return NULL;
32758 if (type == bool_char_type_node) return "U6__boolc";
32759 if (type == bool_short_type_node) return "U6__bools";
32760 if (type == pixel_type_node) return "u7__pixel";
32761 if (type == bool_int_type_node) return "U6__booli";
32762 if (type == bool_long_type_node) return "U6__booll";
32764 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32765 "g" for IBM extended double, no matter whether it is long double (using
32766 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32767 if (TARGET_FLOAT128)
32769 if (type == ieee128_float_type_node)
32770 return "U10__float128";
32772 if (type == ibm128_float_type_node)
32773 return "g";
32775 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
32776 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32779 /* Mangle IBM extended float long double as `g' (__float128) on
32780 powerpc*-linux where long-double-64 previously was the default. */
32781 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32782 && TARGET_ELF
32783 && TARGET_LONG_DOUBLE_128
32784 && !TARGET_IEEEQUAD)
32785 return "g";
32787 /* For all other types, use normal C++ mangling. */
32788 return NULL;
32791 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32792 struct attribute_spec.handler. */
32794 static tree
32795 rs6000_handle_longcall_attribute (tree *node, tree name,
32796 tree args ATTRIBUTE_UNUSED,
32797 int flags ATTRIBUTE_UNUSED,
32798 bool *no_add_attrs)
32800 if (TREE_CODE (*node) != FUNCTION_TYPE
32801 && TREE_CODE (*node) != FIELD_DECL
32802 && TREE_CODE (*node) != TYPE_DECL)
32804 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32805 name);
32806 *no_add_attrs = true;
32809 return NULL_TREE;
32812 /* Set longcall attributes on all functions declared when
32813 rs6000_default_long_calls is true. */
32814 static void
32815 rs6000_set_default_type_attributes (tree type)
32817 if (rs6000_default_long_calls
32818 && (TREE_CODE (type) == FUNCTION_TYPE
32819 || TREE_CODE (type) == METHOD_TYPE))
32820 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32821 NULL_TREE,
32822 TYPE_ATTRIBUTES (type));
32824 #if TARGET_MACHO
32825 darwin_set_default_type_attributes (type);
32826 #endif
32829 /* Return a reference suitable for calling a function with the
32830 longcall attribute. */
32832 rtx
32833 rs6000_longcall_ref (rtx call_ref)
32835 const char *call_name;
32836 tree node;
32838 if (GET_CODE (call_ref) != SYMBOL_REF)
32839 return call_ref;
32841 /* System V adds '.' to the internal name, so skip them. */
32842 call_name = XSTR (call_ref, 0);
32843 if (*call_name == '.')
32845 while (*call_name == '.')
32846 call_name++;
32848 node = get_identifier (call_name);
32849 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32852 return force_reg (Pmode, call_ref);
32855 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32856 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32857 #endif
32859 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32860 struct attribute_spec.handler. */
32861 static tree
32862 rs6000_handle_struct_attribute (tree *node, tree name,
32863 tree args ATTRIBUTE_UNUSED,
32864 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32866 tree *type = NULL;
32867 if (DECL_P (*node))
32869 if (TREE_CODE (*node) == TYPE_DECL)
32870 type = &TREE_TYPE (*node);
32872 else
32873 type = node;
32875 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32876 || TREE_CODE (*type) == UNION_TYPE)))
32878 warning (OPT_Wattributes, "%qE attribute ignored", name);
32879 *no_add_attrs = true;
32882 else if ((is_attribute_p ("ms_struct", name)
32883 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32884 || ((is_attribute_p ("gcc_struct", name)
32885 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32887 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32888 name);
32889 *no_add_attrs = true;
32892 return NULL_TREE;
32895 static bool
32896 rs6000_ms_bitfield_layout_p (const_tree record_type)
32898 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32899 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32900 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32903 #ifdef USING_ELFOS_H
32905 /* A get_unnamed_section callback, used for switching to toc_section. */
32907 static void
32908 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32910 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32911 && TARGET_MINIMAL_TOC)
32913 if (!toc_initialized)
32915 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32916 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32917 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32918 fprintf (asm_out_file, "\t.tc ");
32919 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32920 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32921 fprintf (asm_out_file, "\n");
32923 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32924 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32925 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32926 fprintf (asm_out_file, " = .+32768\n");
32927 toc_initialized = 1;
32929 else
32930 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32932 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32934 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32935 if (!toc_initialized)
32937 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32938 toc_initialized = 1;
32941 else
32943 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32944 if (!toc_initialized)
32946 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32947 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32948 fprintf (asm_out_file, " = .+32768\n");
32949 toc_initialized = 1;
32954 /* Implement TARGET_ASM_INIT_SECTIONS. */
32956 static void
32957 rs6000_elf_asm_init_sections (void)
32959 toc_section
32960 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32962 sdata2_section
32963 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32964 SDATA2_SECTION_ASM_OP);
32967 /* Implement TARGET_SELECT_RTX_SECTION. */
32969 static section *
32970 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32971 unsigned HOST_WIDE_INT align)
32973 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32974 return toc_section;
32975 else
32976 return default_elf_select_rtx_section (mode, x, align);
32979 /* For a SYMBOL_REF, set generic flags and then perform some
32980 target-specific processing.
32982 When the AIX ABI is requested on a non-AIX system, replace the
32983 function name with the real name (with a leading .) rather than the
32984 function descriptor name. This saves a lot of overriding code to
32985 read the prefixes. */
32987 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32988 static void
32989 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32991 default_encode_section_info (decl, rtl, first);
32993 if (first
32994 && TREE_CODE (decl) == FUNCTION_DECL
32995 && !TARGET_AIX
32996 && DEFAULT_ABI == ABI_AIX)
32998 rtx sym_ref = XEXP (rtl, 0);
32999 size_t len = strlen (XSTR (sym_ref, 0));
33000 char *str = XALLOCAVEC (char, len + 2);
33001 str[0] = '.';
33002 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
33003 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
33007 static inline bool
33008 compare_section_name (const char *section, const char *templ)
33010 int len;
33012 len = strlen (templ);
33013 return (strncmp (section, templ, len) == 0
33014 && (section[len] == 0 || section[len] == '.'));
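/* For example, compare_section_name (".sdata.rodata", ".sdata") and
   compare_section_name (".sdata", ".sdata") both return true, while
   compare_section_name (".sdata2", ".sdata") is false because the
   character after the matched prefix is '2', not '.' or NUL.  */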
33017 bool
33018 rs6000_elf_in_small_data_p (const_tree decl)
33020 if (rs6000_sdata == SDATA_NONE)
33021 return false;
33023 /* We want to merge strings, so we never consider them small data. */
33024 if (TREE_CODE (decl) == STRING_CST)
33025 return false;
33027 /* Functions are never in the small data area. */
33028 if (TREE_CODE (decl) == FUNCTION_DECL)
33029 return false;
33031 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
33033 const char *section = DECL_SECTION_NAME (decl);
33034 if (compare_section_name (section, ".sdata")
33035 || compare_section_name (section, ".sdata2")
33036 || compare_section_name (section, ".gnu.linkonce.s")
33037 || compare_section_name (section, ".sbss")
33038 || compare_section_name (section, ".sbss2")
33039 || compare_section_name (section, ".gnu.linkonce.sb")
33040 || strcmp (section, ".PPC.EMB.sdata0") == 0
33041 || strcmp (section, ".PPC.EMB.sbss0") == 0)
33042 return true;
33044 else
33046 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
33048 if (size > 0
33049 && size <= g_switch_value
33050 /* If it's not public, and we're not going to reference it there,
33051 there's no need to put it in the small data section. */
33052 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33053 return true;
33056 return false;
33059 #endif /* USING_ELFOS_H */
33061 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33063 static bool
33064 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33066 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33069 /* Do not place thread-local symbols refs in the object blocks. */
33071 static bool
33072 rs6000_use_blocks_for_decl_p (const_tree decl)
33074 return !DECL_THREAD_LOCAL_P (decl);
33077 /* Return a REG that occurs in ADDR with coefficient 1.
33078 ADDR can be effectively incremented by incrementing REG.
33080 r0 is special and we must not select it as an address
33081 register by this routine since our caller will try to
33082 increment the returned register via an "la" instruction. */
33084 rtx
33085 find_addr_reg (rtx addr)
33087 while (GET_CODE (addr) == PLUS)
33089 if (GET_CODE (XEXP (addr, 0)) == REG
33090 && REGNO (XEXP (addr, 0)) != 0)
33091 addr = XEXP (addr, 0);
33092 else if (GET_CODE (XEXP (addr, 1)) == REG
33093 && REGNO (XEXP (addr, 1)) != 0)
33094 addr = XEXP (addr, 1);
33095 else if (CONSTANT_P (XEXP (addr, 0)))
33096 addr = XEXP (addr, 1);
33097 else if (CONSTANT_P (XEXP (addr, 1)))
33098 addr = XEXP (addr, 0);
33099 else
33100 gcc_unreachable ();
33102 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
33103 return addr;
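/* For example, given (plus (reg r9) (const_int 16)) this returns r9;
   for (plus (plus (reg r0) (reg r9)) (const_int 8)) it also returns
   r9, skipping r0 since "la" cannot use r0 as a base register.  */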
33106 void
33107 rs6000_fatal_bad_address (rtx op)
33109 fatal_insn ("bad address", op);
33112 #if TARGET_MACHO
33114 typedef struct branch_island_d {
33115 tree function_name;
33116 tree label_name;
33117 int line_number;
33118 } branch_island;
33121 static vec<branch_island, va_gc> *branch_islands;
33123 /* Remember to generate a branch island for far calls to the given
33124 function. */
33126 static void
33127 add_compiler_branch_island (tree label_name, tree function_name,
33128 int line_number)
33130 branch_island bi = {function_name, label_name, line_number};
33131 vec_safe_push (branch_islands, bi);
33134 /* Generate far-jump branch islands for everything recorded in
33135 branch_islands. Invoked immediately after the last instruction of
33136 the epilogue has been emitted; the branch islands must be appended
33137 to, and contiguous with, the function body. Mach-O stubs are
33138 generated in machopic_output_stub(). */
33140 static void
33141 macho_branch_islands (void)
33143 char tmp_buf[512];
33145 while (!vec_safe_is_empty (branch_islands))
33147 branch_island *bi = &branch_islands->last ();
33148 const char *label = IDENTIFIER_POINTER (bi->label_name);
33149 const char *name = IDENTIFIER_POINTER (bi->function_name);
33150 char name_buf[512];
33151 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33152 if (name[0] == '*' || name[0] == '&')
33153 strcpy (name_buf, name+1);
33154 else
33156 name_buf[0] = '_';
33157 strcpy (name_buf+1, name);
33159 strcpy (tmp_buf, "\n");
33160 strcat (tmp_buf, label);
33161 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33162 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33163 dbxout_stabd (N_SLINE, bi->line_number);
33164 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33165 if (flag_pic)
33167 if (TARGET_LINK_STACK)
33169 char name[32];
33170 get_ppc476_thunk_name (name);
33171 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
33172 strcat (tmp_buf, name);
33173 strcat (tmp_buf, "\n");
33174 strcat (tmp_buf, label);
33175 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33177 else
33179 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
33180 strcat (tmp_buf, label);
33181 strcat (tmp_buf, "_pic\n");
33182 strcat (tmp_buf, label);
33183 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33186 strcat (tmp_buf, "\taddis r11,r11,ha16(");
33187 strcat (tmp_buf, name_buf);
33188 strcat (tmp_buf, " - ");
33189 strcat (tmp_buf, label);
33190 strcat (tmp_buf, "_pic)\n");
33192 strcat (tmp_buf, "\tmtlr r0\n");
33194 strcat (tmp_buf, "\taddi r12,r11,lo16(");
33195 strcat (tmp_buf, name_buf);
33196 strcat (tmp_buf, " - ");
33197 strcat (tmp_buf, label);
33198 strcat (tmp_buf, "_pic)\n");
33200 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33202 else
33204 strcat (tmp_buf, ":\nlis r12,hi16(");
33205 strcat (tmp_buf, name_buf);
33206 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33207 strcat (tmp_buf, name_buf);
33208 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33210 output_asm_insn (tmp_buf, 0);
33211 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33212 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33213 dbxout_stabd (N_SLINE, bi->line_number);
33214 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33215 branch_islands->pop ();
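/* For a hypothetical island labelled L42 covering a call to _foo, the
   non-PIC case above emits approximately:

   L42:
	lis r12,hi16(_foo)
	ori r12,r12,lo16(_foo)
	mtctr r12
	bctr
   */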
33219 /* NO_PREVIOUS_DEF checks in the linked list whether the function name is
33220 already there or not. */
33222 static int
33223 no_previous_def (tree function_name)
33225 branch_island *bi;
33226 unsigned ix;
33228 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33229 if (function_name == bi->function_name)
33230 return 0;
33231 return 1;
33234 /* GET_PREV_LABEL gets the label name from the previous definition of
33235 the function. */
33237 static tree
33238 get_prev_label (tree function_name)
33240 branch_island *bi;
33241 unsigned ix;
33243 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33244 if (function_name == bi->function_name)
33245 return bi->label_name;
33246 return NULL_TREE;
33249 /* INSN is either a function call or a millicode call. It may have an
33250 unconditional jump in its delay slot.
33252 CALL_DEST is the routine we are calling. */
33254 char *
33255 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33256 int cookie_operand_number)
33258 static char buf[256];
33259 if (darwin_emit_branch_islands
33260 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33261 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33263 tree labelname;
33264 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33266 if (no_previous_def (funname))
33268 rtx label_rtx = gen_label_rtx ();
33269 char *label_buf, temp_buf[256];
33270 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33271 CODE_LABEL_NUMBER (label_rtx));
33272 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33273 labelname = get_identifier (label_buf);
33274 add_compiler_branch_island (labelname, funname, insn_line (insn));
33276 else
33277 labelname = get_prev_label (funname);
33279 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33280 instruction will reach 'foo', otherwise link as 'bl L42'".
33281 "L42" should be a 'branch island', that will do a far jump to
33282 'foo'. Branch islands are generated in
33283 macho_branch_islands(). */
33284 sprintf (buf, "jbsr %%z%d,%.246s",
33285 dest_operand_number, IDENTIFIER_POINTER (labelname));
33287 else
33288 sprintf (buf, "bl %%z%d", dest_operand_number);
33289 return buf;
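/* For a long call to a hypothetical symbol _foo whose island label is
   L42, the code above returns "jbsr %z0,L42" (assuming operand 0 is
   the destination) and queues a matching island for
   macho_branch_islands (); a short call simply yields "bl %z0".  */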
33292 /* Generate PIC and indirect symbol stubs. */
33294 void
33295 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33297 unsigned int length;
33298 char *symbol_name, *lazy_ptr_name;
33299 char *local_label_0;
33300 static int label = 0;
33302 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33303 symb = (*targetm.strip_name_encoding) (symb);
33306 length = strlen (symb);
33307 symbol_name = XALLOCAVEC (char, length + 32);
33308 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33310 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33311 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33313 if (flag_pic == 2)
33314 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33315 else
33316 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33318 if (flag_pic == 2)
33320 fprintf (file, "\t.align 5\n");
33322 fprintf (file, "%s:\n", stub);
33323 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33325 label++;
33326 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33327 sprintf (local_label_0, "\"L%011d$spb\"", label);
33329 fprintf (file, "\tmflr r0\n");
33330 if (TARGET_LINK_STACK)
33332 char name[32];
33333 get_ppc476_thunk_name (name);
33334 fprintf (file, "\tbl %s\n", name);
33335 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33337 else
33339 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33340 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33342 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33343 lazy_ptr_name, local_label_0);
33344 fprintf (file, "\tmtlr r0\n");
33345 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33346 (TARGET_64BIT ? "ldu" : "lwzu"),
33347 lazy_ptr_name, local_label_0);
33348 fprintf (file, "\tmtctr r12\n");
33349 fprintf (file, "\tbctr\n");
33351 else
33353 fprintf (file, "\t.align 4\n");
33355 fprintf (file, "%s:\n", stub);
33356 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33358 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33359 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33360 (TARGET_64BIT ? "ldu" : "lwzu"),
33361 lazy_ptr_name);
33362 fprintf (file, "\tmtctr r12\n");
33363 fprintf (file, "\tbctr\n");
33366 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33367 fprintf (file, "%s:\n", lazy_ptr_name);
33368 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33369 fprintf (file, "%sdyld_stub_binding_helper\n",
33370 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
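/* The trailing lazy-pointer entry for a hypothetical symbol foo looks
   roughly like:

   Lfoo$lazy_ptr:
	.indirect_symbol _foo
	.long dyld_stub_binding_helper

   with DOUBLE_INT_ASM_OP replacing ".long" on 64-bit targets.  */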
33373 /* Legitimize PIC addresses. If the address is already
33374 position-independent, we return ORIG. Newly generated
33375 position-independent addresses go into a reg. This is REG if non
33376 zero, otherwise we allocate register(s) as necessary. */
33378 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
33380 rtx
33381 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33382 rtx reg)
33384 rtx base, offset;
33386 if (reg == NULL && ! reload_in_progress && ! reload_completed)
33387 reg = gen_reg_rtx (Pmode);
33389 if (GET_CODE (orig) == CONST)
33391 rtx reg_temp;
33393 if (GET_CODE (XEXP (orig, 0)) == PLUS
33394 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33395 return orig;
33397 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33399 /* Use a different reg for the intermediate value, as
33400 it will be marked UNCHANGING. */
33401 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33402 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33403 Pmode, reg_temp);
33404 offset =
33405 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33406 Pmode, reg);
33408 if (GET_CODE (offset) == CONST_INT)
33410 if (SMALL_INT (offset))
33411 return plus_constant (Pmode, base, INTVAL (offset));
33412 else if (! reload_in_progress && ! reload_completed)
33413 offset = force_reg (Pmode, offset);
33414 else
33416 rtx mem = force_const_mem (Pmode, orig);
33417 return machopic_legitimize_pic_address (mem, Pmode, reg);
33420 return gen_rtx_PLUS (Pmode, base, offset);
33423 /* Fall back on generic machopic code. */
33424 return machopic_legitimize_pic_address (orig, mode, reg);
33427 /* Output a .machine directive for the Darwin assembler, and call
33428 the generic start_file routine. */
33430 static void
33431 rs6000_darwin_file_start (void)
33433 static const struct
33435 const char *arg;
33436 const char *name;
33437 HOST_WIDE_INT if_set;
33438 } mapping[] = {
33439 { "ppc64", "ppc64", MASK_64BIT },
33440 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33441 { "power4", "ppc970", 0 },
33442 { "G5", "ppc970", 0 },
33443 { "7450", "ppc7450", 0 },
33444 { "7400", "ppc7400", MASK_ALTIVEC },
33445 { "G4", "ppc7400", 0 },
33446 { "750", "ppc750", 0 },
33447 { "740", "ppc750", 0 },
33448 { "G3", "ppc750", 0 },
33449 { "604e", "ppc604e", 0 },
33450 { "604", "ppc604", 0 },
33451 { "603e", "ppc603", 0 },
33452 { "603", "ppc603", 0 },
33453 { "601", "ppc601", 0 },
33454 { NULL, "ppc", 0 } };
33455 const char *cpu_id = "";
33456 size_t i;
33458 rs6000_file_start ();
33459 darwin_file_start ();
33461 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33463 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33464 cpu_id = rs6000_default_cpu;
33466 if (global_options_set.x_rs6000_cpu_index)
33467 cpu_id = processor_target_table[rs6000_cpu_index].name;
33469 /* Look through the mapping array. Pick the first name that either
33470 matches the argument, has a bit set in IF_SET that is also set
33471 in the target flags, or has a NULL name. */
33473 i = 0;
33474 while (mapping[i].arg != NULL
33475 && strcmp (mapping[i].arg, cpu_id) != 0
33476 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33477 i++;
33479 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
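/* For example, -mcpu=G5 maps through the table above to
   "\t.machine ppc970"; an argument matching no row and setting no
   IF_SET flag falls through to the final { NULL, "ppc", 0 } entry and
   emits "\t.machine ppc".  */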
33482 #endif /* TARGET_MACHO */
33484 #if TARGET_ELF
33485 static int
33486 rs6000_elf_reloc_rw_mask (void)
33488 if (flag_pic)
33489 return 3;
33490 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33491 return 2;
33492 else
33493 return 0;
33496 /* Record an element in the table of global constructors. SYMBOL is
33497 a SYMBOL_REF of the function to be called; PRIORITY is a number
33498 between 0 and MAX_INIT_PRIORITY.
33500 This differs from default_named_section_asm_out_constructor in
33501 that we have special handling for -mrelocatable. */
33503 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33504 static void
33505 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33507 const char *section = ".ctors";
33508 char buf[16];
33510 if (priority != DEFAULT_INIT_PRIORITY)
33512 sprintf (buf, ".ctors.%.5u",
33513 /* Invert the numbering so the linker puts us in the proper
33514 order; constructors are run from right to left, and the
33515 linker sorts in increasing order. */
33516 MAX_INIT_PRIORITY - priority);
33517 section = buf;
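      /* For example, priority 200 yields the section name
	 ".ctors.65335" (MAX_INIT_PRIORITY 65535 minus 200), so higher
	 priorities get numerically smaller suffixes and sort earlier.  */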
33520 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33521 assemble_align (POINTER_SIZE);
33523 if (DEFAULT_ABI == ABI_V4
33524 && (TARGET_RELOCATABLE || flag_pic > 1))
33526 fputs ("\t.long (", asm_out_file);
33527 output_addr_const (asm_out_file, symbol);
33528 fputs (")@fixup\n", asm_out_file);
33530 else
33531 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33534 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33535 static void
33536 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33538 const char *section = ".dtors";
33539 char buf[16];
33541 if (priority != DEFAULT_INIT_PRIORITY)
33543 sprintf (buf, ".dtors.%.5u",
33544 /* Invert the numbering so the linker puts us in the proper
33545 order; constructors are run from right to left, and the
33546 linker sorts in increasing order. */
33547 MAX_INIT_PRIORITY - priority);
33548 section = buf;
33551 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33552 assemble_align (POINTER_SIZE);
33554 if (DEFAULT_ABI == ABI_V4
33555 && (TARGET_RELOCATABLE || flag_pic > 1))
33557 fputs ("\t.long (", asm_out_file);
33558 output_addr_const (asm_out_file, symbol);
33559 fputs (")@fixup\n", asm_out_file);
33561 else
33562 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33565 void
33566 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33568 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33570 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33571 ASM_OUTPUT_LABEL (file, name);
33572 fputs (DOUBLE_INT_ASM_OP, file);
33573 rs6000_output_function_entry (file, name);
33574 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33575 if (DOT_SYMBOLS)
33577 fputs ("\t.size\t", file);
33578 assemble_name (file, name);
33579 fputs (",24\n\t.type\t.", file);
33580 assemble_name (file, name);
33581 fputs (",@function\n", file);
33582 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33584 fputs ("\t.globl\t.", file);
33585 assemble_name (file, name);
33586 putc ('\n', file);
33589 else
33590 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33591 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33592 rs6000_output_function_entry (file, name);
33593 fputs (":\n", file);
33594 return;
33597 if (DEFAULT_ABI == ABI_V4
33598 && (TARGET_RELOCATABLE || flag_pic > 1)
33599 && !TARGET_SECURE_PLT
33600 && (get_pool_size () != 0 || crtl->profile)
33601 && uses_TOC ())
33603 char buf[256];
33605 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33607 fprintf (file, "\t.long ");
33608 assemble_name (file, toc_label_name);
33609 need_toc_init = 1;
33610 putc ('-', file);
33611 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33612 assemble_name (file, buf);
33613 putc ('\n', file);
33616 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33617 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33619 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33621 char buf[256];
33623 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33625 fprintf (file, "\t.quad .TOC.-");
33626 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33627 assemble_name (file, buf);
33628 putc ('\n', file);
33631 if (DEFAULT_ABI == ABI_AIX)
33633 const char *desc_name, *orig_name;
33635 orig_name = (*targetm.strip_name_encoding) (name);
33636 desc_name = orig_name;
33637 while (*desc_name == '.')
33638 desc_name++;
33640 if (TREE_PUBLIC (decl))
33641 fprintf (file, "\t.globl %s\n", desc_name);
33643 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33644 fprintf (file, "%s:\n", desc_name);
33645 fprintf (file, "\t.long %s\n", orig_name);
33646 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33647 fputs ("\t.long 0\n", file);
33648 fprintf (file, "\t.previous\n");
33650 ASM_OUTPUT_LABEL (file, name);
33653 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33654 static void
33655 rs6000_elf_file_end (void)
33657 #ifdef HAVE_AS_GNU_ATTRIBUTE
33658 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33660 if (rs6000_passes_float)
33661 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
33662 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
33663 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
33664 : 2));
33665 if (rs6000_passes_vector)
33666 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33667 (TARGET_ALTIVEC_ABI ? 2
33668 : TARGET_SPE_ABI ? 3
33669 : 1));
33670 if (rs6000_returns_struct)
33671 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33672 aix_struct_return ? 2 : 1);
33674 #endif
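/* For example, a 32-bit SVR4 compilation that passes FP values using
   hard double-precision float emits "\t.gnu_attribute 4, 1", and one
   using the AltiVec vector ABI emits "\t.gnu_attribute 8, 2".  */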
33675 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33676 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33677 file_end_indicate_exec_stack ();
33678 #endif
33680 if (flag_split_stack)
33681 file_end_indicate_split_stack ();
33683 if (cpu_builtin_p)
33685 /* We have expanded a CPU builtin, so we need to emit a reference to
33686 the special symbol that LIBC uses to declare that it supports the
33687 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
33688 switch_to_section (data_section);
33689 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33690 fprintf (asm_out_file, "\t%s %s\n",
33691 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33694 #endif
33696 #if TARGET_XCOFF
33698 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33699 #define HAVE_XCOFF_DWARF_EXTRAS 0
33700 #endif
33702 static enum unwind_info_type
33703 rs6000_xcoff_debug_unwind_info (void)
33705 return UI_NONE;
33708 static void
33709 rs6000_xcoff_asm_output_anchor (rtx symbol)
33711 char buffer[100];
33713 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33714 SYMBOL_REF_BLOCK_OFFSET (symbol));
33715 fprintf (asm_out_file, "%s", SET_ASM_OP);
33716 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33717 fprintf (asm_out_file, ",");
33718 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33719 fprintf (asm_out_file, "\n");
33722 static void
33723 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33725 fputs (GLOBAL_ASM_OP, stream);
33726 RS6000_OUTPUT_BASENAME (stream, name);
33727 putc ('\n', stream);
33730 /* A get_unnamed_section callback, used for read-only sections.
33731 DIRECTIVE points to the section string variable. */
33733 static void
33734 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33736 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33737 *(const char *const *) directive,
33738 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33741 /* Likewise for read-write sections. */
33743 static void
33744 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33746 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33747 *(const char *const *) directive,
33748 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33751 static void
33752 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33754 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33755 *(const char *const *) directive,
33756 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33759 /* A get_unnamed_section callback, used for switching to toc_section. */
33761 static void
33762 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33764 if (TARGET_MINIMAL_TOC)
33766 /* toc_section is always selected at least once from
33767 rs6000_xcoff_file_start, so this is guaranteed to
33768 always be defined once and only once in each file. */
33769 if (!toc_initialized)
33771 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33772 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33773 toc_initialized = 1;
33775 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33776 (TARGET_32BIT ? "" : ",3"));
33778 else
33779 fputs ("\t.toc\n", asm_out_file);
33782 /* Implement TARGET_ASM_INIT_SECTIONS. */
33784 static void
33785 rs6000_xcoff_asm_init_sections (void)
33787 read_only_data_section
33788 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33789 &xcoff_read_only_section_name);
33791 private_data_section
33792 = get_unnamed_section (SECTION_WRITE,
33793 rs6000_xcoff_output_readwrite_section_asm_op,
33794 &xcoff_private_data_section_name);
33796 tls_data_section
33797 = get_unnamed_section (SECTION_TLS,
33798 rs6000_xcoff_output_tls_section_asm_op,
33799 &xcoff_tls_data_section_name);
33801 tls_private_data_section
33802 = get_unnamed_section (SECTION_TLS,
33803 rs6000_xcoff_output_tls_section_asm_op,
33804 &xcoff_private_data_section_name);
33806 read_only_private_data_section
33807 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33808 &xcoff_private_data_section_name);
33810 toc_section
33811 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33813 readonly_data_section = read_only_data_section;
33816 static int
33817 rs6000_xcoff_reloc_rw_mask (void)
33819 return 3;
33822 static void
33823 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33824 tree decl ATTRIBUTE_UNUSED)
33826 int smclass;
33827 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33829 if (flags & SECTION_EXCLUDE)
33830 smclass = 4;
33831 else if (flags & SECTION_DEBUG)
33833 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33834 return;
33836 else if (flags & SECTION_CODE)
33837 smclass = 0;
33838 else if (flags & SECTION_TLS)
33839 smclass = 3;
33840 else if (flags & SECTION_WRITE)
33841 smclass = 2;
33842 else
33843 smclass = 1;
33845 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33846 (flags & SECTION_CODE) ? "." : "",
33847 name, suffix[smclass], flags & SECTION_ENTSIZE);
33850 #define IN_NAMED_SECTION(DECL) \
33851 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33852 && DECL_SECTION_NAME (DECL) != NULL)
33854 static section *
33855 rs6000_xcoff_select_section (tree decl, int reloc,
33856 unsigned HOST_WIDE_INT align)
33858 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33859 a named section. */
33860 if (align > BIGGEST_ALIGNMENT)
33862 resolve_unique_section (decl, reloc, true);
33863 if (IN_NAMED_SECTION (decl))
33864 return get_named_section (decl, NULL, reloc);
33867 if (decl_readonly_section (decl, reloc))
33869 if (TREE_PUBLIC (decl))
33870 return read_only_data_section;
33871 else
33872 return read_only_private_data_section;
33874 else
33876 #if HAVE_AS_TLS
33877 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33879 if (TREE_PUBLIC (decl))
33880 return tls_data_section;
33881 else if (bss_initializer_p (decl))
33883 /* Convert to COMMON to emit in BSS. */
33884 DECL_COMMON (decl) = 1;
33885 return tls_comm_section;
33887 else
33888 return tls_private_data_section;
33890 else
33891 #endif
33892 if (TREE_PUBLIC (decl))
33893 return data_section;
33894 else
33895 return private_data_section;
33899 static void
33900 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33902 const char *name;
33904 /* Use select_section for private data and uninitialized data with
33905 alignment <= BIGGEST_ALIGNMENT. */
33906 if (!TREE_PUBLIC (decl)
33907 || DECL_COMMON (decl)
33908 || (DECL_INITIAL (decl) == NULL_TREE
33909 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33910 || DECL_INITIAL (decl) == error_mark_node
33911 || (flag_zero_initialized_in_bss
33912 && initializer_zerop (DECL_INITIAL (decl))))
33913 return;
33915 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33916 name = (*targetm.strip_name_encoding) (name);
33917 set_decl_section_name (decl, name);
33920 /* Select section for constant in constant pool.
33922 On RS/6000, all constants are in the private read-only data area.
33923 However, if this is being placed in the TOC it must be output as a
33924 toc entry. */
33926 static section *
33927 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33928 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33930 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33931 return toc_section;
33932 else
33933 return read_only_private_data_section;
33936 /* Remove any trailing [DS] or the like from the symbol name. */
33938 static const char *
33939 rs6000_xcoff_strip_name_encoding (const char *name)
33941 size_t len;
33942 if (*name == '*')
33943 name++;
33944 len = strlen (name);
33945 if (name[len - 1] == ']')
33946 return ggc_alloc_string (name, len - 4);
33947 else
33948 return name;
33951 /* Section attributes. AIX is always PIC. */
33953 static unsigned int
33954 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33956 unsigned int align;
33957 unsigned int flags = default_section_type_flags (decl, name, reloc);
33959 /* Align to at least UNIT size. */
33960 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33961 align = MIN_UNITS_PER_WORD;
33962 else
33963 /* Increase alignment of large objects if not already stricter. */
33964 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33965 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33966 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33968 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
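/* For example, on a 32-bit target (MIN_UNITS_PER_WORD 4) an 8-byte
   double gives align = MAX (8, UNITS_PER_FP_WORD) = 8, and
   exact_log2 (8) = 3 is folded into the SECTION_ENTSIZE bits, which
   rs6000_xcoff_asm_named_section later prints as the .csect alignment
   operand.  */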
33971 /* Output at beginning of assembler file.
33973 Initialize the section names for the RS/6000 at this point.
33975 Specify filename, including full path, to assembler.
33977 We want to go into the TOC section so at least one .toc will be emitted.
33978 Also, in order to output proper .bs/.es pairs, we need at least one static
33979 [RW] section emitted.
33981 Finally, declare mcount when profiling to make the assembler happy. */
33983 static void
33984 rs6000_xcoff_file_start (void)
33986 rs6000_gen_section_name (&xcoff_bss_section_name,
33987 main_input_filename, ".bss_");
33988 rs6000_gen_section_name (&xcoff_private_data_section_name,
33989 main_input_filename, ".rw_");
33990 rs6000_gen_section_name (&xcoff_read_only_section_name,
33991 main_input_filename, ".ro_");
33992 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33993 main_input_filename, ".tls_");
33994 rs6000_gen_section_name (&xcoff_tbss_section_name,
33995 main_input_filename, ".tbss_[UL]");
33997 fputs ("\t.file\t", asm_out_file);
33998 output_quoted_string (asm_out_file, main_input_filename);
33999 fputc ('\n', asm_out_file);
34000 if (write_symbols != NO_DEBUG)
34001 switch_to_section (private_data_section);
34002 switch_to_section (toc_section);
34003 switch_to_section (text_section);
34004 if (profile_flag)
34005 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
34006 rs6000_file_start ();
34009 /* Output at end of assembler file.
34010 On the RS/6000, referencing data should automatically pull in text. */
34012 static void
34013 rs6000_xcoff_file_end (void)
34015 switch_to_section (text_section);
34016 fputs ("_section_.text:\n", asm_out_file);
34017 switch_to_section (data_section);
34018 fputs (TARGET_32BIT
34019 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
34020 asm_out_file);
34023 struct declare_alias_data
34025 FILE *file;
34026 bool function_descriptor;
34029 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
34031 static bool
34032 rs6000_declare_alias (struct symtab_node *n, void *d)
34034 struct declare_alias_data *data = (struct declare_alias_data *)d;
34035 /* Main symbol is output specially, because varasm machinery does part of
34036 the job for us - we do not need to declare .globl/lglobs and such. */
34037 if (!n->alias || n->weakref)
34038 return false;
34040 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
34041 return false;
34043 /* Prevent assemble_alias from trying to use .set pseudo operation
34044 that does not behave as expected by the middle-end. */
34045 TREE_ASM_WRITTEN (n->decl) = true;
34047 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
34048 char *buffer = (char *) alloca (strlen (name) + 2);
34049 char *p;
34050 int dollar_inside = 0;
34052 strcpy (buffer, name);
34053 p = strchr (buffer, '$');
34054 while (p) {
34055 *p = '_';
34056 dollar_inside++;
34057 p = strchr (p + 1, '$');
34059 if (TREE_PUBLIC (n->decl))
34061 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
34063 if (dollar_inside) {
34064 if (data->function_descriptor)
34065 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34066 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34068 if (data->function_descriptor)
34070 fputs ("\t.globl .", data->file);
34071 RS6000_OUTPUT_BASENAME (data->file, buffer);
34072 putc ('\n', data->file);
34074 fputs ("\t.globl ", data->file);
34075 RS6000_OUTPUT_BASENAME (data->file, buffer);
34076 putc ('\n', data->file);
34078 #ifdef ASM_WEAKEN_DECL
34079 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
34080 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
34081 #endif
34083 else
34085 if (dollar_inside)
34087 if (data->function_descriptor)
34088 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34089 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34091 if (data->function_descriptor)
34093 fputs ("\t.lglobl .", data->file);
34094 RS6000_OUTPUT_BASENAME (data->file, buffer);
34095 putc ('\n', data->file);
34097 fputs ("\t.lglobl ", data->file);
34098 RS6000_OUTPUT_BASENAME (data->file, buffer);
34099 putc ('\n', data->file);
34101 if (data->function_descriptor)
34102 fputs (".", data->file);
34103 RS6000_OUTPUT_BASENAME (data->file, buffer);
34104 fputs (":\n", data->file);
34105 return false;
34108 /* This macro produces the initial definition of a function name.
34109 On the RS/6000, we need to place an extra '.' in the function name and
34110 output the function descriptor.
34111 Dollar signs are converted to underscores.
34113 The csect for the function will have already been created when
34114 text_section was selected. We do have to go back to that csect, however.
34116 The third and fourth parameters to the .function pseudo-op (16 and 044)
34117 are placeholders which no longer have any use.
34119 Because AIX assembler's .set command has unexpected semantics, we output
34120 all aliases as alternative labels in front of the definition. */
34122 void
34123 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
34125 char *buffer = (char *) alloca (strlen (name) + 1);
34126 char *p;
34127 int dollar_inside = 0;
34128 struct declare_alias_data data = {file, false};
34130 strcpy (buffer, name);
34131 p = strchr (buffer, '$');
34132 while (p) {
34133 *p = '_';
34134 dollar_inside++;
34135 p = strchr (p + 1, '$');
34137 if (TREE_PUBLIC (decl))
34139 if (!RS6000_WEAK || !DECL_WEAK (decl))
34141 if (dollar_inside) {
34142 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34143 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34145 fputs ("\t.globl .", file);
34146 RS6000_OUTPUT_BASENAME (file, buffer);
34147 putc ('\n', file);
34150 else
34152 if (dollar_inside) {
34153 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34154 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34156 fputs ("\t.lglobl .", file);
34157 RS6000_OUTPUT_BASENAME (file, buffer);
34158 putc ('\n', file);
34160 fputs ("\t.csect ", file);
34161 RS6000_OUTPUT_BASENAME (file, buffer);
34162 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34163 RS6000_OUTPUT_BASENAME (file, buffer);
34164 fputs (":\n", file);
34165 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34166 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34167 RS6000_OUTPUT_BASENAME (file, buffer);
34168 fputs (", TOC[tc0], 0\n", file);
34169 in_section = NULL;
34170 switch_to_section (function_section (decl));
34171 putc ('.', file);
34172 RS6000_OUTPUT_BASENAME (file, buffer);
34173 fputs (":\n", file);
34174 data.function_descriptor = true;
34175 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34176 if (!DECL_IGNORED_P (decl))
34178 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34179 xcoffout_declare_function (file, decl, buffer);
34180 else if (write_symbols == DWARF2_DEBUG)
34182 name = (*targetm.strip_name_encoding) (name);
34183 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34186 return;
34189 /* This macro produces the initial definition of an object (variable) name.
34190 Because AIX assembler's .set command has unexpected semantics, we output
34191 all aliases as alternative labels in front of the definition. */
34193 void
34194 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34196 struct declare_alias_data data = {file, false};
34197 RS6000_OUTPUT_BASENAME (file, name);
34198 fputs (":\n", file);
34199 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34202 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
34204 void
34205 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34207 fputs (integer_asm_op (size, FALSE), file);
34208 assemble_name (file, label);
34209 fputs ("-$", file);
34212 /* Output a symbol offset relative to the dbase for the current object.
34213 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34214 signed offsets.
34216 __gcc_unwind_dbase is embedded in all executables/libraries through
34217 libgcc/config/rs6000/crtdbase.S. */
34219 void
34220 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34222 fputs (integer_asm_op (size, FALSE), file);
34223 assemble_name (file, label);
34224 fputs("-__gcc_unwind_dbase", file);
34227 #ifdef HAVE_AS_TLS
34228 static void
34229 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34231 rtx symbol;
34232 int flags;
34234 default_encode_section_info (decl, rtl, first);
34236 /* Careful not to prod global register variables. */
34237 if (!MEM_P (rtl))
34238 return;
34239 symbol = XEXP (rtl, 0);
34240 if (GET_CODE (symbol) != SYMBOL_REF)
34241 return;
34243 flags = SYMBOL_REF_FLAGS (symbol);
34245 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34246 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34248 SYMBOL_REF_FLAGS (symbol) = flags;
34250 #endif /* HAVE_AS_TLS */
34251 #endif /* TARGET_XCOFF */
34253 /* Return true if INSN should not be copied. */
34255 static bool
34256 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34258 return recog_memoized (insn) >= 0
34259 && get_attr_cannot_copy (insn);
34262 /* Compute a (partial) cost for rtx X. Return true if the complete
34263 cost has been computed, and false if subexpressions should be
34264 scanned. In either case, *TOTAL contains the cost result. */
34266 static bool
34267 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34268 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34270 int code = GET_CODE (x);
34272 switch (code)
34274 /* On the RS/6000, if it is valid in the insn, it is free. */
34275 case CONST_INT:
34276 if (((outer_code == SET
34277 || outer_code == PLUS
34278 || outer_code == MINUS)
34279 && (satisfies_constraint_I (x)
34280 || satisfies_constraint_L (x)))
34281 || (outer_code == AND
34282 && (satisfies_constraint_K (x)
34283 || (mode == SImode
34284 ? satisfies_constraint_L (x)
34285 : satisfies_constraint_J (x))))
34286 || ((outer_code == IOR || outer_code == XOR)
34287 && (satisfies_constraint_K (x)
34288 || (mode == SImode
34289 ? satisfies_constraint_L (x)
34290 : satisfies_constraint_J (x))))
34291 || outer_code == ASHIFT
34292 || outer_code == ASHIFTRT
34293 || outer_code == LSHIFTRT
34294 || outer_code == ROTATE
34295 || outer_code == ROTATERT
34296 || outer_code == ZERO_EXTRACT
34297 || (outer_code == MULT
34298 && satisfies_constraint_I (x))
34299 || ((outer_code == DIV || outer_code == UDIV
34300 || outer_code == MOD || outer_code == UMOD)
34301 && exact_log2 (INTVAL (x)) >= 0)
34302 || (outer_code == COMPARE
34303 && (satisfies_constraint_I (x)
34304 || satisfies_constraint_K (x)))
34305 || ((outer_code == EQ || outer_code == NE)
34306 && (satisfies_constraint_I (x)
34307 || satisfies_constraint_K (x)
34308 || (mode == SImode
34309 ? satisfies_constraint_L (x)
34310 : satisfies_constraint_J (x))))
34311 || (outer_code == GTU
34312 && satisfies_constraint_I (x))
34313 || (outer_code == LTU
34314 && satisfies_constraint_P (x)))
34316 *total = 0;
34317 return true;
34319 else if ((outer_code == PLUS
34320 && reg_or_add_cint_operand (x, VOIDmode))
34321 || (outer_code == MINUS
34322 && reg_or_sub_cint_operand (x, VOIDmode))
34323 || ((outer_code == SET
34324 || outer_code == IOR
34325 || outer_code == XOR)
34326 && (INTVAL (x)
34327 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34329 *total = COSTS_N_INSNS (1);
34330 return true;
34332 /* FALLTHRU */
34334 case CONST_DOUBLE:
34335 case CONST_WIDE_INT:
34336 case CONST:
34337 case HIGH:
34338 case SYMBOL_REF:
34339 case MEM:
34340 /* When optimizing for size, MEM should be slightly more expensive
34341 than generating address, e.g., (plus (reg) (const)).
34342 L1 cache latency is about two instructions. */
34343 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34344 return true;
34346 case LABEL_REF:
34347 *total = 0;
34348 return true;
34350 case PLUS:
34351 case MINUS:
34352 if (FLOAT_MODE_P (mode))
34353 *total = rs6000_cost->fp;
34354 else
34355 *total = COSTS_N_INSNS (1);
34356 return false;
34358 case MULT:
34359 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34360 && satisfies_constraint_I (XEXP (x, 1)))
34362 if (INTVAL (XEXP (x, 1)) >= -256
34363 && INTVAL (XEXP (x, 1)) <= 255)
34364 *total = rs6000_cost->mulsi_const9;
34365 else
34366 *total = rs6000_cost->mulsi_const;
34368 else if (mode == SFmode)
34369 *total = rs6000_cost->fp;
34370 else if (FLOAT_MODE_P (mode))
34371 *total = rs6000_cost->dmul;
34372 else if (mode == DImode)
34373 *total = rs6000_cost->muldi;
34374 else
34375 *total = rs6000_cost->mulsi;
34376 return false;
34378 case FMA:
34379 if (mode == SFmode)
34380 *total = rs6000_cost->fp;
34381 else
34382 *total = rs6000_cost->dmul;
34383 break;
34385 case DIV:
34386 case MOD:
34387 if (FLOAT_MODE_P (mode))
34389 *total = mode == DFmode ? rs6000_cost->ddiv
34390 : rs6000_cost->sdiv;
34391 return false;
34393 /* FALLTHRU */
34395 case UDIV:
34396 case UMOD:
34397 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34398 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34400 if (code == DIV || code == MOD)
34401 /* Shift, addze */
34402 *total = COSTS_N_INSNS (2);
34403 else
34404 /* Shift */
34405 *total = COSTS_N_INSNS (1);
34407 else
34409 if (GET_MODE (XEXP (x, 1)) == DImode)
34410 *total = rs6000_cost->divdi;
34411 else
34412 *total = rs6000_cost->divsi;
34414 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34415 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34416 *total += COSTS_N_INSNS (2);
34417 return false;
34419 case CTZ:
34420 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34421 return false;
34423 case FFS:
34424 *total = COSTS_N_INSNS (4);
34425 return false;
34427 case POPCOUNT:
34428 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34429 return false;
34431 case PARITY:
34432 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34433 return false;
34435 case NOT:
34436 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34437 *total = 0;
34438 else
34439 *total = COSTS_N_INSNS (1);
34440 return false;
34442 case AND:
34443 if (CONST_INT_P (XEXP (x, 1)))
34445 rtx left = XEXP (x, 0);
34446 rtx_code left_code = GET_CODE (left);
34448 /* rotate-and-mask: 1 insn. */
34449 if ((left_code == ROTATE
34450 || left_code == ASHIFT
34451 || left_code == LSHIFTRT)
34452 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34454 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34455 if (!CONST_INT_P (XEXP (left, 1)))
34456 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34457 *total += COSTS_N_INSNS (1);
34458 return true;
34461 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34462 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34463 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34464 || (val & 0xffff) == val
34465 || (val & 0xffff0000) == val
34466 || ((val & 0xffff) == 0 && mode == SImode))
34468 *total = rtx_cost (left, mode, AND, 0, speed);
34469 *total += COSTS_N_INSNS (1);
34470 return true;
34473 /* 2 insns. */
34474 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34476 *total = rtx_cost (left, mode, AND, 0, speed);
34477 *total += COSTS_N_INSNS (2);
34478 return true;
34482 *total = COSTS_N_INSNS (1);
34483 return false;
34485 case IOR:
34486 /* FIXME */
34487 *total = COSTS_N_INSNS (1);
34488 return true;
34490 case CLZ:
34491 case XOR:
34492 case ZERO_EXTRACT:
34493 *total = COSTS_N_INSNS (1);
34494 return false;
34496 case ASHIFT:
34497 /* EXTSWSLI is a combined sign-extend-and-shift instruction. Don't
34498 count the sign extend and the shift separately within the insn. */
34499 if (TARGET_EXTSWSLI && mode == DImode
34500 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34501 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34503 *total = 0;
34504 return false;
34506 /* fall through */
34508 case ASHIFTRT:
34509 case LSHIFTRT:
34510 case ROTATE:
34511 case ROTATERT:
34512 /* Handle mul_highpart. */
34513 if (outer_code == TRUNCATE
34514 && GET_CODE (XEXP (x, 0)) == MULT)
34516 if (mode == DImode)
34517 *total = rs6000_cost->muldi;
34518 else
34519 *total = rs6000_cost->mulsi;
34520 return true;
34522 else if (outer_code == AND)
34523 *total = 0;
34524 else
34525 *total = COSTS_N_INSNS (1);
34526 return false;
34528 case SIGN_EXTEND:
34529 case ZERO_EXTEND:
34530 if (GET_CODE (XEXP (x, 0)) == MEM)
34531 *total = 0;
34532 else
34533 *total = COSTS_N_INSNS (1);
34534 return false;
34536 case COMPARE:
34537 case NEG:
34538 case ABS:
34539 if (!FLOAT_MODE_P (mode))
34541 *total = COSTS_N_INSNS (1);
34542 return false;
34544 /* FALLTHRU */
34546 case FLOAT:
34547 case UNSIGNED_FLOAT:
34548 case FIX:
34549 case UNSIGNED_FIX:
34550 case FLOAT_TRUNCATE:
34551 *total = rs6000_cost->fp;
34552 return false;
34554 case FLOAT_EXTEND:
34555 if (mode == DFmode)
34556 *total = rs6000_cost->sfdf_convert;
34557 else
34558 *total = rs6000_cost->fp;
34559 return false;
34561 case UNSPEC:
34562 switch (XINT (x, 1))
34564 case UNSPEC_FRSP:
34565 *total = rs6000_cost->fp;
34566 return true;
34568 default:
34569 break;
34571 break;
34573 case CALL:
34574 case IF_THEN_ELSE:
34575 if (!speed)
34577 *total = COSTS_N_INSNS (1);
34578 return true;
34580 else if (FLOAT_MODE_P (mode)
34581 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
34583 *total = rs6000_cost->fp;
34584 return false;
34586 break;
34588 case NE:
34589 case EQ:
34590 case GTU:
34591 case LTU:
34592 /* Carry bit requires mode == Pmode.
34593 NEG or PLUS already counted so only add one. */
34594 if (mode == Pmode
34595 && (outer_code == NEG || outer_code == PLUS))
34597 *total = COSTS_N_INSNS (1);
34598 return true;
34600 if (outer_code == SET)
34602 if (XEXP (x, 1) == const0_rtx)
34604 if (TARGET_ISEL && !TARGET_MFCRF)
34605 *total = COSTS_N_INSNS (8);
34606 else
34607 *total = COSTS_N_INSNS (2);
34608 return true;
34610 else
34612 *total = COSTS_N_INSNS (3);
34613 return false;
34616 /* FALLTHRU */
34618 case GT:
34619 case LT:
34620 case UNORDERED:
34621 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
34623 if (TARGET_ISEL && !TARGET_MFCRF)
34624 *total = COSTS_N_INSNS (8);
34625 else
34626 *total = COSTS_N_INSNS (2);
34627 return true;
34629 /* CC COMPARE. */
34630 if (outer_code == COMPARE)
34632 *total = 0;
34633 return true;
34635 break;
34637 default:
34638 break;
34641 return false;
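/* For illustration: (const_int 42) as the second operand of a PLUS
   satisfies constraint I (a signed 16-bit immediate, usable by addi),
   so it is costed at 0 above, while SETting a full 32-bit constant
   such as 0x12345678 into a register falls into the second branch and
   costs COSTS_N_INSNS (1).  */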
34644 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34646 static bool
34647 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34648 int opno, int *total, bool speed)
34650 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34652 fprintf (stderr,
34653 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34654 "opno = %d, total = %d, speed = %s, x:\n",
34655 ret ? "complete" : "scan inner",
34656 GET_MODE_NAME (mode),
34657 GET_RTX_NAME (outer_code),
34658 opno,
34659 *total,
34660 speed ? "true" : "false");
34662 debug_rtx (x);
34664 return ret;
34667 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34669 static int
34670 rs6000_debug_address_cost (rtx x, machine_mode mode,
34671 addr_space_t as, bool speed)
34673 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34675 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34676 ret, speed ? "true" : "false");
34677 debug_rtx (x);
34679 return ret;
34683 /* A C expression returning the cost of moving data from a register of class
34684 CLASS1 to one of CLASS2. */
34686 static int
34687 rs6000_register_move_cost (machine_mode mode,
34688 reg_class_t from, reg_class_t to)
34690 int ret;
34692 if (TARGET_DEBUG_COST)
34693 dbg_cost_ctrl++;
34695 /* Moves from/to GENERAL_REGS. */
34696 if (reg_classes_intersect_p (to, GENERAL_REGS)
34697 || reg_classes_intersect_p (from, GENERAL_REGS))
34699 reg_class_t rclass = from;
34701 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34702 rclass = to;
34704 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34705 ret = (rs6000_memory_move_cost (mode, rclass, false)
34706 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34708 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34709 shift. */
34710 else if (rclass == CR_REGS)
34711 ret = 4;
34713 /* For those processors that have slow LR/CTR moves, make them more
34714 expensive than memory in order to bias spills to memory. */
34715 else if ((rs6000_cpu == PROCESSOR_POWER6
34716 || rs6000_cpu == PROCESSOR_POWER7
34717 || rs6000_cpu == PROCESSOR_POWER8
34718 || rs6000_cpu == PROCESSOR_POWER9)
34719 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34720 ret = 6 * hard_regno_nregs[0][mode];
34722 else
34723 /* A move will cost one instruction per GPR moved. */
34724 ret = 2 * hard_regno_nregs[0][mode];
34727 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34728 else if (VECTOR_MEM_VSX_P (mode)
34729 && reg_classes_intersect_p (to, VSX_REGS)
34730 && reg_classes_intersect_p (from, VSX_REGS))
34731 ret = 2 * hard_regno_nregs[32][mode];
34733 /* Moving between two similar registers is just one instruction. */
34734 else if (reg_classes_intersect_p (to, from))
34735 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34737 /* Everything else has to go through GENERAL_REGS. */
34738 else
34739 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34740 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34742 if (TARGET_DEBUG_COST)
34744 if (dbg_cost_ctrl == 1)
34745 fprintf (stderr,
34746 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34747 ret, GET_MODE_NAME (mode), reg_class_names[from],
34748 reg_class_names[to]);
34749 dbg_cost_ctrl--;
34752 return ret;
34755 /* A C expression returning the cost of moving data of MODE from a
34756 register to or from memory. */
34758 static int
34759 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34760 bool in ATTRIBUTE_UNUSED)
34762 int ret;
34764 if (TARGET_DEBUG_COST)
34765 dbg_cost_ctrl++;
34767 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34768 ret = 4 * hard_regno_nregs[0][mode];
34769 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34770 || reg_classes_intersect_p (rclass, VSX_REGS)))
34771 ret = 4 * hard_regno_nregs[32][mode];
34772 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34773 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
34774 else
34775 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34777 if (TARGET_DEBUG_COST)
34779 if (dbg_cost_ctrl == 1)
34780 fprintf (stderr,
34781 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34782 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34783 dbg_cost_ctrl--;
34786 return ret;
34789 /* Returns a code for a target-specific builtin that implements
34790 reciprocal of the function, or NULL_TREE if not available. */
34792 static tree
34793 rs6000_builtin_reciprocal (tree fndecl)
34795 switch (DECL_FUNCTION_CODE (fndecl))
34797 case VSX_BUILTIN_XVSQRTDP:
34798 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34799 return NULL_TREE;
34801 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34803 case VSX_BUILTIN_XVSQRTSP:
34804 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34805 return NULL_TREE;
34807 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34809 default:
34810 return NULL_TREE;
34814 /* Load up a constant. If the mode is a vector mode, splat the value across
34815 all of the vector elements. */
34817 static rtx
34818 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34820 rtx reg;
34822 if (mode == SFmode || mode == DFmode)
34824 rtx d = const_double_from_real_value (dconst, mode);
34825 reg = force_reg (mode, d);
34827 else if (mode == V4SFmode)
34829 rtx d = const_double_from_real_value (dconst, SFmode);
34830 rtvec v = gen_rtvec (4, d, d, d, d);
34831 reg = gen_reg_rtx (mode);
34832 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34834 else if (mode == V2DFmode)
34836 rtx d = const_double_from_real_value (dconst, DFmode);
34837 rtvec v = gen_rtvec (2, d, d);
34838 reg = gen_reg_rtx (mode);
34839 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34841 else
34842 gcc_unreachable ();
34844 return reg;
34847 /* Generate an FMA instruction. */
34849 static void
34850 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34852 machine_mode mode = GET_MODE (target);
34853 rtx dst;
34855 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34856 gcc_assert (dst != NULL);
34858 if (dst != target)
34859 emit_move_insn (target, dst);
34862 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34864 static void
34865 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34867 machine_mode mode = GET_MODE (dst);
34868 rtx r;
34870 /* This is a tad more complicated, since the fnma_optab is for
34871 a different expression: fma(-m1, m2, a), which is the same
34872 thing except in the case of signed zeros.
34874 Fortunately we know that if FMA is supported that FNMSUB is
34875 also supported in the ISA. Just expand it directly. */
34877 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34879 r = gen_rtx_NEG (mode, a);
34880 r = gen_rtx_FMA (mode, m1, m2, r);
34881 r = gen_rtx_NEG (mode, r);
34882 emit_insn (gen_rtx_SET (dst, r));
34885 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34886 add a reg_note saying that this was a division. Support both scalar and
34887 vector divide. Assumes no trapping math and finite arguments. */
34889 void
34890 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34892 machine_mode mode = GET_MODE (dst);
34893 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34894 int i;
34896 /* Low precision estimates guarantee 5 bits of accuracy. High
34897 precision estimates guarantee 14 bits of accuracy. SFmode
34898 requires 23 bits of accuracy. DFmode requires 52 bits of
34899 accuracy. Each pass at least doubles the accuracy, leading
34900 to the following. */
34901 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34902 if (mode == DFmode || mode == V2DFmode)
34903 passes++;
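  /* Worked out: a 5-bit estimate doubles to 10, 20, then 40 bits over
     the 3 passes, covering SFmode's 23 bits; the extra DFmode pass
     reaches 80 bits for its 52.  With TARGET_RECIP_PRECISION, 14 bits
     doubles to 28 in one pass (56 in two for DFmode).  */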
34905 enum insn_code code = optab_handler (smul_optab, mode);
34906 insn_gen_fn gen_mul = GEN_FCN (code);
34908 gcc_assert (code != CODE_FOR_nothing);
34910 one = rs6000_load_constant_and_splat (mode, dconst1);
34912 /* x0 = 1./d estimate */
34913 x0 = gen_reg_rtx (mode);
34914 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34915 UNSPEC_FRES)));
34917 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34918 if (passes > 1) {
34920 /* e0 = 1. - d * x0 */
34921 e0 = gen_reg_rtx (mode);
34922 rs6000_emit_nmsub (e0, d, x0, one);
34924 /* x1 = x0 + e0 * x0 */
34925 x1 = gen_reg_rtx (mode);
34926 rs6000_emit_madd (x1, e0, x0, x0);
34928 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34929 ++i, xprev = xnext, eprev = enext) {
34931 /* enext = eprev * eprev */
34932 enext = gen_reg_rtx (mode);
34933 emit_insn (gen_mul (enext, eprev, eprev));
34935 /* xnext = xprev + enext * xprev */
34936 xnext = gen_reg_rtx (mode);
34937 rs6000_emit_madd (xnext, enext, xprev, xprev);
34940 } else
34941 xprev = x0;
34943 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34945 /* u = n * xprev */
34946 u = gen_reg_rtx (mode);
34947 emit_insn (gen_mul (u, n, xprev));
34949 /* v = n - (d * u) */
34950 v = gen_reg_rtx (mode);
34951 rs6000_emit_nmsub (v, d, u, n);
34953 /* dst = (v * xprev) + u */
34954 rs6000_emit_madd (dst, v, xprev, u);
34956 if (note_p)
34957 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
34960 /* Goldschmidt's Algorithm for single/double-precision floating point
34961 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
34963 void
34964 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34966 machine_mode mode = GET_MODE (src);
34967 rtx e = gen_reg_rtx (mode);
34968 rtx g = gen_reg_rtx (mode);
34969 rtx h = gen_reg_rtx (mode);
34971 /* Low precision estimates guarantee 5 bits of accuracy. High
34972 precision estimates guarantee 14 bits of accuracy. SFmode
34973 requires 23 bits of accuracy. DFmode requires 52 bits of
34974 accuracy. Each pass at least doubles the accuracy, leading
34975 to the following. */
34976 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34977 if (mode == DFmode || mode == V2DFmode)
34978 passes++;
34980 int i;
34981 rtx mhalf;
34982 enum insn_code code = optab_handler (smul_optab, mode);
34983 insn_gen_fn gen_mul = GEN_FCN (code);
34985 gcc_assert (code != CODE_FOR_nothing);
34987 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34989 /* e = rsqrt estimate */
34990 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34991 UNSPEC_RSQRT)));
34993 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34994 if (!recip)
34996 rtx zero = force_reg (mode, CONST0_RTX (mode));
34998 if (mode == SFmode)
35000 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35001 e, zero, mode, 0);
35002 if (target != e)
35003 emit_move_insn (e, target);
35005 else
35007 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35008 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35012 /* g = sqrt estimate. */
35013 emit_insn (gen_mul (g, e, src));
35014 /* h = 1/(2*sqrt) estimate. */
35015 emit_insn (gen_mul (h, e, mhalf));
35017 if (recip)
35019 if (passes == 1)
35021 rtx t = gen_reg_rtx (mode);
35022 rs6000_emit_nmsub (t, g, h, mhalf);
35023 /* Apply correction directly to the rsqrt estimate e. */
35024 rs6000_emit_madd (dst, e, t, e);
35026 else
35028 for (i = 0; i < passes; i++)
35030 rtx t1 = gen_reg_rtx (mode);
35031 rtx g1 = gen_reg_rtx (mode);
35032 rtx h1 = gen_reg_rtx (mode);
35034 rs6000_emit_nmsub (t1, g, h, mhalf);
35035 rs6000_emit_madd (g1, g, t1, g);
35036 rs6000_emit_madd (h1, h, t1, h);
35038 g = g1;
35039 h = h1;
35041 /* h ~= 1/(2*sqrt), so multiply by 2 (h + h) to get the rsqrt result. */
35042 emit_insn (gen_add3_insn (dst, h, h));
35045 else
35047 rtx t = gen_reg_rtx (mode);
35048 rs6000_emit_nmsub (t, g, h, mhalf);
35049 rs6000_emit_madd (dst, g, t, g);
35052 return;
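/* Derivation of the iteration above (for reference): with e the
   initial frsqrte estimate, the code maintains g ~= sqrt(src)
   (g = e*src) and h ~= 1/(2*sqrt(src)) (h = e/2), and each pass
   computes

       t  = 1/2 - g*h
       g' = g + g*t
       h' = h + h*t.

   Writing g = sqrt(src)*(1+eps) and h = (1+eps)/(2*sqrt(src)) gives
   t = (1 - (1+eps)^2)/2 ~= -eps, so the relative error after a pass is
   O(eps^2); the pass counts are thus the same as in rs6000_emit_swdiv.
   The sqrt result is the corrected g, and the rsqrt result is 2*h,
   emitted as h + h.  */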
35055 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35056 (Power7) targets. DST is the target, and SRC is the argument operand. */
35058 void
35059 rs6000_emit_popcount (rtx dst, rtx src)
35061 machine_mode mode = GET_MODE (dst);
35062 rtx tmp1, tmp2;
35064 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35065 if (TARGET_POPCNTD)
35067 if (mode == SImode)
35068 emit_insn (gen_popcntdsi2 (dst, src));
35069 else
35070 emit_insn (gen_popcntddi2 (dst, src));
35071 return;
35074 tmp1 = gen_reg_rtx (mode);
35076 if (mode == SImode)
35078 emit_insn (gen_popcntbsi2 (tmp1, src));
35079 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35080 NULL_RTX, 0);
35081 tmp2 = force_reg (SImode, tmp2);
35082 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35084 else
35086 emit_insn (gen_popcntbdi2 (tmp1, src));
35087 tmp2 = expand_mult (DImode, tmp1,
35088 GEN_INT ((HOST_WIDE_INT)
35089 0x01010101 << 32 | 0x01010101),
35090 NULL_RTX, 0);
35091 tmp2 = force_reg (DImode, tmp2);
35092 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
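/* Worked example of the fallback above (SImode): popcntb replaces each
   byte of SRC with that byte's population count; multiplying by
   0x01010101 accumulates the four counts into the top byte (each
   partial sum is at most 32, so no carry crosses a byte boundary), and
   the shift extracts it.  For src = 0xf00f00ff:

     tmp1 = popcntb (src)       = 0x04040008
     tmp2 = tmp1 * 0x01010101   = 0x100c0808   (top byte 0x10 = 16)
     dst  = tmp2 >> 24          = 16.  */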
35097 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35098 target, and SRC is the argument operand. */
35100 void
35101 rs6000_emit_parity (rtx dst, rtx src)
35103 machine_mode mode = GET_MODE (dst);
35104 rtx tmp;
35106 tmp = gen_reg_rtx (mode);
35108 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35109 if (TARGET_CMPB)
35111 if (mode == SImode)
35113 emit_insn (gen_popcntbsi2 (tmp, src));
35114 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35116 else
35118 emit_insn (gen_popcntbdi2 (tmp, src));
35119 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35121 return;
35124 if (mode == SImode)
35126 /* Is mult+shift >= shift+xor+shift+xor? */
35127 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35129 rtx tmp1, tmp2, tmp3, tmp4;
35131 tmp1 = gen_reg_rtx (SImode);
35132 emit_insn (gen_popcntbsi2 (tmp1, src));
35134 tmp2 = gen_reg_rtx (SImode);
35135 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35136 tmp3 = gen_reg_rtx (SImode);
35137 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35139 tmp4 = gen_reg_rtx (SImode);
35140 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35141 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35143 else
35144 rs6000_emit_popcount (tmp, src);
35145 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35147 else
35149 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35150 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35152 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35154 tmp1 = gen_reg_rtx (DImode);
35155 emit_insn (gen_popcntbdi2 (tmp1, src));
35157 tmp2 = gen_reg_rtx (DImode);
35158 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35159 tmp3 = gen_reg_rtx (DImode);
35160 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35162 tmp4 = gen_reg_rtx (DImode);
35163 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35164 tmp5 = gen_reg_rtx (DImode);
35165 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35167 tmp6 = gen_reg_rtx (DImode);
35168 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35169 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35171 else
35172 rs6000_emit_popcount (tmp, src);
35173 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
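/* Why the shift/xor sequence above works: after popcntb each byte of
   tmp1 holds a per-byte population count, and the parity of SRC is the
   low bit of the sum of those counts.  XOR preserves the low bit of a
   sum, so folding halves together and masking with 1 suffices.  A
   plain-C sketch for 32 bits (popcntb_si is a stand-in for the insn,
   not a real function):

     unsigned p = popcntb_si (src);   // per-byte counts
     p ^= p >> 16;                    // fold upper half onto lower
     p ^= p >> 8;                     // low byte = xor of all counts
     return p & 1;

   The mult+shift alternative is a full rs6000_emit_popcount followed
   by the same AND with 1.  */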
35177 /* Expand an Altivec constant permutation for little endian mode.
35178 There are two issues: First, the two input operands must be
35179 swapped so that together they form a double-wide array in LE
35180 order. Second, the vperm instruction has surprising behavior
35181 in LE mode: it interprets the elements of the source vectors
35182 in BE mode ("left to right") and interprets the elements of
35183 the destination vector in LE mode ("right to left"). To
35184 correct for this, we must subtract each element of the permute
35185 control vector from 31.
35187 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35188 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35189 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35190 serve as the permute control vector. Then, in BE mode,
35192 vperm 9,10,11,12
35194 places the desired result in vr9. However, in LE mode the
35195 vector contents will be
35197 vr10 = 00000003 00000002 00000001 00000000
35198 vr11 = 00000007 00000006 00000005 00000004
35200 The result of the vperm using the same permute control vector is
35202 vr9 = 05000000 07000000 01000000 03000000
35204 That is, the leftmost 4 bytes of vr10 are interpreted as the
35205 source for the rightmost 4 bytes of vr9, and so on.
35207 If we change the permute control vector to
35209 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35211 and issue
35213 vperm 9,11,10,12
35215 we get the desired
35217 vr9 = 00000006 00000004 00000002 00000000. */
35219 void
35220 altivec_expand_vec_perm_const_le (rtx operands[4])
35222 unsigned int i;
35223 rtx perm[16];
35224 rtx constv, unspec;
35225 rtx target = operands[0];
35226 rtx op0 = operands[1];
35227 rtx op1 = operands[2];
35228 rtx sel = operands[3];
35230 /* Unpack and adjust the constant selector. */
35231 for (i = 0; i < 16; ++i)
35233 rtx e = XVECEXP (sel, 0, i);
35234 unsigned int elt = 31 - (INTVAL (e) & 31);
35235 perm[i] = GEN_INT (elt);
35238 /* Expand to a permute, swapping the inputs and using the
35239 adjusted selector. */
35240 if (!REG_P (op0))
35241 op0 = force_reg (V16QImode, op0);
35242 if (!REG_P (op1))
35243 op1 = force_reg (V16QImode, op1);
35245 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35246 constv = force_reg (V16QImode, constv);
35247 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35248 UNSPEC_VPERM);
35249 if (!REG_P (target))
35251 rtx tmp = gen_reg_rtx (V16QImode);
35252 emit_move_insn (tmp, unspec);
35253 unspec = tmp;
35256 emit_move_insn (target, unspec);
35259 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35260 permute control vector. But here it's not a constant, so we must
35261 generate a vector NAND or NOR to do the adjustment: vperm reads only the low five bits of each selector byte, and 31 - e == 31 ^ e for e in [0,31], so a bitwise complement performs the subtraction from 31. */
35263 void
35264 altivec_expand_vec_perm_le (rtx operands[4])
35266 rtx notx, iorx, unspec;
35267 rtx target = operands[0];
35268 rtx op0 = operands[1];
35269 rtx op1 = operands[2];
35270 rtx sel = operands[3];
35271 rtx tmp = target;
35272 rtx norreg = gen_reg_rtx (V16QImode);
35273 machine_mode mode = GET_MODE (target);
35275 /* Get everything in regs so the pattern matches. */
35276 if (!REG_P (op0))
35277 op0 = force_reg (mode, op0);
35278 if (!REG_P (op1))
35279 op1 = force_reg (mode, op1);
35280 if (!REG_P (sel))
35281 sel = force_reg (V16QImode, sel);
35282 if (!REG_P (target))
35283 tmp = gen_reg_rtx (mode);
35285 if (TARGET_P9_VECTOR)
35287 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
35288 UNSPEC_VPERMR);
35290 else
35292 /* Invert the selector with a VNAND if available, else a VNOR.
35293 The VNAND is preferred for future fusion opportunities. */
35294 notx = gen_rtx_NOT (V16QImode, sel);
35295 iorx = (TARGET_P8_VECTOR
35296 ? gen_rtx_IOR (V16QImode, notx, notx)
35297 : gen_rtx_AND (V16QImode, notx, notx));
35298 emit_insn (gen_rtx_SET (norreg, iorx));
35300 /* Permute with operands reversed and adjusted selector. */
35301 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35302 UNSPEC_VPERM);
35305 /* Copy into target, possibly by way of a register. */
35306 if (!REG_P (target))
35308 emit_move_insn (tmp, unspec);
35309 unspec = tmp;
35312 emit_move_insn (target, unspec);
35315 /* Expand an Altivec constant permutation. Return true if we match
35316 an efficient implementation; false to fall back to VPERM. */
35318 bool
35319 altivec_expand_vec_perm_const (rtx operands[4])
35321 struct altivec_perm_insn {
35322 HOST_WIDE_INT mask;
35323 enum insn_code impl;
35324 unsigned char perm[16];
35326 static const struct altivec_perm_insn patterns[] = {
35327 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35328 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35329 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35330 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35331 { OPTION_MASK_ALTIVEC,
35332 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35333 : CODE_FOR_altivec_vmrglb_direct),
35334 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35335 { OPTION_MASK_ALTIVEC,
35336 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35337 : CODE_FOR_altivec_vmrglh_direct),
35338 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35339 { OPTION_MASK_ALTIVEC,
35340 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35341 : CODE_FOR_altivec_vmrglw_direct),
35342 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35343 { OPTION_MASK_ALTIVEC,
35344 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35345 : CODE_FOR_altivec_vmrghb_direct),
35346 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35347 { OPTION_MASK_ALTIVEC,
35348 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35349 : CODE_FOR_altivec_vmrghh_direct),
35350 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35351 { OPTION_MASK_ALTIVEC,
35352 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35353 : CODE_FOR_altivec_vmrghw_direct),
35354 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35355 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
35356 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35357 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
35358 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35361 unsigned int i, j, elt, which;
35362 unsigned char perm[16];
35363 rtx target, op0, op1, sel, x;
35364 bool one_vec;
35366 target = operands[0];
35367 op0 = operands[1];
35368 op1 = operands[2];
35369 sel = operands[3];
35371 /* Unpack the constant selector. */
35372 for (i = which = 0; i < 16; ++i)
35374 rtx e = XVECEXP (sel, 0, i);
35375 elt = INTVAL (e) & 31;
35376 which |= (elt < 16 ? 1 : 2);
35377 perm[i] = elt;
35380 /* Simplify the constant selector based on operands. */
35381 switch (which)
35383 default:
35384 gcc_unreachable ();
35386 case 3:
35387 one_vec = false;
35388 if (!rtx_equal_p (op0, op1))
35389 break;
35390 /* FALLTHRU */
35392 case 2:
35393 for (i = 0; i < 16; ++i)
35394 perm[i] &= 15;
35395 op0 = op1;
35396 one_vec = true;
35397 break;
35399 case 1:
35400 op1 = op0;
35401 one_vec = true;
35402 break;
35405 /* Look for splat patterns. */
35406 if (one_vec)
35408 elt = perm[0];
35410 for (i = 0; i < 16; ++i)
35411 if (perm[i] != elt)
35412 break;
35413 if (i == 16)
35415 if (!BYTES_BIG_ENDIAN)
35416 elt = 15 - elt;
35417 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35418 return true;
35421 if (elt % 2 == 0)
35423 for (i = 0; i < 16; i += 2)
35424 if (perm[i] != elt || perm[i + 1] != elt + 1)
35425 break;
35426 if (i == 16)
35428 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35429 x = gen_reg_rtx (V8HImode);
35430 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35431 GEN_INT (field)));
35432 emit_move_insn (target, gen_lowpart (V16QImode, x));
35433 return true;
35437 if (elt % 4 == 0)
35439 for (i = 0; i < 16; i += 4)
35440 if (perm[i] != elt
35441 || perm[i + 1] != elt + 1
35442 || perm[i + 2] != elt + 2
35443 || perm[i + 3] != elt + 3)
35444 break;
35445 if (i == 16)
35447 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35448 x = gen_reg_rtx (V4SImode);
35449 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35450 GEN_INT (field)));
35451 emit_move_insn (target, gen_lowpart (V16QImode, x));
35452 return true;
35457 /* Look for merge and pack patterns. */
35458 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35460 bool swapped;
35462 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35463 continue;
35465 elt = patterns[j].perm[0];
35466 if (perm[0] == elt)
35467 swapped = false;
35468 else if (perm[0] == elt + 16)
35469 swapped = true;
35470 else
35471 continue;
35472 for (i = 1; i < 16; ++i)
35474 elt = patterns[j].perm[i];
35475 if (swapped)
35476 elt = (elt >= 16 ? elt - 16 : elt + 16);
35477 else if (one_vec && elt >= 16)
35478 elt -= 16;
35479 if (perm[i] != elt)
35480 break;
35482 if (i == 16)
35484 enum insn_code icode = patterns[j].impl;
35485 machine_mode omode = insn_data[icode].operand[0].mode;
35486 machine_mode imode = insn_data[icode].operand[1].mode;
35488 /* For little-endian, don't use vpkuwum and vpkuhum if the
35489 underlying vector type is not V4SI and V8HI, respectively.
35490 For example, using vpkuwum with a V8HI picks up the even
35491 halfwords (BE numbering) when the even halfwords (LE
35492 numbering) are what we need. */
35493 if (!BYTES_BIG_ENDIAN
35494 && icode == CODE_FOR_altivec_vpkuwum_direct
35495 && ((GET_CODE (op0) == REG
35496 && GET_MODE (op0) != V4SImode)
35497 || (GET_CODE (op0) == SUBREG
35498 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35499 continue;
35500 if (!BYTES_BIG_ENDIAN
35501 && icode == CODE_FOR_altivec_vpkuhum_direct
35502 && ((GET_CODE (op0) == REG
35503 && GET_MODE (op0) != V8HImode)
35504 || (GET_CODE (op0) == SUBREG
35505 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35506 continue;
35508 /* For little-endian, the two input operands must be swapped
35509 (or swapped back) to ensure proper right-to-left numbering
35510 from 0 to 2N-1. */
35511 if (swapped ^ !BYTES_BIG_ENDIAN)
35512 std::swap (op0, op1);
35513 if (imode != V16QImode)
35515 op0 = gen_lowpart (imode, op0);
35516 op1 = gen_lowpart (imode, op1);
35518 if (omode == V16QImode)
35519 x = target;
35520 else
35521 x = gen_reg_rtx (omode);
35522 emit_insn (GEN_FCN (icode) (x, op0, op1));
35523 if (omode != V16QImode)
35524 emit_move_insn (target, gen_lowpart (V16QImode, x));
35525 return true;
35529 if (!BYTES_BIG_ENDIAN)
35531 altivec_expand_vec_perm_const_le (operands);
35532 return true;
35535 return false;
35538 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
35539 Return true if we match an efficient implementation. */
35541 static bool
35542 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35543 unsigned char perm0, unsigned char perm1)
35545 rtx x;
35547 /* If both selectors come from the same operand, fold to single op. */
35548 if ((perm0 & 2) == (perm1 & 2))
35550 if (perm0 & 2)
35551 op0 = op1;
35552 else
35553 op1 = op0;
35555 /* If both operands are equal, fold to simpler permutation. */
35556 if (rtx_equal_p (op0, op1))
35558 perm0 = perm0 & 1;
35559 perm1 = (perm1 & 1) + 2;
35561 /* If the first selector comes from the second operand, swap. */
35562 else if (perm0 & 2)
35564 if (perm1 & 2)
35565 return false;
35566 perm0 -= 2;
35567 perm1 += 2;
35568 std::swap (op0, op1);
35570 /* If the second selector does not come from the second operand, fail. */
35571 else if ((perm1 & 2) == 0)
35572 return false;
35574 /* Success! */
35575 if (target != NULL)
35577 machine_mode vmode, dmode;
35578 rtvec v;
35580 vmode = GET_MODE (target);
35581 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35582 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
35583 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35584 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35585 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35586 emit_insn (gen_rtx_SET (target, x));
35588 return true;
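/* Example of the folding above: perm0 = 3, perm1 = 2 takes both
   elements from op1, so op0 is replaced by op1; the operands are then
   equal and the selectors canonicalize to perm0 = 1, perm1 = 2, i.e.
   element 1 followed by element 0 of the same value -- a swap of its
   two halves, emitted as a VEC_SELECT of a VEC_CONCAT.  */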
35591 bool
35592 rs6000_expand_vec_perm_const (rtx operands[4])
35594 rtx target, op0, op1, sel;
35595 unsigned char perm0, perm1;
35597 target = operands[0];
35598 op0 = operands[1];
35599 op1 = operands[2];
35600 sel = operands[3];
35602 /* Unpack the constant selector. */
35603 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
35604 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
35606 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
35609 /* Test whether a constant permutation is supported. */
35611 static bool
35612 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
35613 const unsigned char *sel)
35615 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35616 if (TARGET_ALTIVEC)
35617 return true;
35619 /* Check for ps_merge* or evmerge* insns. */
35620 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
35621 || (TARGET_SPE && vmode == V2SImode))
35623 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35624 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35625 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
35628 return false;
35631 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
35633 static void
35634 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35635 machine_mode vmode, unsigned nelt, rtx perm[])
35637 machine_mode imode;
35638 rtx x;
35640 imode = vmode;
35641 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
35643 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
35644 imode = mode_for_vector (imode, nelt);
35647 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
35648 x = expand_vec_perm (vmode, op0, op1, x, target);
35649 if (x != target)
35650 emit_move_insn (target, x);
35653 /* Expand an extract even operation. */
35655 void
35656 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35658 machine_mode vmode = GET_MODE (target);
35659 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35660 rtx perm[16];
35662 for (i = 0; i < nelt; i++)
35663 perm[i] = GEN_INT (i * 2);
35665 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
35668 /* Expand a vector interleave operation. */
35670 void
35671 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35673 machine_mode vmode = GET_MODE (target);
35674 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35675 rtx perm[16];
35677 high = (highp ? 0 : nelt / 2);
35678 for (i = 0; i < nelt / 2; i++)
35680 perm[i * 2] = GEN_INT (i + high);
35681 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
35684 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
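/* E.g. for nelt == 4, HIGHP yields the selector {0, 4, 1, 5} (the high
   halves of the two inputs, in BE element numbering) and !HIGHP yields
   {2, 6, 3, 7}.  */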
35687 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
35688 void
35689 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35691 HOST_WIDE_INT hwi_scale (scale);
35692 REAL_VALUE_TYPE r_pow;
35693 rtvec v = rtvec_alloc (2);
35694 rtx elt;
35695 rtx scale_vec = gen_reg_rtx (V2DFmode);
35696 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35697 elt = const_double_from_real_value (r_pow, DFmode);
35698 RTVEC_ELT (v, 0) = elt;
35699 RTVEC_ELT (v, 1) = elt;
35700 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35701 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
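/* E.g. rs6000_scale_v2df (tgt, src, 3) splats 2.0**3 == 8.0 into a
   V2DF constant and emits a single multiply, scaling both lanes of SRC
   by 8.  */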
35704 /* Return an RTX representing where to find the function value of a
35705 function returning MODE. */
35706 static rtx
35707 rs6000_complex_function_value (machine_mode mode)
35709 unsigned int regno;
35710 rtx r1, r2;
35711 machine_mode inner = GET_MODE_INNER (mode);
35712 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35714 if (TARGET_FLOAT128
35715 && (mode == KCmode
35716 || (mode == TCmode && TARGET_IEEEQUAD)))
35717 regno = ALTIVEC_ARG_RETURN;
35719 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35720 regno = FP_ARG_RETURN;
35722 else
35724 regno = GP_ARG_RETURN;
35726 /* 32-bit is OK since it'll go in r3/r4. */
35727 if (TARGET_32BIT && inner_bytes >= 4)
35728 return gen_rtx_REG (mode, regno);
35731 if (inner_bytes >= 8)
35732 return gen_rtx_REG (mode, regno);
35734 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35735 const0_rtx);
35736 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35737 GEN_INT (inner_bytes));
35738 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35741 /* Return an rtx describing a return value of MODE as a PARALLEL
35742 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35743 stride REG_STRIDE. */
35745 static rtx
35746 rs6000_parallel_return (machine_mode mode,
35747 int n_elts, machine_mode elt_mode,
35748 unsigned int regno, unsigned int reg_stride)
35750 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35752 int i;
35753 for (i = 0; i < n_elts; i++)
35755 rtx r = gen_rtx_REG (elt_mode, regno);
35756 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35757 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35758 regno += reg_stride;
35761 return par;
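/* E.g. a DImode value returned with -m32 -mpowerpc64 uses n_elts = 2,
   elt_mode = SImode, regno = GP_ARG_RETURN (r3) and reg_stride = 1,
   yielding

     (parallel [(expr_list (reg:SI 3) (const_int 0))
                (expr_list (reg:SI 4) (const_int 4))])

   i.e. the two 4-byte halves in r3 and r4.  */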
35764 /* Target hook for TARGET_FUNCTION_VALUE.
35766 On the SPE, both floating-point values and vectors are returned in r3.
35768 On RS/6000 an integer value is in r3 and a floating-point value is in
35769 fp1, unless -msoft-float. */
35771 static rtx
35772 rs6000_function_value (const_tree valtype,
35773 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35774 bool outgoing ATTRIBUTE_UNUSED)
35776 machine_mode mode;
35777 unsigned int regno;
35778 machine_mode elt_mode;
35779 int n_elts;
35781 /* Special handling for structs in darwin64. */
35782 if (TARGET_MACHO
35783 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35785 CUMULATIVE_ARGS valcum;
35786 rtx valret;
35788 valcum.words = 0;
35789 valcum.fregno = FP_ARG_MIN_REG;
35790 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35791 /* Do a trial code generation as if this were going to be passed as
35792 an argument; if any part goes in memory, we return NULL. */
35793 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35794 if (valret)
35795 return valret;
35796 /* Otherwise fall through to standard ABI rules. */
35799 mode = TYPE_MODE (valtype);
35801 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35802 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35804 int first_reg, n_regs;
35806 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35808 /* _Decimal128 must use even/odd register pairs. */
35809 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35810 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35812 else
35814 first_reg = ALTIVEC_ARG_RETURN;
35815 n_regs = 1;
35818 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35821 /* Some return value types need to be split for -mpowerpc64 with the 32-bit ABI. */
35822 if (TARGET_32BIT && TARGET_POWERPC64)
35823 switch (mode)
35825 default:
35826 break;
35827 case DImode:
35828 case SCmode:
35829 case DCmode:
35830 case TCmode:
35831 int count = GET_MODE_SIZE (mode) / 4;
35832 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35835 if ((INTEGRAL_TYPE_P (valtype)
35836 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35837 || POINTER_TYPE_P (valtype))
35838 mode = TARGET_32BIT ? SImode : DImode;
35840 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35841 /* _Decimal128 must use an even/odd register pair. */
35842 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35843 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
35844 && !FLOAT128_VECTOR_P (mode)
35845 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
35846 regno = FP_ARG_RETURN;
35847 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35848 && targetm.calls.split_complex_arg)
35849 return rs6000_complex_function_value (mode);
35850 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35851 return register is used in both cases, and we won't see V2DImode/V2DFmode
35852 for pure altivec, combine the two cases. */
35853 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35854 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35855 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35856 regno = ALTIVEC_ARG_RETURN;
35857 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
35858 && (mode == DFmode || mode == DCmode
35859 || FLOAT128_IBM_P (mode) || mode == TCmode))
35860 return spe_build_register_parallel (mode, GP_ARG_RETURN);
35861 else
35862 regno = GP_ARG_RETURN;
35864 return gen_rtx_REG (mode, regno);
35867 /* Define how to find the value returned by a library function
35868 assuming the value has mode MODE. */
35869 rtx
35870 rs6000_libcall_value (machine_mode mode)
35872 unsigned int regno;
35874 /* Long long return values need to be split for -mpowerpc64 with the 32-bit ABI. */
35875 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35876 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35878 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
35879 /* _Decimal128 must use an even/odd register pair. */
35880 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35881 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
35882 && TARGET_HARD_FLOAT && TARGET_FPRS
35883 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
35884 regno = FP_ARG_RETURN;
35885 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35886 return register is used in both cases, and we won't see V2DImode/V2DFmode
35887 for pure altivec, combine the two cases. */
35888 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35889 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35890 regno = ALTIVEC_ARG_RETURN;
35891 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35892 return rs6000_complex_function_value (mode);
35893 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
35894 && (mode == DFmode || mode == DCmode
35895 || FLOAT128_IBM_P (mode) || mode == TCmode))
35896 return spe_build_register_parallel (mode, GP_ARG_RETURN);
35897 else
35898 regno = GP_ARG_RETURN;
35900 return gen_rtx_REG (mode, regno);
35904 /* Return true if we use LRA instead of the reload pass. */
35905 static bool
35906 rs6000_lra_p (void)
35908 return TARGET_LRA;
35911 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35912 Frame pointer elimination is automatically handled.
35914 For the RS/6000, if frame pointer elimination is being done, we would like
35915 to convert ap into fp, not sp.
35917 We need r30 if -mminimal-toc was specified, and there are constant pool
35918 references. */
35920 static bool
35921 rs6000_can_eliminate (const int from, const int to)
35923 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35924 ? ! frame_pointer_needed
35925 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35926 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
35927 : true);
35930 /* Define the offset between two registers, FROM to be eliminated and its
35931 replacement TO, at the start of a routine. */
35932 HOST_WIDE_INT
35933 rs6000_initial_elimination_offset (int from, int to)
35935 rs6000_stack_t *info = rs6000_stack_info ();
35936 HOST_WIDE_INT offset;
35938 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35939 offset = info->push_p ? 0 : -info->total_size;
35940 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35942 offset = info->push_p ? 0 : -info->total_size;
35943 if (FRAME_GROWS_DOWNWARD)
35944 offset += info->fixed_size + info->vars_size + info->parm_size;
35946 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35947 offset = FRAME_GROWS_DOWNWARD
35948 ? info->fixed_size + info->vars_size + info->parm_size
35949 : 0;
35950 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35951 offset = info->total_size;
35952 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35953 offset = info->push_p ? info->total_size : 0;
35954 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35955 offset = 0;
35956 else
35957 gcc_unreachable ();
35959 return offset;
35962 static rtx
35963 rs6000_dwarf_register_span (rtx reg)
35965 rtx parts[8];
35966 int i, words;
35967 unsigned regno = REGNO (reg);
35968 machine_mode mode = GET_MODE (reg);
35970 if (TARGET_SPE
35971 && regno < 32
35972 && (SPE_VECTOR_MODE (GET_MODE (reg))
35973 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
35974 && mode != SFmode && mode != SDmode && mode != SCmode)))
35975 ;
35976 else
35977 return NULL_RTX;
35979 regno = REGNO (reg);
35981 /* The duality of the SPE register size wreaks all kinds of havoc.
35982 This is a way of distinguishing r0 in 32-bits from r0 in
35983 64-bits. */
35984 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
35985 gcc_assert (words <= 4);
35986 for (i = 0; i < words; i++, regno++)
35988 if (BYTES_BIG_ENDIAN)
35990 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
35991 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
35993 else
35995 parts[2 * i] = gen_rtx_REG (SImode, regno);
35996 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
36000 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
36003 /* Fill in sizes for SPE register high parts in the table used by the unwinder. */
36005 static void
36006 rs6000_init_dwarf_reg_sizes_extra (tree address)
36008 if (TARGET_SPE)
36010 int i;
36011 machine_mode mode = TYPE_MODE (char_type_node);
36012 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36013 rtx mem = gen_rtx_MEM (BLKmode, addr);
36014 rtx value = gen_int_mode (4, mode);
36016 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
36018 int column = DWARF_REG_TO_UNWIND_COLUMN
36019 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36020 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36022 emit_move_insn (adjust_address (mem, mode, offset), value);
36026 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36028 int i;
36029 machine_mode mode = TYPE_MODE (char_type_node);
36030 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36031 rtx mem = gen_rtx_MEM (BLKmode, addr);
36032 rtx value = gen_int_mode (16, mode);
36034 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36035 The unwinder still needs to know the size of Altivec registers. */
36037 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36039 int column = DWARF_REG_TO_UNWIND_COLUMN
36040 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36041 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36043 emit_move_insn (adjust_address (mem, mode, offset), value);
36048 /* Map internal gcc register numbers to debug format register numbers.
36049 FORMAT specifies the type of debug register number to use:
36050 0 -- debug information, except for frame-related sections
36051 1 -- DWARF .debug_frame section
36052 2 -- DWARF .eh_frame section */
36054 unsigned int
36055 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36057 /* We never use the GCC internal number for SPE high registers.
36058 Those are mapped to the 1200..1231 range for all debug formats. */
36059 if (SPE_HIGH_REGNO_P (regno))
36060 return regno - FIRST_SPE_HIGH_REGNO + 1200;
36062 /* Except for the above, we use the internal number for non-DWARF
36063 debug information, and also for .eh_frame. */
36064 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36065 return regno;
36067 /* On some platforms, we use the standard DWARF register
36068 numbering for .debug_info and .debug_frame. */
36069 #ifdef RS6000_USE_DWARF_NUMBERING
36070 if (regno <= 63)
36071 return regno;
36072 if (regno == LR_REGNO)
36073 return 108;
36074 if (regno == CTR_REGNO)
36075 return 109;
36076 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36077 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36078 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36079 to the DWARF reg for CR. */
36080 if (format == 1 && regno == CR2_REGNO)
36081 return 64;
36082 if (CR_REGNO_P (regno))
36083 return regno - CR0_REGNO + 86;
36084 if (regno == CA_REGNO)
36085 return 101; /* XER */
36086 if (ALTIVEC_REGNO_P (regno))
36087 return regno - FIRST_ALTIVEC_REGNO + 1124;
36088 if (regno == VRSAVE_REGNO)
36089 return 356;
36090 if (regno == VSCR_REGNO)
36091 return 67;
36092 if (regno == SPE_ACC_REGNO)
36093 return 99;
36094 if (regno == SPEFSCR_REGNO)
36095 return 612;
36096 #endif
36097 return regno;
36100 /* target hook eh_return_filter_mode */
36101 static machine_mode
36102 rs6000_eh_return_filter_mode (void)
36104 return TARGET_32BIT ? SImode : word_mode;
36107 /* Target hook for scalar_mode_supported_p. */
36108 static bool
36109 rs6000_scalar_mode_supported_p (machine_mode mode)
36111 /* -m32 does not support TImode. This is the default, from
36112 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36113 same ABI as for -m32. But default_scalar_mode_supported_p allows
36114 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36115 for -mpowerpc64. */
36116 if (TARGET_32BIT && mode == TImode)
36117 return false;
36119 if (DECIMAL_FLOAT_MODE_P (mode))
36120 return default_decimal_float_supported_p ();
36121 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
36122 return true;
36123 else
36124 return default_scalar_mode_supported_p (mode);
36127 /* Target hook for vector_mode_supported_p. */
36128 static bool
36129 rs6000_vector_mode_supported_p (machine_mode mode)
36132 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36133 return true;
36135 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
36136 return true;
36138 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36139 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36140 double-double. */
36141 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36142 return true;
36144 else
36145 return false;
36148 /* Target hook for c_mode_for_suffix. */
36149 static machine_mode
36150 rs6000_c_mode_for_suffix (char suffix)
36152 if (TARGET_FLOAT128)
36154 if (suffix == 'q' || suffix == 'Q')
36155 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36157 /* At the moment, we are not defining a suffix for IBM extended double.
36158 If/when the default for -mabi=ieeelongdouble is changed, and we want
36159 to support __ibm128 constants in legacy library code, we may need to
36160 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36161 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36162 __float80 constants. */
36165 return VOIDmode;
36168 /* Target hook for invalid_arg_for_unprototyped_fn. */
36169 static const char *
36170 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36172 return (!rs6000_darwin64_abi
36173 && typelist == 0
36174 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36175 && (funcdecl == NULL_TREE
36176 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36177 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36178 ? N_("AltiVec argument passed to unprototyped function")
36179 : NULL;
36182 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36183 setup by using __stack_chk_fail_local hidden function instead of
36184 calling __stack_chk_fail directly. Otherwise it is better to call
36185 __stack_chk_fail directly. */
36187 static tree ATTRIBUTE_UNUSED
36188 rs6000_stack_protect_fail (void)
36190 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36191 ? default_hidden_stack_protect_fail ()
36192 : default_external_stack_protect_fail ();
36195 void
36196 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
36197 int num_operands ATTRIBUTE_UNUSED)
36199 if (rs6000_warn_cell_microcode)
36201 const char *temp;
36202 int insn_code_number = recog_memoized (insn);
36203 location_t location = INSN_LOCATION (insn);
36205 /* Punt on insns we cannot recognize. */
36206 if (insn_code_number < 0)
36207 return;
36209 temp = get_insn_template (insn_code_number, insn);
36211 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
36212 warning_at (location, OPT_mwarn_cell_microcode,
36213 "emitting microcode insn %s\t[%s] #%d",
36214 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
36215 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
36216 warning_at (location, OPT_mwarn_cell_microcode,
36217 "emitting conditional microcode insn %s\t[%s] #%d",
36218 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
36222 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36224 #if TARGET_ELF
36225 static unsigned HOST_WIDE_INT
36226 rs6000_asan_shadow_offset (void)
36228 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36230 #endif
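/* For reference: AddressSanitizer maps an address to its shadow byte
   as shadow = (addr >> 3) + offset, so this places the shadow region
   at 1 << 41 (0x20000000000) for 64-bit and at 1 << 29 (0x20000000)
   for 32-bit targets.  */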
36232 /* Mask options that we want to support inside of attribute((target)) and
36233 #pragma GCC target operations. Note, we do not include things like
36234 64/32-bit, endianness, hard/soft floating point, etc. that would have
36235 different calling sequences. */
36237 struct rs6000_opt_mask {
36238 const char *name; /* option name */
36239 HOST_WIDE_INT mask; /* mask to set */
36240 bool invert; /* invert sense of mask */
36241 bool valid_target; /* option is a target option */
36244 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36246 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36247 { "cmpb", OPTION_MASK_CMPB, false, true },
36248 { "crypto", OPTION_MASK_CRYPTO, false, true },
36249 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36250 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36251 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36252 false, true },
36253 { "float128", OPTION_MASK_FLOAT128, false, false },
36254 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
36255 { "fprnd", OPTION_MASK_FPRND, false, true },
36256 { "hard-dfp", OPTION_MASK_DFP, false, true },
36257 { "htm", OPTION_MASK_HTM, false, true },
36258 { "isel", OPTION_MASK_ISEL, false, true },
36259 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36260 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36261 { "modulo", OPTION_MASK_MODULO, false, true },
36262 { "mulhw", OPTION_MASK_MULHW, false, true },
36263 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36264 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36265 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36266 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36267 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36268 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36269 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
36270 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
36271 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36272 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36273 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36274 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36275 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36276 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36277 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36278 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36279 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36280 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36281 { "string", OPTION_MASK_STRING, false, true },
36282 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36283 { "update", OPTION_MASK_NO_UPDATE, true , true },
36284 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
36285 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
36286 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
36287 { "vsx", OPTION_MASK_VSX, false, true },
36288 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
36289 #ifdef OPTION_MASK_64BIT
36290 #if TARGET_AIX_OS
36291 { "aix64", OPTION_MASK_64BIT, false, false },
36292 { "aix32", OPTION_MASK_64BIT, true, false },
36293 #else
36294 { "64", OPTION_MASK_64BIT, false, false },
36295 { "32", OPTION_MASK_64BIT, true, false },
36296 #endif
36297 #endif
36298 #ifdef OPTION_MASK_EABI
36299 { "eabi", OPTION_MASK_EABI, false, false },
36300 #endif
36301 #ifdef OPTION_MASK_LITTLE_ENDIAN
36302 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36303 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36304 #endif
36305 #ifdef OPTION_MASK_RELOCATABLE
36306 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36307 #endif
36308 #ifdef OPTION_MASK_STRICT_ALIGN
36309 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36310 #endif
36311 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36312 { "string", OPTION_MASK_STRING, false, false },
36315 /* Builtin mask mapping for printing the flags. */
36316 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36318 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36319 { "vsx", RS6000_BTM_VSX, false, false },
36320 { "spe", RS6000_BTM_SPE, false, false },
36321 { "paired", RS6000_BTM_PAIRED, false, false },
36322 { "fre", RS6000_BTM_FRE, false, false },
36323 { "fres", RS6000_BTM_FRES, false, false },
36324 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36325 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36326 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36327 { "cell", RS6000_BTM_CELL, false, false },
36328 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36329 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36330 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36331 { "crypto", RS6000_BTM_CRYPTO, false, false },
36332 { "htm", RS6000_BTM_HTM, false, false },
36333 { "hard-dfp", RS6000_BTM_DFP, false, false },
36334 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36335 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36336 { "float128", RS6000_BTM_FLOAT128, false, false },
36339 /* Option variables that we want to support inside attribute((target)) and
36340 #pragma GCC target operations. */
36342 struct rs6000_opt_var {
36343 const char *name; /* option name */
36344 size_t global_offset; /* offset of the option in global_options. */
36345 size_t target_offset; /* offset of the option in target options. */
36348 static struct rs6000_opt_var const rs6000_opt_vars[] =
36350 { "friz",
36351 offsetof (struct gcc_options, x_TARGET_FRIZ),
36352 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36353 { "avoid-indexed-addresses",
36354 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36355 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36356 { "paired",
36357 offsetof (struct gcc_options, x_rs6000_paired_float),
36358 offsetof (struct cl_target_option, x_rs6000_paired_float), },
36359 { "longcall",
36360 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36361 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36362 { "optimize-swaps",
36363 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36364 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36365 { "allow-movmisalign",
36366 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36367 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36368 { "allow-df-permute",
36369 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
36370 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
36371 { "sched-groups",
36372 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36373 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36374 { "always-hint",
36375 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36376 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36377 { "align-branch-targets",
36378 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36379 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36380 { "vectorize-builtins",
36381 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
36382 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
36383 { "tls-markers",
36384 offsetof (struct gcc_options, x_tls_markers),
36385 offsetof (struct cl_target_option, x_tls_markers), },
36386 { "sched-prolog",
36387 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36388 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36389 { "sched-epilog",
36390 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36391 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36392 { "gen-cell-microcode",
36393 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
36394 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
36395 { "warn-cell-microcode",
36396 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
36397 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
36400 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36401 parsing. Return true if there were no errors. */
36403 static bool
36404 rs6000_inner_target_options (tree args, bool attr_p)
36406 bool ret = true;
36408 if (args == NULL_TREE)
36409 ;
36411 else if (TREE_CODE (args) == STRING_CST)
36413 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36414 char *q;
36416 while ((q = strtok (p, ",")) != NULL)
36418 bool error_p = false;
36419 bool not_valid_p = false;
36420 const char *cpu_opt = NULL;
36422 p = NULL;
36423 if (strncmp (q, "cpu=", 4) == 0)
36425 int cpu_index = rs6000_cpu_name_lookup (q+4);
36426 if (cpu_index >= 0)
36427 rs6000_cpu_index = cpu_index;
36428 else
36430 error_p = true;
36431 cpu_opt = q+4;
36434 else if (strncmp (q, "tune=", 5) == 0)
36436 int tune_index = rs6000_cpu_name_lookup (q+5);
36437 if (tune_index >= 0)
36438 rs6000_tune_index = tune_index;
36439 else
36441 error_p = true;
36442 cpu_opt = q+5;
36445 else
36447 size_t i;
36448 bool invert = false;
36449 char *r = q;
36451 error_p = true;
36452 if (strncmp (r, "no-", 3) == 0)
36454 invert = true;
36455 r += 3;
36458 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36459 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36461 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36463 if (!rs6000_opt_masks[i].valid_target)
36464 not_valid_p = true;
36465 else
36467 error_p = false;
36468 rs6000_isa_flags_explicit |= mask;
36470 /* VSX needs altivec, so -mvsx automagically sets
36471 altivec and disables -mavoid-indexed-addresses. */
36472 if (!invert)
36474 if (mask == OPTION_MASK_VSX)
36476 mask |= OPTION_MASK_ALTIVEC;
36477 TARGET_AVOID_XFORM = 0;
36481 if (rs6000_opt_masks[i].invert)
36482 invert = !invert;
36484 if (invert)
36485 rs6000_isa_flags &= ~mask;
36486 else
36487 rs6000_isa_flags |= mask;
36489 break;
36492 if (error_p && !not_valid_p)
36494 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36495 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36497 size_t j = rs6000_opt_vars[i].global_offset;
36498 *((int *) ((char *)&global_options + j)) = !invert;
36499 error_p = false;
36500 not_valid_p = false;
36501 break;
36506 if (error_p)
36508 const char *eprefix, *esuffix;
36510 ret = false;
36511 if (attr_p)
36513 eprefix = "__attribute__((__target__(";
36514 esuffix = ")))";
36516 else
36518 eprefix = "#pragma GCC target ";
36519 esuffix = "";
36522 if (cpu_opt)
36523 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
36524 q, esuffix);
36525 else if (not_valid_p)
36526 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
36527 else
36528 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
36533 else if (TREE_CODE (args) == TREE_LIST)
36535 do
36537 tree value = TREE_VALUE (args);
36538 if (value)
36540 bool ret2 = rs6000_inner_target_options (value, attr_p);
36541 if (!ret2)
36542 ret = false;
36544 args = TREE_CHAIN (args);
36546 while (args != NULL_TREE);
36549 else
36550 gcc_unreachable ();
36552 return ret;
36555 /* Print out the target options as a list for -mdebug=target. */
36557 static void
36558 rs6000_debug_target_options (tree args, const char *prefix)
36560 if (args == NULL_TREE)
36561 fprintf (stderr, "%s<NULL>", prefix);
36563 else if (TREE_CODE (args) == STRING_CST)
36565 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36566 char *q;
36568 while ((q = strtok (p, ",")) != NULL)
36570 p = NULL;
36571 fprintf (stderr, "%s\"%s\"", prefix, q);
36572 prefix = ", ";
36576 else if (TREE_CODE (args) == TREE_LIST)
36578 do
36580 tree value = TREE_VALUE (args);
36581 if (value)
36583 rs6000_debug_target_options (value, prefix);
36584 prefix = ", ";
36586 args = TREE_CHAIN (args);
36588 while (args != NULL_TREE);
36591 else
36592 gcc_unreachable ();
36594 return;
36598 /* Hook to validate attribute((target("..."))). */
36600 static bool
36601 rs6000_valid_attribute_p (tree fndecl,
36602 tree ARG_UNUSED (name),
36603 tree args,
36604 int flags)
36606 struct cl_target_option cur_target;
36607 bool ret;
36608 tree old_optimize = build_optimization_node (&global_options);
36609 tree new_target, new_optimize;
36610 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36612 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36614 if (TARGET_DEBUG_TARGET)
36616 tree tname = DECL_NAME (fndecl);
36617 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36618 if (tname)
36619 fprintf (stderr, "function: %.*s\n",
36620 (int) IDENTIFIER_LENGTH (tname),
36621 IDENTIFIER_POINTER (tname));
36622 else
36623 fprintf (stderr, "function: unknown\n");
36625 fprintf (stderr, "args:");
36626 rs6000_debug_target_options (args, " ");
36627 fprintf (stderr, "\n");
36629 if (flags)
36630 fprintf (stderr, "flags: 0x%x\n", flags);
36632 fprintf (stderr, "--------------------\n");
36635 old_optimize = build_optimization_node (&global_options);
36636 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36638 /* If the function changed the optimization levels as well as setting target
36639 options, start with the optimizations specified. */
36640 if (func_optimize && func_optimize != old_optimize)
36641 cl_optimization_restore (&global_options,
36642 TREE_OPTIMIZATION (func_optimize));
36644 /* The target attributes may also change some optimization flags, so update
36645 the optimization options if necessary. */
36646 cl_target_option_save (&cur_target, &global_options);
36647 rs6000_cpu_index = rs6000_tune_index = -1;
36648 ret = rs6000_inner_target_options (args, true);
36650 /* Set up any additional state. */
36651 if (ret)
36653 ret = rs6000_option_override_internal (false);
36654 new_target = build_target_option_node (&global_options);
36656 else
36657 new_target = NULL;
36659 new_optimize = build_optimization_node (&global_options);
36661 if (!new_target)
36662 ret = false;
36664 else if (fndecl)
36666 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36668 if (old_optimize != new_optimize)
36669 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36672 cl_target_option_restore (&global_options, &cur_target);
36674 if (old_optimize != new_optimize)
36675 cl_optimization_restore (&global_options,
36676 TREE_OPTIMIZATION (old_optimize));
36678 return ret;
36682 /* Hook to validate the current #pragma GCC target and set the state, and
36683 update the macros based on what was changed. If ARGS is NULL, then
36684 POP_TARGET is used to reset the options. */
36686 bool
36687 rs6000_pragma_target_parse (tree args, tree pop_target)
36689 tree prev_tree = build_target_option_node (&global_options);
36690 tree cur_tree;
36691 struct cl_target_option *prev_opt, *cur_opt;
36692 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36693 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36695 if (TARGET_DEBUG_TARGET)
36697 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36698 fprintf (stderr, "args:");
36699 rs6000_debug_target_options (args, " ");
36700 fprintf (stderr, "\n");
36702 if (pop_target)
36704 fprintf (stderr, "pop_target:\n");
36705 debug_tree (pop_target);
36707 else
36708 fprintf (stderr, "pop_target: <NULL>\n");
36710 fprintf (stderr, "--------------------\n");
36713 if (! args)
36715 cur_tree = ((pop_target)
36716 ? pop_target
36717 : target_option_default_node);
36718 cl_target_option_restore (&global_options,
36719 TREE_TARGET_OPTION (cur_tree));
36721 else
36723 rs6000_cpu_index = rs6000_tune_index = -1;
36724 if (!rs6000_inner_target_options (args, false)
36725 || !rs6000_option_override_internal (false)
36726 || (cur_tree = build_target_option_node (&global_options))
36727 == NULL_TREE)
36729 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36730 fprintf (stderr, "invalid pragma\n");
36732 return false;
36736 target_option_current_node = cur_tree;
36738 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36739 change the macros that are defined. */
36740 if (rs6000_target_modify_macros_ptr)
36742 prev_opt = TREE_TARGET_OPTION (prev_tree);
36743 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36744 prev_flags = prev_opt->x_rs6000_isa_flags;
36746 cur_opt = TREE_TARGET_OPTION (cur_tree);
36747 cur_flags = cur_opt->x_rs6000_isa_flags;
36748 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36750 diff_bumask = (prev_bumask ^ cur_bumask);
36751 diff_flags = (prev_flags ^ cur_flags);
36753 if ((diff_flags != 0) || (diff_bumask != 0))
36755 /* Delete old macros. */
36756 rs6000_target_modify_macros_ptr (false,
36757 prev_flags & diff_flags,
36758 prev_bumask & diff_bumask);
36760 /* Define new macros. */
36761 rs6000_target_modify_macros_ptr (true,
36762 cur_flags & diff_flags,
36763 cur_bumask & diff_bumask);
36767 return true;
36771 /* Remember the last target of rs6000_set_current_function. */
36772 static GTY(()) tree rs6000_previous_fndecl;
36774 /* Establish appropriate back-end context for processing the function
36775 FNDECL. The argument might be NULL to indicate processing at top
36776 level, outside of any function scope. */
36777 static void
36778 rs6000_set_current_function (tree fndecl)
36780 tree old_tree = (rs6000_previous_fndecl
36781 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
36782 : NULL_TREE);
36784 tree new_tree = (fndecl
36785 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
36786 : NULL_TREE);
36788 if (TARGET_DEBUG_TARGET)
36790 bool print_final = false;
36791 fprintf (stderr, "\n==================== rs6000_set_current_function");
36793 if (fndecl)
36794 fprintf (stderr, ", fndecl %s (%p)",
36795 (DECL_NAME (fndecl)
36796 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36797 : "<unknown>"), (void *)fndecl);
36799 if (rs6000_previous_fndecl)
36800 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36802 fprintf (stderr, "\n");
36803 if (new_tree)
36805 fprintf (stderr, "\nnew fndecl target specific options:\n");
36806 debug_tree (new_tree);
36807 print_final = true;
36810 if (old_tree)
36812 fprintf (stderr, "\nold fndecl target specific options:\n");
36813 debug_tree (old_tree);
36814 print_final = true;
36817 if (print_final)
36818 fprintf (stderr, "--------------------\n");
36821 /* Only change the context if the function changes. This hook is called
36822 several times in the course of compiling a function, and we don't want to
36823 slow things down too much or call target_reinit when it isn't safe. */
36824 if (fndecl && fndecl != rs6000_previous_fndecl)
36826 rs6000_previous_fndecl = fndecl;
36827 if (old_tree == new_tree)
36828 ;
36830 else if (new_tree && new_tree != target_option_default_node)
36832 cl_target_option_restore (&global_options,
36833 TREE_TARGET_OPTION (new_tree));
36834 if (TREE_TARGET_GLOBALS (new_tree))
36835 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36836 else
36837 TREE_TARGET_GLOBALS (new_tree)
36838 = save_target_globals_default_opts ();
36841 else if (old_tree && old_tree != target_option_default_node)
36843 new_tree = target_option_current_node;
36844 cl_target_option_restore (&global_options,
36845 TREE_TARGET_OPTION (new_tree));
36846 if (TREE_TARGET_GLOBALS (new_tree))
36847 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36848 else if (new_tree == target_option_default_node)
36849 restore_target_globals (&default_target_globals);
36850 else
36851 TREE_TARGET_GLOBALS (new_tree)
36852 = save_target_globals_default_opts ();
36858 /* Save the current options */
36860 static void
36861 rs6000_function_specific_save (struct cl_target_option *ptr,
36862 struct gcc_options *opts)
36864 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36865 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36868 /* Restore the current options */
36870 static void
36871 rs6000_function_specific_restore (struct gcc_options *opts,
36872 struct cl_target_option *ptr)
36875 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36876 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36877 (void) rs6000_option_override_internal (false);
36880 /* Print the current options */
36882 static void
36883 rs6000_function_specific_print (FILE *file, int indent,
36884 struct cl_target_option *ptr)
36886 rs6000_print_isa_options (file, indent, "Isa options set",
36887 ptr->x_rs6000_isa_flags);
36889 rs6000_print_isa_options (file, indent, "Isa options explicit",
36890 ptr->x_rs6000_isa_flags_explicit);
36893 /* Helper function to print the current isa or misc options on a line. */
36895 static void
36896 rs6000_print_options_internal (FILE *file,
36897 int indent,
36898 const char *string,
36899 HOST_WIDE_INT flags,
36900 const char *prefix,
36901 const struct rs6000_opt_mask *opts,
36902 size_t num_elements)
36904 size_t i;
36905 size_t start_column = 0;
36906 size_t cur_column;
36907 size_t max_column = 120;
36908 size_t prefix_len = strlen (prefix);
36909 size_t comma_len = 0;
36910 const char *comma = "";
36912 if (indent)
36913 start_column += fprintf (file, "%*s", indent, "");
36915 if (!flags)
36917 fprintf (file, DEBUG_FMT_S, string, "<none>");
36918 return;
36921 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
36923 /* Print the various mask options. */
36924 cur_column = start_column;
36925 for (i = 0; i < num_elements; i++)
36927 bool invert = opts[i].invert;
36928 const char *name = opts[i].name;
36929 const char *no_str = "";
36930 HOST_WIDE_INT mask = opts[i].mask;
36931 size_t len = comma_len + prefix_len + strlen (name);
36933 if (!invert)
36935 if ((flags & mask) == 0)
36937 no_str = "no-";
36938 len += sizeof ("no-") - 1;
36941 flags &= ~mask;
36944 else
36946 if ((flags & mask) != 0)
36948 no_str = "no-";
36949 len += sizeof ("no-") - 1;
36952 flags |= mask;
36955 cur_column += len;
36956 if (cur_column > max_column)
36958 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
36959 cur_column = start_column + len;
36960 comma = "";
36963 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36964 comma = ", ";
36965 comma_len = sizeof (", ") - 1;
36968 fputs ("\n", file);
36971 /* Helper function to print the current isa options on a line. */
36973 static void
36974 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36975 HOST_WIDE_INT flags)
36977 rs6000_print_options_internal (file, indent, string, flags, "-m",
36978 &rs6000_opt_masks[0],
36979 ARRAY_SIZE (rs6000_opt_masks));
36982 static void
36983 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36984 HOST_WIDE_INT flags)
36986 rs6000_print_options_internal (file, indent, string, flags, "",
36987 &rs6000_builtin_mask_names[0],
36988 ARRAY_SIZE (rs6000_builtin_mask_names));
36992 /* Hook to determine if one function can safely inline another. */
36994 static bool
36995 rs6000_can_inline_p (tree caller, tree callee)
36997 bool ret = false;
36998 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
36999 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37001 /* If callee has no option attributes, then it is ok to inline. */
37002 if (!callee_tree)
37003 ret = true;
37005 /* If caller has no option attributes, but callee does, then it is not ok
37006 to inline. */
37007 else if (!caller_tree)
37008 ret = false;
37010 else
37012 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37013 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37015 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37016 can inline an altivec function but a non-vsx function can't inline a
37017 vsx function. */
37018 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37019 == callee_opts->x_rs6000_isa_flags)
37020 ret = true;
37023 if (TARGET_DEBUG_TARGET)
37024 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
37025 (DECL_NAME (caller)
37026 ? IDENTIFIER_POINTER (DECL_NAME (caller))
37027 : "<unknown>"),
37028 (DECL_NAME (callee)
37029 ? IDENTIFIER_POINTER (DECL_NAME (callee))
37030 : "<unknown>"),
37031 (ret ? "can" : "cannot"));
37033 return ret;
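/* Illustrative sketch (not part of GCC): the subset test that
   rs6000_can_inline_p performs above, isolated.  The callee may be
   inlined only if every ISA flag it requires is also enabled in the
   caller; e.g. a VSX caller can inline an Altivec callee, but a
   non-VSX caller cannot inline a VSX callee.  */

static bool ATTRIBUTE_UNUSED
example_isa_subset_p (HOST_WIDE_INT caller_flags, HOST_WIDE_INT callee_flags)
{
  /* (caller & callee) == callee holds iff the callee's flag bits are a
     subset of the caller's flag bits.  */
  return (caller_flags & callee_flags) == callee_flags;
}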
37036 /* Allocate a stack temp and fix up the address so it meets the particular
37037 memory requirements (either offsettable or REG+REG addressing). */
37040 rs6000_allocate_stack_temp (machine_mode mode,
37041 bool offsettable_p,
37042 bool reg_reg_p)
37044 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37045 rtx addr = XEXP (stack, 0);
37046 int strict_p = (reload_in_progress || reload_completed);
37048 if (!legitimate_indirect_address_p (addr, strict_p))
37050 if (offsettable_p
37051 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37052 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37054 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37055 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37058 return stack;
37061 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37062 to such a form to deal with memory reference instructions like STFIWX that
37063 only take reg+reg addressing. */
37066 rs6000_address_for_fpconvert (rtx x)
37068 int strict_p = (reload_in_progress || reload_completed);
37069 rtx addr;
37071 gcc_assert (MEM_P (x));
37072 addr = XEXP (x, 0);
37073 if (! legitimate_indirect_address_p (addr, strict_p)
37074 && ! legitimate_indexed_address_p (addr, strict_p))
37076 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37078 rtx reg = XEXP (addr, 0);
37079 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37080 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37081 gcc_assert (REG_P (reg));
37082 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37083 addr = reg;
37085 else if (GET_CODE (addr) == PRE_MODIFY)
37087 rtx reg = XEXP (addr, 0);
37088 rtx expr = XEXP (addr, 1);
37089 gcc_assert (REG_P (reg));
37090 gcc_assert (GET_CODE (expr) == PLUS);
37091 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37092 addr = reg;
37095 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37098 return x;
37101 /* Given a memory reference, if it is not in the form for altivec memory
37102 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37103 convert to the altivec format. */
37106 rs6000_address_for_altivec (rtx x)
37108 gcc_assert (MEM_P (x));
37109 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
37111 rtx addr = XEXP (x, 0);
37112 int strict_p = (reload_in_progress || reload_completed);
37114 if (!legitimate_indexed_address_p (addr, strict_p)
37115 && !legitimate_indirect_address_p (addr, strict_p))
37116 addr = copy_to_mode_reg (Pmode, addr);
37118 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
37119 x = change_address (x, GET_MODE (x), addr);
37122 return x;
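/* Illustrative sketch (not part of GCC): the effect of the AND with -16
   above, shown on a plain integer address.  -16 is ...111110000 in
   two's complement, so the AND clears the low four bits and rounds the
   address down to the 16-byte boundary that the AltiVec load/store
   instructions enforce in hardware.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
example_altivec_align (unsigned HOST_WIDE_INT addr)
{
  return addr & ~(unsigned HOST_WIDE_INT) 15;	/* same as addr & -16 */
}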
37125 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37127 On the RS/6000, all integer constants are acceptable, though most won't
37128 be valid for particular insns. Only easy FP constants are acceptable. */
37130 static bool
37131 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37133 if (TARGET_ELF && tls_referenced_p (x))
37134 return false;
37136 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37137 || GET_MODE (x) == VOIDmode
37138 || (TARGET_POWERPC64 && mode == DImode)
37139 || easy_fp_constant (x, mode)
37140 || easy_vector_constant (x, mode));
37144 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37146 static bool
37147 chain_already_loaded (rtx_insn *last)
37149 for (; last != NULL; last = PREV_INSN (last))
37151 if (NONJUMP_INSN_P (last))
37153 rtx patt = PATTERN (last);
37155 if (GET_CODE (patt) == SET)
37157 rtx lhs = XEXP (patt, 0);
37159 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37160 return true;
37164 return false;
37167 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37169 void
37170 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37172 const bool direct_call_p
37173 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37174 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37175 rtx toc_load = NULL_RTX;
37176 rtx toc_restore = NULL_RTX;
37177 rtx func_addr;
37178 rtx abi_reg = NULL_RTX;
37179 rtx call[4];
37180 int n_call;
37181 rtx insn;
37183 /* Handle longcall attributes. */
37184 if (INTVAL (cookie) & CALL_LONG)
37185 func_desc = rs6000_longcall_ref (func_desc);
37187 /* Handle indirect calls. */
37188 if (GET_CODE (func_desc) != SYMBOL_REF
37189 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37191 /* Save the TOC into its reserved slot before the call,
37192 and prepare to restore it after the call. */
37193 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37194 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37195 rtx stack_toc_mem = gen_frame_mem (Pmode,
37196 gen_rtx_PLUS (Pmode, stack_ptr,
37197 stack_toc_offset));
37198 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37199 gen_rtvec (1, stack_toc_offset),
37200 UNSPEC_TOCSLOT);
37201 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37203 /* Can we optimize saving the TOC in the prologue or
37204 do we need to do it at every call? */
37205 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37206 cfun->machine->save_toc_in_prologue = true;
37207 else
37209 MEM_VOLATILE_P (stack_toc_mem) = 1;
37210 emit_move_insn (stack_toc_mem, toc_reg);
37213 if (DEFAULT_ABI == ABI_ELFv2)
37215 /* A function pointer in the ELFv2 ABI is just a plain address, but
37216 the ABI requires it to be loaded into r12 before the call. */
37217 func_addr = gen_rtx_REG (Pmode, 12);
37218 emit_move_insn (func_addr, func_desc);
37219 abi_reg = func_addr;
37221 else
37223 /* A function pointer under AIX is a pointer to a data area whose
37224 first word contains the actual address of the function, whose
37225 second word contains a pointer to its TOC, and whose third word
37226 contains a value to place in the static chain register (r11).
37227 Note that if we load the static chain, our "trampoline" need
37228 not have any executable code. */
37230 /* Load up address of the actual function. */
37231 func_desc = force_reg (Pmode, func_desc);
37232 func_addr = gen_reg_rtx (Pmode);
37233 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37235 /* Prepare to load the TOC of the called function. Note that the
37236 TOC load must happen immediately before the actual call so
37237 that unwinding the TOC registers works correctly. See the
37238 comment in frob_update_context. */
37239 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37240 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37241 gen_rtx_PLUS (Pmode, func_desc,
37242 func_toc_offset));
37243 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37245 /* If we have a static chain, load it up. But, if the call was
37246 originally direct, the 3rd word has not been written since no
37247 trampoline has been built, so we ought not to load it, lest we
37248 overwrite a static chain value. */
37249 if (!direct_call_p
37250 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37251 && !chain_already_loaded (get_current_sequence ()->next->last))
37253 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37254 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37255 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37256 gen_rtx_PLUS (Pmode, func_desc,
37257 func_sc_offset));
37258 emit_move_insn (sc_reg, func_sc_mem);
37259 abi_reg = sc_reg;
37263 else
37265 /* Direct calls use the TOC: for local calls, the callee will
37266 assume the TOC register is set; for non-local calls, the
37267 PLT stub needs the TOC register. */
37268 abi_reg = toc_reg;
37269 func_addr = func_desc;
37272 /* Create the call. */
37273 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37274 if (value != NULL_RTX)
37275 call[0] = gen_rtx_SET (value, call[0]);
37276 n_call = 1;
37278 if (toc_load)
37279 call[n_call++] = toc_load;
37280 if (toc_restore)
37281 call[n_call++] = toc_restore;
37283 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37285 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37286 insn = emit_call_insn (insn);
37288 /* Mention all registers defined by the ABI to hold information
37289 as uses in CALL_INSN_FUNCTION_USAGE. */
37290 if (abi_reg)
37291 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
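/* Illustrative sketch (not part of GCC): the three-word AIX function
   descriptor described in the comment inside rs6000_call_aix above.  A
   function pointer under that ABI addresses a structure shaped like
   this rather than the code itself; the struct name is hypothetical.  */

struct example_aix_func_desc
{
  void *entry;		/* word 0: address of the actual function code */
  void *toc;		/* word 1: the callee's TOC pointer (for r2) */
  void *static_chain;	/* word 2: value for the static chain reg (r11) */
};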
37294 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37296 void
37297 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37299 rtx call[2];
37300 rtx insn;
37302 gcc_assert (INTVAL (cookie) == 0);
37304 /* Create the call. */
37305 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37306 if (value != NULL_RTX)
37307 call[0] = gen_rtx_SET (value, call[0]);
37309 call[1] = simple_return_rtx;
37311 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37312 insn = emit_call_insn (insn);
37314 /* Note use of the TOC register. */
37315 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37316 /* We need to also mark a use of the link register since the function we
37317 sibling-call to will use it to return to our caller. */
37318 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
37321 /* Return whether we need to always update the saved TOC pointer when we update
37322 the stack pointer. */
37324 static bool
37325 rs6000_save_toc_in_prologue_p (void)
37327 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37330 #ifdef HAVE_GAS_HIDDEN
37331 # define USE_HIDDEN_LINKONCE 1
37332 #else
37333 # define USE_HIDDEN_LINKONCE 0
37334 #endif
37336 /* Fills in the label name that should be used for a 476 link stack thunk. */
37338 void
37339 get_ppc476_thunk_name (char name[32])
37341 gcc_assert (TARGET_LINK_STACK);
37343 if (USE_HIDDEN_LINKONCE)
37344 sprintf (name, "__ppc476.get_thunk");
37345 else
37346 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37349 /* This function emits the simple thunk routine that is used to preserve
37350 the link stack on the 476 cpu. */
37352 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37353 static void
37354 rs6000_code_end (void)
37356 char name[32];
37357 tree decl;
37359 if (!TARGET_LINK_STACK)
37360 return;
37362 get_ppc476_thunk_name (name);
37364 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37365 build_function_type_list (void_type_node, NULL_TREE));
37366 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37367 NULL_TREE, void_type_node);
37368 TREE_PUBLIC (decl) = 1;
37369 TREE_STATIC (decl) = 1;
37371 #if RS6000_WEAK
37372 if (USE_HIDDEN_LINKONCE)
37374 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37375 targetm.asm_out.unique_section (decl, 0);
37376 switch_to_section (get_named_section (decl, NULL, 0));
37377 DECL_WEAK (decl) = 1;
37378 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37379 targetm.asm_out.globalize_label (asm_out_file, name);
37380 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37381 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37383 else
37384 #endif
37386 switch_to_section (text_section);
37387 ASM_OUTPUT_LABEL (asm_out_file, name);
37390 DECL_INITIAL (decl) = make_node (BLOCK);
37391 current_function_decl = decl;
37392 allocate_struct_function (decl, false);
37393 init_function_start (decl);
37394 first_function_block_is_cold = false;
37395 /* Make sure unwind info is emitted for the thunk if needed. */
37396 final_start_function (emit_barrier (), asm_out_file, 1);
37398 fputs ("\tblr\n", asm_out_file);
37400 final_end_function ();
37401 init_insn_lengths ();
37402 free_after_compilation (cfun);
37403 set_cfun (NULL);
37404 current_function_decl = NULL;
37407 /* Add r30 to hard reg set if the prologue sets it up and it is not
37408 pic_offset_table_rtx. */
37410 static void
37411 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37413 if (!TARGET_SINGLE_PIC_BASE
37414 && TARGET_TOC
37415 && TARGET_MINIMAL_TOC
37416 && get_pool_size () != 0)
37417 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37418 if (cfun->machine->split_stack_argp_used)
37419 add_to_hard_reg_set (&set->set, Pmode, 12);
37423 /* Helper function for rs6000_split_logical to emit a logical instruction after
37424 splitting the operation into single GPR registers.
37426 DEST is the destination register.
37427 OP1 and OP2 are the input source registers.
37428 CODE is the base operation (AND, IOR, XOR, NOT).
37429 MODE is the machine mode.
37430 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37431 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37432 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37434 static void
37435 rs6000_split_logical_inner (rtx dest,
37436 rtx op1,
37437 rtx op2,
37438 enum rtx_code code,
37439 machine_mode mode,
37440 bool complement_final_p,
37441 bool complement_op1_p,
37442 bool complement_op2_p)
37444 rtx bool_rtx;
37446 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37447 if (op2 && GET_CODE (op2) == CONST_INT
37448 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37449 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37451 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37452 HOST_WIDE_INT value = INTVAL (op2) & mask;
37454 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37455 if (code == AND)
37457 if (value == 0)
37459 emit_insn (gen_rtx_SET (dest, const0_rtx));
37460 return;
37463 else if (value == mask)
37465 if (!rtx_equal_p (dest, op1))
37466 emit_insn (gen_rtx_SET (dest, op1));
37467 return;
37471 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37472 into separate ORI/ORIS or XORI/XORIS instructions. */
37473 else if (code == IOR || code == XOR)
37475 if (value == 0)
37477 if (!rtx_equal_p (dest, op1))
37478 emit_insn (gen_rtx_SET (dest, op1));
37479 return;
37484 if (code == AND && mode == SImode
37485 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37487 emit_insn (gen_andsi3 (dest, op1, op2));
37488 return;
37491 if (complement_op1_p)
37492 op1 = gen_rtx_NOT (mode, op1);
37494 if (complement_op2_p)
37495 op2 = gen_rtx_NOT (mode, op2);
37497 /* For canonical RTL, if only one arm is inverted it is the first. */
37498 if (!complement_op1_p && complement_op2_p)
37499 std::swap (op1, op2);
37501 bool_rtx = ((code == NOT)
37502 ? gen_rtx_NOT (mode, op1)
37503 : gen_rtx_fmt_ee (code, mode, op1, op2));
37505 if (complement_final_p)
37506 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37508 emit_insn (gen_rtx_SET (dest, bool_rtx));
37511 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37512 operations are split immediately during RTL generation to allow for more
37513 optimizations of the AND/IOR/XOR.
37515 OPERANDS is an array containing the destination and two input operands.
37516 CODE is the base operation (AND, IOR, XOR, NOT).
37517 MODE is the machine mode.
37518 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37519 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37520 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
37521 CLOBBER_REG is either NULL or a scratch register of type CC to allow
37522 formation of the AND instructions. */
37524 static void
37525 rs6000_split_logical_di (rtx operands[3],
37526 enum rtx_code code,
37527 bool complement_final_p,
37528 bool complement_op1_p,
37529 bool complement_op2_p)
37531 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37532 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37533 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37534 enum hi_lo { hi = 0, lo = 1 };
37535 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37536 size_t i;
37538 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37539 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37540 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37541 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37543 if (code == NOT)
37544 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37545 else
37547 if (GET_CODE (operands[2]) != CONST_INT)
37549 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37550 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37552 else
37554 HOST_WIDE_INT value = INTVAL (operands[2]);
37555 HOST_WIDE_INT value_hi_lo[2];
37557 gcc_assert (!complement_final_p);
37558 gcc_assert (!complement_op1_p);
37559 gcc_assert (!complement_op2_p);
37561 value_hi_lo[hi] = value >> 32;
37562 value_hi_lo[lo] = value & lower_32bits;
37564 for (i = 0; i < 2; i++)
37566 HOST_WIDE_INT sub_value = value_hi_lo[i];
37568 if (sub_value & sign_bit)
37569 sub_value |= upper_32bits;
37571 op2_hi_lo[i] = GEN_INT (sub_value);
37573 /* If this is an AND instruction, check to see if we need to load
37574 the value in a register. */
37575 if (code == AND && sub_value != -1 && sub_value != 0
37576 && !and_operand (op2_hi_lo[i], SImode))
37577 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37582 for (i = 0; i < 2; i++)
37584 /* Split large IOR/XOR operations. */
37585 if ((code == IOR || code == XOR)
37586 && GET_CODE (op2_hi_lo[i]) == CONST_INT
37587 && !complement_final_p
37588 && !complement_op1_p
37589 && !complement_op2_p
37590 && !logical_const_operand (op2_hi_lo[i], SImode))
37592 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
37593 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
37594 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
37595 rtx tmp = gen_reg_rtx (SImode);
37597 /* Make sure the constant is sign extended. */
37598 if ((hi_16bits & sign_bit) != 0)
37599 hi_16bits |= upper_32bits;
37601 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
37602 code, SImode, false, false, false);
37604 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
37605 code, SImode, false, false, false);
37607 else
37608 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
37609 code, SImode, complement_final_p,
37610 complement_op1_p, complement_op2_p);
37613 return;
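/* Illustrative sketch (not part of GCC): how a 32-bit constant is split
   above into the two 16-bit immediates of an ORIS/ORI (or XORIS/XORI)
   pair, including the sign extension of the high half that keeps the
   HOST_WIDE_INT value consistent.  The function name is hypothetical.  */

static void ATTRIBUTE_UNUSED
example_split_logical_const (HOST_WIDE_INT value,
			     HOST_WIDE_INT *hi_16bits,
			     HOST_WIDE_INT *lo_16bits)
{
  *hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);	/* ORIS immediate */
  *lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);	/* ORI immediate */

  /* Keep the high half sign extended, mirroring the fix-up above.  */
  if (*hi_16bits & HOST_WIDE_INT_C (0x80000000))
    *hi_16bits |= ~HOST_WIDE_INT_C (0xffffffff);
}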
37616 /* Split the insns that make up boolean operations operating on multiple GPR
37617 registers. The boolean MD patterns ensure that the inputs either are
37618 exactly the same as the output registers, or there is no overlap.
37620 OPERANDS is an array containing the destination and two input operands.
37621 CODE is the base operation (AND, IOR, XOR, NOT).
37622 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37623 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37624 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37626 void
37627 rs6000_split_logical (rtx operands[3],
37628 enum rtx_code code,
37629 bool complement_final_p,
37630 bool complement_op1_p,
37631 bool complement_op2_p)
37633 machine_mode mode = GET_MODE (operands[0]);
37634 machine_mode sub_mode;
37635 rtx op0, op1, op2;
37636 int sub_size, regno0, regno1, nregs, i;
37638 /* If this is DImode, use the specialized version that can run before
37639 register allocation. */
37640 if (mode == DImode && !TARGET_POWERPC64)
37642 rs6000_split_logical_di (operands, code, complement_final_p,
37643 complement_op1_p, complement_op2_p);
37644 return;
37647 op0 = operands[0];
37648 op1 = operands[1];
37649 op2 = (code == NOT) ? NULL_RTX : operands[2];
37650 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
37651 sub_size = GET_MODE_SIZE (sub_mode);
37652 regno0 = REGNO (op0);
37653 regno1 = REGNO (op1);
37655 gcc_assert (reload_completed);
37656 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37657 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37659 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
37660 gcc_assert (nregs > 1);
37662 if (op2 && REG_P (op2))
37663 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
37665 for (i = 0; i < nregs; i++)
37667 int offset = i * sub_size;
37668 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
37669 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
37670 rtx sub_op2 = ((code == NOT)
37671 ? NULL_RTX
37672 : simplify_subreg (sub_mode, op2, mode, offset));
37674 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
37675 complement_final_p, complement_op1_p,
37676 complement_op2_p);
37679 return;
37683 /* Return true if the peephole2 can combine an addis instruction with a
37684 load that uses an offset, so that the pair can be fused together on a
37685 power8. */
37687 bool
37688 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
37689 rtx addis_value, /* addis value. */
37690 rtx target, /* target register that is loaded. */
37691 rtx mem) /* bottom part of the memory addr. */
37693 rtx addr;
37694 rtx base_reg;
37696 /* Validate arguments. */
37697 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37698 return false;
37700 if (!base_reg_operand (target, GET_MODE (target)))
37701 return false;
37703 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37704 return false;
37706 /* Allow sign/zero extension. */
37707 if (GET_CODE (mem) == ZERO_EXTEND
37708 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
37709 mem = XEXP (mem, 0);
37711 if (!MEM_P (mem))
37712 return false;
37714 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
37715 return false;
37717 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37718 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
37719 return false;
37721 /* Validate that the register used to load the high value is either the
37722 register being loaded, or we can safely replace its use.
37724 This function is only called from the peephole2 pass and we assume that
37725 there are 2 instructions in the peephole (addis and load), so we want to
37726 check if the target register was not used in the memory address and the
37727 register to hold the addis result is dead after the peephole. */
37728 if (REGNO (addis_reg) != REGNO (target))
37730 if (reg_mentioned_p (target, mem))
37731 return false;
37733 if (!peep2_reg_dead_p (2, addis_reg))
37734 return false;
37736 /* If the target register being loaded is the stack pointer, we must
37737 avoid loading any other value into it, even temporarily. */
37738 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
37739 return false;
37742 base_reg = XEXP (addr, 0);
37743 return REGNO (addis_reg) == REGNO (base_reg);
37746 /* During the peephole2 pass, adjust and expand the insns for a load fusion
37747 sequence. We adjust the addis register to use the target register. If the
37748 load sign extends, we adjust the code to do the zero extending load, and an
37749 explicit sign extension later since the fusion only covers zero extending
37750 loads.
37752 The operands are:
37753 operands[0] register set with addis (to be replaced with target)
37754 operands[1] value set via addis
37755 operands[2] target register being loaded
37756 operands[3] D-form memory reference using operands[0]. */
37758 void
37759 expand_fusion_gpr_load (rtx *operands)
37761 rtx addis_value = operands[1];
37762 rtx target = operands[2];
37763 rtx orig_mem = operands[3];
37764 rtx new_addr, new_mem, orig_addr, offset;
37765 enum rtx_code plus_or_lo_sum;
37766 machine_mode target_mode = GET_MODE (target);
37767 machine_mode extend_mode = target_mode;
37768 machine_mode ptr_mode = Pmode;
37769 enum rtx_code extend = UNKNOWN;
37771 if (GET_CODE (orig_mem) == ZERO_EXTEND
37772 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
37774 extend = GET_CODE (orig_mem);
37775 orig_mem = XEXP (orig_mem, 0);
37776 target_mode = GET_MODE (orig_mem);
37779 gcc_assert (MEM_P (orig_mem));
37781 orig_addr = XEXP (orig_mem, 0);
37782 plus_or_lo_sum = GET_CODE (orig_addr);
37783 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37785 offset = XEXP (orig_addr, 1);
37786 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37787 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37789 if (extend != UNKNOWN)
37790 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
37792 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37793 UNSPEC_FUSION_GPR);
37794 emit_insn (gen_rtx_SET (target, new_mem));
37796 if (extend == SIGN_EXTEND)
37798 int sub_off = ((BYTES_BIG_ENDIAN)
37799 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
37800 : 0);
37801 rtx sign_reg
37802 = simplify_subreg (target_mode, target, extend_mode, sub_off);
37804 emit_insn (gen_rtx_SET (target,
37805 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
37808 return;
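/* Illustrative sketch (not part of GCC): the sign-extension fix-up
   above in scalar terms.  Power8 fusion only covers zero-extending
   loads, so a sign-extending fused load is emitted as a zero extension
   followed by an explicit sign extension of the narrow value; shown
   here, as an assumption-level example, for a 16-bit load.  */

static HOST_WIDE_INT ATTRIBUTE_UNUSED
example_sign_extend_after_zero_extend (unsigned short loaded)
{
  HOST_WIDE_INT zero_extended = loaded;	/* what the fused load produces */
  return (short) zero_extended;		/* the explicit sign extension */
}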
37811 /* Emit the addis instruction that will be part of a fused instruction
37812 sequence. */
37814 void
37815 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
37816 const char *mode_name)
37818 rtx fuse_ops[10];
37819 char insn_template[80];
37820 const char *addis_str = NULL;
37821 const char *comment_str = ASM_COMMENT_START;
37823 if (*comment_str == ' ')
37824 comment_str++;
37826 /* Emit the addis instruction. */
37827 fuse_ops[0] = target;
37828 if (satisfies_constraint_L (addis_value))
37830 fuse_ops[1] = addis_value;
37831 addis_str = "lis %0,%v1";
37834 else if (GET_CODE (addis_value) == PLUS)
37836 rtx op0 = XEXP (addis_value, 0);
37837 rtx op1 = XEXP (addis_value, 1);
37839 if (REG_P (op0) && CONST_INT_P (op1)
37840 && satisfies_constraint_L (op1))
37842 fuse_ops[1] = op0;
37843 fuse_ops[2] = op1;
37844 addis_str = "addis %0,%1,%v2";
37848 else if (GET_CODE (addis_value) == HIGH)
37850 rtx value = XEXP (addis_value, 0);
37851 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
37853 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
37854 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
37855 if (TARGET_ELF)
37856 addis_str = "addis %0,%2,%1@toc@ha";
37858 else if (TARGET_XCOFF)
37859 addis_str = "addis %0,%1@u(%2)";
37861 else
37862 gcc_unreachable ();
37865 else if (GET_CODE (value) == PLUS)
37867 rtx op0 = XEXP (value, 0);
37868 rtx op1 = XEXP (value, 1);
37870 if (GET_CODE (op0) == UNSPEC
37871 && XINT (op0, 1) == UNSPEC_TOCREL
37872 && CONST_INT_P (op1))
37874 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
37875 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
37876 fuse_ops[3] = op1;
37877 if (TARGET_ELF)
37878 addis_str = "addis %0,%2,%1+%3@toc@ha";
37880 else if (TARGET_XCOFF)
37881 addis_str = "addis %0,%1+%3@u(%2)";
37883 else
37884 gcc_unreachable ();
37888 else if (satisfies_constraint_L (value))
37890 fuse_ops[1] = value;
37891 addis_str = "lis %0,%v1";
37894 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
37896 fuse_ops[1] = value;
37897 addis_str = "lis %0,%1@ha";
37901 if (!addis_str)
37902 fatal_insn ("Could not generate addis value for fusion", addis_value);
37904 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
37905 comment, mode_name);
37906 output_asm_insn (insn_template, fuse_ops);
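/* Illustrative sketch (not part of GCC): the high-adjusted/low split
   behind an addis + D-form pair like the ones emitted above.  The
   D-form offset is sign extended, so the high part must be rounded up
   ("@ha" rather than "@h") whenever the low 16 bits are negative;
   afterwards (ha << 16) + lo recovers the original value.  The
   function name is hypothetical.  */

static void ATTRIBUTE_UNUSED
example_ha_lo_split (HOST_WIDE_INT value,
		     HOST_WIDE_INT *ha, HOST_WIDE_INT *lo)
{
  *lo = (short) (value & 0xffff);	/* sign-extended D-form offset */
  *ha = (value + 0x8000) >> 16;		/* addis immediate ("@ha") */
}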
37909 /* Emit a D-form load or store instruction that is the second instruction
37910 of a fusion sequence. */
37912 void
37913 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
37914 const char *insn_str)
37916 rtx fuse_ops[10];
37917 char insn_template[80];
37919 fuse_ops[0] = load_store_reg;
37920 fuse_ops[1] = addis_reg;
37922 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
37924 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
37925 fuse_ops[2] = offset;
37926 output_asm_insn (insn_template, fuse_ops);
37929 else if (GET_CODE (offset) == UNSPEC
37930 && XINT (offset, 1) == UNSPEC_TOCREL)
37932 if (TARGET_ELF)
37933 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
37935 else if (TARGET_XCOFF)
37936 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37938 else
37939 gcc_unreachable ();
37941 fuse_ops[2] = XVECEXP (offset, 0, 0);
37942 output_asm_insn (insn_template, fuse_ops);
37945 else if (GET_CODE (offset) == PLUS
37946 && GET_CODE (XEXP (offset, 0)) == UNSPEC
37947 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
37948 && CONST_INT_P (XEXP (offset, 1)))
37950 rtx tocrel_unspec = XEXP (offset, 0);
37951 if (TARGET_ELF)
37952 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
37954 else if (TARGET_XCOFF)
37955 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
37957 else
37958 gcc_unreachable ();
37960 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
37961 fuse_ops[3] = XEXP (offset, 1);
37962 output_asm_insn (insn_template, fuse_ops);
37965 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
37967 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37969 fuse_ops[2] = offset;
37970 output_asm_insn (insn_template, fuse_ops);
37973 else
37974 fatal_insn ("Unable to generate load/store offset for fusion", offset);
37976 return;
37979 /* Wrap a TOC address that can be fused to indicate that special fusion
37980 processing is needed. */
37983 fusion_wrap_memory_address (rtx old_mem)
37985 rtx old_addr = XEXP (old_mem, 0);
37986 rtvec v = gen_rtvec (1, old_addr);
37987 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
37988 return replace_equiv_address_nv (old_mem, new_addr, false);
37991 /* Given an address, convert it into the addis and load offset parts. Addresses
37992 created during the peephole2 process look like:
37993 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
37994 (unspec [(...)] UNSPEC_TOCREL))
37996 Addresses created via toc fusion look like:
37997 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
37999 static void
38000 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38002 rtx hi, lo;
38004 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38006 lo = XVECEXP (addr, 0, 0);
38007 hi = gen_rtx_HIGH (Pmode, lo);
38009 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38011 hi = XEXP (addr, 0);
38012 lo = XEXP (addr, 1);
38014 else
38015 gcc_unreachable ();
38017 *p_hi = hi;
38018 *p_lo = lo;
38021 /* Return a string to fuse an addis instruction with a gpr load into the same
38022 register that the addis instruction set up. The address that is used
38023 is the logical address that was formed during peephole2:
38024 (lo_sum (high) (low-part))
38026 Or the address is the TOC address that is wrapped before register allocation:
38027 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38029 The code is complicated, so we call output_asm_insn directly, and just
38030 return "". */
38032 const char *
38033 emit_fusion_gpr_load (rtx target, rtx mem)
38035 rtx addis_value;
38036 rtx addr;
38037 rtx load_offset;
38038 const char *load_str = NULL;
38039 const char *mode_name = NULL;
38040 machine_mode mode;
38042 if (GET_CODE (mem) == ZERO_EXTEND)
38043 mem = XEXP (mem, 0);
38045 gcc_assert (REG_P (target) && MEM_P (mem));
38047 addr = XEXP (mem, 0);
38048 fusion_split_address (addr, &addis_value, &load_offset);
38050 /* Now emit the load instruction to the same register. */
38051 mode = GET_MODE (mem);
38052 switch (mode)
38054 case QImode:
38055 mode_name = "char";
38056 load_str = "lbz";
38057 break;
38059 case HImode:
38060 mode_name = "short";
38061 load_str = "lhz";
38062 break;
38064 case SImode:
38065 case SFmode:
38066 mode_name = (mode == SFmode) ? "float" : "int";
38067 load_str = "lwz";
38068 break;
38070 case DImode:
38071 case DFmode:
38072 gcc_assert (TARGET_POWERPC64);
38073 mode_name = (mode == DFmode) ? "double" : "long";
38074 load_str = "ld";
38075 break;
38077 default:
38078 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38081 /* Emit the addis instruction. */
38082 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
38084 /* Emit the D-form load instruction. */
38085 emit_fusion_load_store (target, target, load_offset, load_str);
38087 return "";
38091 /* Return true if the peephole2 can combine an addis instruction with a
38092 load/store memory operation into a fused pair. This form of fusion
38093 was added in the ISA 3.0 (power9) hardware. */
38095 bool
38096 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38097 rtx addis_value, /* addis value. */
38098 rtx dest, /* destination (memory or register). */
38099 rtx src) /* source (register or memory). */
38101 rtx addr, mem, offset;
38102 machine_mode mode = GET_MODE (src);
38104 /* Validate arguments. */
38105 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38106 return false;
38108 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38109 return false;
38111 /* Ignore extend operations that are part of the load. */
38112 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38113 src = XEXP (src, 0);
38115 /* Test for memory<-register or register<-memory. */
38116 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38118 if (!MEM_P (dest))
38119 return false;
38121 mem = dest;
38124 else if (MEM_P (src))
38126 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38127 return false;
38129 mem = src;
38132 else
38133 return false;
38135 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38136 if (GET_CODE (addr) == PLUS)
38138 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38139 return false;
38141 return satisfies_constraint_I (XEXP (addr, 1));
38144 else if (GET_CODE (addr) == LO_SUM)
38146 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38147 return false;
38149 offset = XEXP (addr, 1);
38150 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38151 return small_toc_ref (offset, GET_MODE (offset));
38153 else if (TARGET_ELF && !TARGET_POWERPC64)
38154 return CONSTANT_P (offset);
38157 return false;
38160 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38161 load sequence.
38163 The operands are:
38164 operands[0] register set with addis
38165 operands[1] value set via addis
38166 operands[2] target register being loaded
38167 operands[3] D-form memory reference using operands[0].
38169 This is similar to the fusion introduced with power8, except it scales to
38170 both loads/stores and does not require the result register to be the same as
38171 the base register. At the moment, we only do this if the register set
38172 with addis is dead. */
38174 void
38175 expand_fusion_p9_load (rtx *operands)
38177 rtx tmp_reg = operands[0];
38178 rtx addis_value = operands[1];
38179 rtx target = operands[2];
38180 rtx orig_mem = operands[3];
38181 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38182 enum rtx_code plus_or_lo_sum;
38183 machine_mode target_mode = GET_MODE (target);
38184 machine_mode extend_mode = target_mode;
38185 machine_mode ptr_mode = Pmode;
38186 enum rtx_code extend = UNKNOWN;
38188 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38190 extend = GET_CODE (orig_mem);
38191 orig_mem = XEXP (orig_mem, 0);
38192 target_mode = GET_MODE (orig_mem);
38195 gcc_assert (MEM_P (orig_mem));
38197 orig_addr = XEXP (orig_mem, 0);
38198 plus_or_lo_sum = GET_CODE (orig_addr);
38199 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38201 offset = XEXP (orig_addr, 1);
38202 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38203 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38205 if (extend != UNKNOWN)
38206 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38208 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38209 UNSPEC_FUSION_P9);
38211 set = gen_rtx_SET (target, new_mem);
38212 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38213 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38214 emit_insn (insn);
38216 return;
38219 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38220 store sequence.
38222 The operands are:
38223 operands[0] register set with addis
38224 operands[1] value set via addis
38225 operands[2] target D-form memory being stored to
38226 operands[3] register being stored
38228 This is similar to the fusion introduced with power8, except it scales to
38229 both loads/stores and does not require the result register to be the same as
38230 the base register. At the moment, we only do this if the register set
38231 with addis is dead. */
38233 void
38234 expand_fusion_p9_store (rtx *operands)
38236 rtx tmp_reg = operands[0];
38237 rtx addis_value = operands[1];
38238 rtx orig_mem = operands[2];
38239 rtx src = operands[3];
38240 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38241 enum rtx_code plus_or_lo_sum;
38242 machine_mode target_mode = GET_MODE (orig_mem);
38243 machine_mode ptr_mode = Pmode;
38245 gcc_assert (MEM_P (orig_mem));
38247 orig_addr = XEXP (orig_mem, 0);
38248 plus_or_lo_sum = GET_CODE (orig_addr);
38249 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38251 offset = XEXP (orig_addr, 1);
38252 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38253 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38255 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38256 UNSPEC_FUSION_P9);
38258 set = gen_rtx_SET (new_mem, new_src);
38259 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38260 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38261 emit_insn (insn);
38263 return;
38266 /* Return a string to fuse an addis instruction with a load using extended
38267 fusion. The address that is used is the logical address that was formed
38268 during peephole2: (lo_sum (high) (low-part))
38270 The code is complicated, so we call output_asm_insn directly, and just
38271 return "". */
38273 const char *
38274 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
38276 machine_mode mode = GET_MODE (reg);
38277 rtx hi;
38278 rtx lo;
38279 rtx addr;
38280 const char *load_string;
38281 int r;
38283 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
38285 mem = XEXP (mem, 0);
38286 mode = GET_MODE (mem);
38289 if (GET_CODE (reg) == SUBREG)
38291 gcc_assert (SUBREG_BYTE (reg) == 0);
38292 reg = SUBREG_REG (reg);
38295 if (!REG_P (reg))
38296 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
38298 r = REGNO (reg);
38299 if (FP_REGNO_P (r))
38301 if (mode == SFmode)
38302 load_string = "lfs";
38303 else if (mode == DFmode || mode == DImode)
38304 load_string = "lfd";
38305 else
38306 gcc_unreachable ();
38308 else if (INT_REGNO_P (r))
38310 switch (mode)
38312 case QImode:
38313 load_string = "lbz";
38314 break;
38315 case HImode:
38316 load_string = "lhz";
38317 break;
38318 case SImode:
38319 case SFmode:
38320 load_string = "lwz";
38321 break;
38322 case DImode:
38323 case DFmode:
38324 if (!TARGET_POWERPC64)
38325 gcc_unreachable ();
38326 load_string = "ld";
38327 break;
38328 default:
38329 gcc_unreachable ();
38332 else
38333 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
38335 if (!MEM_P (mem))
38336 fatal_insn ("emit_fusion_p9_load not MEM", mem);
38338 addr = XEXP (mem, 0);
38339 fusion_split_address (addr, &hi, &lo);
38341 /* Emit the addis instruction. */
38342 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
38344 /* Emit the D-form load instruction. */
38345 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
38347 return "";
38350 /* Return a string to fuse an addis instruction with a store using extended
38351 fusion. The address that is used is the logical address that was formed
38352 during peephole2: (lo_sum (high) (low-part))
38354 The code is complicated, so we call output_asm_insn directly, and just
38355 return "". */
38357 const char *
38358 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
38360 machine_mode mode = GET_MODE (reg);
38361 rtx hi;
38362 rtx lo;
38363 rtx addr;
38364 const char *store_string;
38365 int r;
38367 if (GET_CODE (reg) == SUBREG)
38369 gcc_assert (SUBREG_BYTE (reg) == 0);
38370 reg = SUBREG_REG (reg);
38373 if (!REG_P (reg))
38374 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
38376 r = REGNO (reg);
38377 if (FP_REGNO_P (r))
38379 if (mode == SFmode)
38380 store_string = "stfs";
38381 else if (mode == DFmode)
38382 store_string = "stfd";
38383 else
38384 gcc_unreachable ();
38386 else if (INT_REGNO_P (r))
38388 switch (mode)
38390 case QImode:
38391 store_string = "stb";
38392 break;
38393 case HImode:
38394 store_string = "sth";
38395 break;
38396 case SImode:
38397 case SFmode:
38398 store_string = "stw";
38399 break;
38400 case DImode:
38401 case DFmode:
38402 if (!TARGET_POWERPC64)
38403 gcc_unreachable ();
38404 store_string = "std";
38405 break;
38406 default:
38407 gcc_unreachable ();
38410 else
38411 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
38413 if (!MEM_P (mem))
38414 fatal_insn ("emit_fusion_p9_store not MEM", mem);
38416 addr = XEXP (mem, 0);
38417 fusion_split_address (addr, &hi, &lo);
38419 /* Emit the addis instruction. */
38420 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
38422 /* Emit the D-form store instruction. */
38423 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
38425 return "";
38429 /* Analyze vector computations and remove unnecessary doubleword
38430 swaps (xxswapdi instructions). This pass is performed only
38431 for little-endian VSX code generation.
38433 For this specific case, loads and stores of 4x32 and 2x64 vectors
38434 are inefficient. These are implemented using the lxvd2x and
38435 stxvd2x instructions, which invert the order of doublewords in
38436 a vector register. Thus the code generation inserts an xxswapdi
38437 after each such load, and prior to each such store. (For spill
38438 code after register assignment, an additional xxswapdi is inserted
38439 following each store in order to return a hard register to its
38440 unpermuted value.)
38442 The extra xxswapdi instructions reduce performance. This can be
38443 particularly bad for vectorized code. The purpose of this pass
38444 is to reduce the number of xxswapdi instructions required for
38445 correctness.
38447 The primary insight is that much code that operates on vectors
38448 does not care about the relative order of elements in a register,
38449 so long as the correct memory order is preserved. If we have
38450 a computation where all input values are provided by lxvd2x/xxswapdi
38451 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
38452 and all intermediate computations are pure SIMD (independent of
38453 element order), then all the xxswapdi's associated with the loads
38454 and stores may be removed.
38456 This pass uses some of the infrastructure and logical ideas from
38457 the "web" pass in web.c. We create maximal webs of computations
38458 fitting the description above using union-find. Each such web is
38459 then optimized by removing its unnecessary xxswapdi instructions.
38461 The pass is placed prior to global optimization so that we can
38462 perform the optimization in the safest and simplest way possible;
38463 that is, by replacing each xxswapdi insn with a register copy insn.
38464 Subsequent forward propagation will remove copies where possible.
38466 There are some operations sensitive to element order for which we
38467 can still allow the operation, provided we modify those operations.
38468 These include CONST_VECTORs, for which we must swap the first and
38469 second halves of the constant vector; and SUBREGs, for which we
38470 must adjust the byte offset to account for the swapped doublewords.
38471 A remaining opportunity would be non-immediate-form splats, for
38472 which we should adjust the selected lane of the input. We should
38473 also make code generation adjustments for sum-across operations,
38474 since this is a common vectorizer reduction.
38476 Because we run prior to the first split, we can see loads and stores
38477 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
38478 vector loads and stores that have not yet been split into a permuting
38479 load/store and a swap. (One way this can happen is with a builtin
38480 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
38481 than deleting a swap, we convert the load/store into a permuting
38482 load/store (which effectively removes the swap). */
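/* Illustrative sketch (not part of GCC): the union-find scheme the pass
   borrows from web.c, reduced to an array of parent indices.  The real
   code uses web_entry_base from df.h; the example_* names here are
   hypothetical.  */

static unsigned int ATTRIBUTE_UNUSED
example_web_find (unsigned int *parent, unsigned int x)
{
  while (parent[x] != x)
    {
      parent[x] = parent[parent[x]];	/* path halving */
      x = parent[x];
    }
  return x;
}

static void ATTRIBUTE_UNUSED
example_web_union (unsigned int *parent, unsigned int a, unsigned int b)
{
  parent[example_web_find (parent, a)] = example_web_find (parent, b);
}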
38484 /* Notes on Permutes
38486 We do not currently handle computations that contain permutes. There
38487 is a general transformation that can be performed correctly, but it
38488 may introduce more expensive code than it replaces. To handle these
38489 would require a cost model to determine when to perform the optimization.
38490 This commentary records how this could be done if desired.
38492 The most general permute is something like this (example for V16QI):
38494 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
38495 (parallel [(const_int a0) (const_int a1)
38497 (const_int a14) (const_int a15)]))
38499 where a0,...,a15 are in [0,31] and select elements from op1 and op2
38500 to produce the result.
38502 Regardless of mode, we can convert the PARALLEL to a mask of 16
38503 byte-element selectors. Let's call this M, with M[i] representing
38504 the ith byte-element selector value. Then if we swap doublewords
38505 throughout the computation, we can get correct behavior by replacing
38506 M with M' as follows:
38508 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
38509 { ((M[i]+8)%16)+16 : M[i] in [16,31]
38511 This seems promising at first, since we are just replacing one mask
38512 with another. But certain masks are preferable to others. If M
38513 is a mask that matches a vmrghh pattern, for example, M' certainly
38514 will not. Instead of a single vmrghh, we would generate a load of
38515 M' and a vperm. So we would need to know how many xxswapd's we can
38516 remove as a result of this transformation to determine if it's
38517 profitable; and preferably the logic would need to be aware of all
38518 the special preferable masks.
38520 Another form of permute is an UNSPEC_VPERM, in which the mask is
38521 already in a register. In some cases, this mask may be a constant
38522 that we can discover with ud-chains, in which case the above
38523 transformation is ok. However, the common usage here is for the
38524 mask to be produced by an UNSPEC_LVSL, in which case the mask
38525 cannot be known at compile time. In such a case we would have to
38526 generate several instructions to compute M' as above at run time,
38527 and a cost model is needed again.
38529 However, when the mask M for an UNSPEC_VPERM is loaded from the
38530 constant pool, we can replace M with M' as above at no cost
38531 beyond adding a constant pool entry. */
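/* Illustrative sketch (not part of GCC): the mask rewrite M -> M'
   described above.  Selector values 0..15 pick bytes of op1 and 16..31
   pick bytes of op2; swapping doublewords rotates each half-range of
   selectors by 8.  */

static unsigned char ATTRIBUTE_UNUSED
example_swap_perm_selector (unsigned char m)
{
  if (m < 16)
    return (m + 8) % 16;		/* M[i] in [0,15]  */
  return (m + 8) % 16 + 16;		/* M[i] in [16,31] */
}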
38533 /* This is based on the union-find logic in web.c. web_entry_base is
38534 defined in df.h. */
38535 class swap_web_entry : public web_entry_base
38537 public:
38538 /* Pointer to the insn. */
38539 rtx_insn *insn;
38540 /* Set if insn contains a mention of a vector register. All other
38541 fields are undefined if this field is unset. */
38542 unsigned int is_relevant : 1;
38543 /* Set if insn is a load. */
38544 unsigned int is_load : 1;
38545 /* Set if insn is a store. */
38546 unsigned int is_store : 1;
38547 /* Set if insn is a doubleword swap. This can either be a register swap
38548 or a permuting load or store (test is_load and is_store for this). */
38549 unsigned int is_swap : 1;
38550 /* Set if the insn has a live-in use of a parameter register. */
38551 unsigned int is_live_in : 1;
38552 /* Set if the insn has a live-out def of a return register. */
38553 unsigned int is_live_out : 1;
38554 /* Set if the insn contains a subreg reference of a vector register. */
38555 unsigned int contains_subreg : 1;
38556 /* Set if the insn contains a 128-bit integer operand. */
38557 unsigned int is_128_int : 1;
38558 /* Set if this is a call-insn. */
38559 unsigned int is_call : 1;
38560 /* Set if this insn does not perform a vector operation for which
38561 element order matters, or if we know how to fix it up if it does.
38562 Undefined if is_swap is set. */
38563 unsigned int is_swappable : 1;
38564 /* A nonzero value indicates what kind of special handling for this
38565 insn is required if doublewords are swapped. Undefined if
38566 is_swappable is not set. */
38567 unsigned int special_handling : 4;
38568 /* Set if the web represented by this entry cannot be optimized. */
38569 unsigned int web_not_optimizable : 1;
38570 /* Set if this insn should be deleted. */
38571 unsigned int will_delete : 1;
38574 enum special_handling_values {
38575 SH_NONE = 0,
38576 SH_CONST_VECTOR,
38577 SH_SUBREG,
38578 SH_NOSWAP_LD,
38579 SH_NOSWAP_ST,
38580 SH_EXTRACT,
38581 SH_SPLAT,
38582 SH_XXPERMDI,
38583 SH_CONCAT,
38584 SH_VPERM
38587 /* Union INSN with all insns containing definitions that reach USE.
38588 Detect whether USE is live-in to the current function. */
38589 static void
38590 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
38592 struct df_link *link = DF_REF_CHAIN (use);
38594 if (!link)
38595 insn_entry[INSN_UID (insn)].is_live_in = 1;
38597 while (link)
38599 if (DF_REF_IS_ARTIFICIAL (link->ref))
38600 insn_entry[INSN_UID (insn)].is_live_in = 1;
38602 if (DF_REF_INSN_INFO (link->ref))
38604 rtx def_insn = DF_REF_INSN (link->ref);
38605 (void)unionfind_union (insn_entry + INSN_UID (insn),
38606 insn_entry + INSN_UID (def_insn));
38609 link = link->next;
38613 /* Union INSN with all insns containing uses reached from DEF.
38614 Detect whether DEF is live-out from the current function. */
38615 static void
38616 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
38618 struct df_link *link = DF_REF_CHAIN (def);
38620 if (!link)
38621 insn_entry[INSN_UID (insn)].is_live_out = 1;
38623 while (link)
38625 /* This could be an eh use or some other artificial use;
38626 we treat these all the same (killing the optimization). */
38627 if (DF_REF_IS_ARTIFICIAL (link->ref))
38628 insn_entry[INSN_UID (insn)].is_live_out = 1;
38630 if (DF_REF_INSN_INFO (link->ref))
38632 rtx use_insn = DF_REF_INSN (link->ref);
38633 (void)unionfind_union (insn_entry + INSN_UID (insn),
38634 insn_entry + INSN_UID (use_insn));
38637 link = link->next;
38641 /* Return 1 iff INSN is a load insn, including permuting loads that
38642 represent an lxvd2x instruction; else return 0. */
38643 static unsigned int
38644 insn_is_load_p (rtx insn)
38646 rtx body = PATTERN (insn);
38648 if (GET_CODE (body) == SET)
38650 if (GET_CODE (SET_SRC (body)) == MEM)
38651 return 1;
38653 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
38654 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
38655 return 1;
38657 return 0;
38660 if (GET_CODE (body) != PARALLEL)
38661 return 0;
38663 rtx set = XVECEXP (body, 0, 0);
38665 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
38666 return 1;
38668 return 0;
38671 /* Return 1 iff INSN is a store insn, including permuting stores that
38672 represent an stxvd2x instruction; else return 0. */
38673 static unsigned int
38674 insn_is_store_p (rtx insn)
38676 rtx body = PATTERN (insn);
38677 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
38678 return 1;
38679 if (GET_CODE (body) != PARALLEL)
38680 return 0;
38681 rtx set = XVECEXP (body, 0, 0);
38682 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
38683 return 1;
38684 return 0;
38687 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
38688 a permuting load, or a permuting store. */
38689 static unsigned int
38690 insn_is_swap_p (rtx insn)
38692 rtx body = PATTERN (insn);
38693 if (GET_CODE (body) != SET)
38694 return 0;
38695 rtx rhs = SET_SRC (body);
38696 if (GET_CODE (rhs) != VEC_SELECT)
38697 return 0;
38698 rtx parallel = XEXP (rhs, 1);
38699 if (GET_CODE (parallel) != PARALLEL)
38700 return 0;
38701 unsigned int len = XVECLEN (parallel, 0);
38702 if (len != 2 && len != 4 && len != 8 && len != 16)
38703 return 0;
38704 for (unsigned int i = 0; i < len / 2; ++i)
38706 rtx op = XVECEXP (parallel, 0, i);
38707 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
38708 return 0;
38710 for (unsigned int i = len / 2; i < len; ++i)
38712 rtx op = XVECEXP (parallel, 0, i);
38713 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
38714 return 0;
38716 return 1;
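/* For example (derived from the checks above), a V4SI doubleword swap
   has the shape

     (set (reg:V4SI d)
          (vec_select:V4SI (reg:V4SI s)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   i.e., the first half of the selection list names the second half of
   the source lanes and vice versa.  */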
38719 /* Return TRUE if INSN is a swap fed by a load from the constant pool. */
38720 static bool
38721 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
38723 unsigned uid = INSN_UID (insn);
38724 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
38725 return false;
38727 /* Find the unique use in the swap and locate its def. If the def
38728 isn't unique, punt. */
38729 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
38730 df_ref use;
38731 FOR_EACH_INSN_INFO_USE (use, insn_info)
38733 struct df_link *def_link = DF_REF_CHAIN (use);
38734 if (!def_link || def_link->next)
38735 return false;
38737 rtx def_insn = DF_REF_INSN (def_link->ref);
38738 unsigned uid2 = INSN_UID (def_insn);
38739 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
38740 return false;
38742 rtx body = PATTERN (def_insn);
38743 if (GET_CODE (body) != SET
38744 || GET_CODE (SET_SRC (body)) != VEC_SELECT
38745 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
38746 return false;
38748 rtx mem = XEXP (SET_SRC (body), 0);
38749 rtx base_reg = XEXP (mem, 0);
38751 df_ref base_use;
38752 insn_info = DF_INSN_INFO_GET (def_insn);
38753 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
38755 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
38756 continue;
38758 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
38759 if (!base_def_link || base_def_link->next)
38760 return false;
38762 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
38763 rtx tocrel_body = PATTERN (tocrel_insn);
38764 rtx base, offset;
38765 if (GET_CODE (tocrel_body) != SET)
38766 return false;
38767 /* There is an extra level of indirection for small/large
38768 code models. */
38769 rtx tocrel_expr = SET_SRC (tocrel_body);
38770 if (GET_CODE (tocrel_expr) == MEM)
38771 tocrel_expr = XEXP (tocrel_expr, 0);
38772 if (!toc_relative_expr_p (tocrel_expr, false))
38773 return false;
38774 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
38775 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
38776 return false;
38779 return true;
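/* Sketch of the sequence recognized above (register numbers and exact
   operand shapes are illustrative only):

     (set (reg:DI b) (... (unspec [(symbol_ref)] UNSPEC_TOCREL) ...))  ; tocrel_insn
     (set (reg:V16QI t)
          (vec_select:V16QI (mem:V16QI (reg:DI b)) ...))               ; swapping load
     (set (reg:V16QI m) (vec_select:V16QI (reg:V16QI t) ...))          ; INSN, the swap

   where the TOC-relative symbol must address the constant pool.  */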
38782 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
38783 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
38784 static bool
38785 v2df_reduction_p (rtx op)
38787 if (GET_MODE (op) != V2DFmode)
38788 return false;
38790 enum rtx_code code = GET_CODE (op);
38791 if (code != PLUS && code != SMIN && code != SMAX)
38792 return false;
38794 rtx concat = XEXP (op, 0);
38795 if (GET_CODE (concat) != VEC_CONCAT)
38796 return false;
38798 rtx select0 = XEXP (concat, 0);
38799 rtx select1 = XEXP (concat, 1);
38800 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
38801 return false;
38803 rtx reg0 = XEXP (select0, 0);
38804 rtx reg1 = XEXP (select1, 0);
38805 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
38806 return false;
38808 rtx parallel0 = XEXP (select0, 1);
38809 rtx parallel1 = XEXP (select1, 1);
38810 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
38811 return false;
38813 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
38814 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
38815 return false;
38817 return true;
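/* For example, the PLUS form accepted above looks like

     (plus:V2DF
       (vec_concat:V2DF
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
         (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
       ...)

   The second operand of the PLUS is not checked here; the concat of the
   two selected doublewords is what makes the pattern lane-insensitive.  */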
38820 /* Return 1 iff OP is an operand that will not be affected by having
38821 vector doublewords swapped in memory. */
38822 static unsigned int
38823 rtx_is_swappable_p (rtx op, unsigned int *special)
38825 enum rtx_code code = GET_CODE (op);
38826 int i, j;
38827 rtx parallel;
38829 switch (code)
38831 case LABEL_REF:
38832 case SYMBOL_REF:
38833 case CLOBBER:
38834 case REG:
38835 return 1;
38837 case VEC_CONCAT:
38838 case ASM_INPUT:
38839 case ASM_OPERANDS:
38840 return 0;
38842 case CONST_VECTOR:
38844 *special = SH_CONST_VECTOR;
38845 return 1;
38848 case VEC_DUPLICATE:
38849 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
38850 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
38851 it represents a vector splat for which we can do special
38852 handling. */
38853 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
38854 return 1;
38855 else if (GET_CODE (XEXP (op, 0)) == REG
38856 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
38857 /* This catches V2DF and V2DI splat, at a minimum. */
38858 return 1;
38859 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
38860 /* If the duplicated item is from a select, defer to the select
38861 processing to see if we can change the lane for the splat. */
38862 return rtx_is_swappable_p (XEXP (op, 0), special);
38863 else
38864 return 0;
38866 case VEC_SELECT:
38867 /* A vec_extract operation is ok if we change the lane. */
38868 if (GET_CODE (XEXP (op, 0)) == REG
38869 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
38870 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
38871 && XVECLEN (parallel, 0) == 1
38872 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
38874 *special = SH_EXTRACT;
38875 return 1;
38877 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
38878 XXPERMDI is a swap operation, it will be identified by
38879 insn_is_swap_p and therefore we won't get here. */
38880 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
38881 && (GET_MODE (XEXP (op, 0)) == V4DFmode
38882 || GET_MODE (XEXP (op, 0)) == V4DImode)
38883 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
38884 && XVECLEN (parallel, 0) == 2
38885 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
38886 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
38888 *special = SH_XXPERMDI;
38889 return 1;
38891 else if (v2df_reduction_p (op))
38892 return 1;
38893 else
38894 return 0;
38896 case UNSPEC:
38898 /* Various operations are unsafe for this optimization, at least
38899 without significant additional work. Permutes are obviously
38900 problematic, as both the permute control vector and the ordering
38901 of the target values are invalidated by doubleword swapping.
38902 Vector pack and unpack modify the number of vector lanes.
38903 Merge-high/low will not operate correctly on swapped operands.
38904 Vector shifts across element boundaries are clearly uncool,
38905 as are vector select and concatenate operations. Vector
38906 sum-across instructions define one operand with a specific
38907 order-dependent element, so additional fixup code would be
38908 needed to make those work. Vector set and non-immediate-form
38909 vector splat are element-order sensitive. A few of these
38910 cases might be workable with special handling if required.
38911 Adding cost modeling would be appropriate in some cases. */
38912 int val = XINT (op, 1);
38913 switch (val)
38915 default:
38916 break;
38917 case UNSPEC_VMRGH_DIRECT:
38918 case UNSPEC_VMRGL_DIRECT:
38919 case UNSPEC_VPACK_SIGN_SIGN_SAT:
38920 case UNSPEC_VPACK_SIGN_UNS_SAT:
38921 case UNSPEC_VPACK_UNS_UNS_MOD:
38922 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
38923 case UNSPEC_VPACK_UNS_UNS_SAT:
38924 case UNSPEC_VPERM:
38925 case UNSPEC_VPERM_UNS:
38926 case UNSPEC_VPERMHI:
38927 case UNSPEC_VPERMSI:
38928 case UNSPEC_VPKPX:
38929 case UNSPEC_VSLDOI:
38930 case UNSPEC_VSLO:
38931 case UNSPEC_VSRO:
38932 case UNSPEC_VSUM2SWS:
38933 case UNSPEC_VSUM4S:
38934 case UNSPEC_VSUM4UBS:
38935 case UNSPEC_VSUMSWS:
38936 case UNSPEC_VSUMSWS_DIRECT:
38937 case UNSPEC_VSX_CONCAT:
38938 case UNSPEC_VSX_SET:
38939 case UNSPEC_VSX_SLDWI:
38940 case UNSPEC_VUNPACK_HI_SIGN:
38941 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
38942 case UNSPEC_VUNPACK_LO_SIGN:
38943 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
38944 case UNSPEC_VUPKHPX:
38945 case UNSPEC_VUPKHS_V4SF:
38946 case UNSPEC_VUPKHU_V4SF:
38947 case UNSPEC_VUPKLPX:
38948 case UNSPEC_VUPKLS_V4SF:
38949 case UNSPEC_VUPKLU_V4SF:
38950 case UNSPEC_VSX_CVDPSPN:
38951 case UNSPEC_VSX_CVSPDP:
38952 case UNSPEC_VSX_CVSPDPN:
38953 case UNSPEC_VSX_EXTRACT:
38954 case UNSPEC_VSX_VSLO:
38955 return 0;
38956 case UNSPEC_VSPLT_DIRECT:
38957 *special = SH_SPLAT;
38958 return 1;
38959 case UNSPEC_REDUC_PLUS:
38960 case UNSPEC_REDUC:
38961 return 1;
38965 default:
38966 break;
38969 const char *fmt = GET_RTX_FORMAT (code);
38970 int ok = 1;
38972 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
38973 if (fmt[i] == 'e' || fmt[i] == 'u')
38975 unsigned int special_op = SH_NONE;
38976 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
38977 if (special_op == SH_NONE)
38978 continue;
38979 /* Ensure we never have two kinds of special handling
38980 for the same insn. */
38981 if (*special != SH_NONE && *special != special_op)
38982 return 0;
38983 *special = special_op;
38985 else if (fmt[i] == 'E')
38986 for (j = 0; j < XVECLEN (op, i); ++j)
38988 unsigned int special_op = SH_NONE;
38989 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
38990 if (special_op == SH_NONE)
38991 continue;
38992 /* Ensure we never have two kinds of special handling
38993 for the same insn. */
38994 if (*special != SH_NONE && *special != special_op)
38995 return 0;
38996 *special = special_op;
38999 return ok;
39002 /* Return 1 iff INSN is an insn whose pattern will not be affected by
39003    having vector doublewords swapped in memory (in which case
39004    *SPECIAL is unchanged), or whose pattern can be modified to be
39005    correct if vector doublewords are swapped in memory (in which case
39006    *SPECIAL is changed to a value indicating how). */
39007 static unsigned int
39008 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
39009 unsigned int *special)
39011 /* Calls are always bad. */
39012 if (GET_CODE (insn) == CALL_INSN)
39013 return 0;
39015 /* Loads and stores seen here are not permuting, but we can still
39016 fix them up by converting them to permuting ones. Exceptions:
39017 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
39018 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
39019 for the SET source. Also we must now make an exception for lvx
39020 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
39021 explicit "& -16") since this leads to unrecognizable insns. */
39022 rtx body = PATTERN (insn);
39023 int i = INSN_UID (insn);
39025 if (insn_entry[i].is_load)
39027 if (GET_CODE (body) == SET)
39029 rtx rhs = SET_SRC (body);
39030 gcc_assert (GET_CODE (rhs) == MEM);
39031 if (GET_CODE (XEXP (rhs, 0)) == AND)
39032 return 0;
39034 *special = SH_NOSWAP_LD;
39035 return 1;
39037 else
39038 return 0;
39041 if (insn_entry[i].is_store)
39043 if (GET_CODE (body) == SET
39044 && GET_CODE (SET_SRC (body)) != UNSPEC)
39046 rtx lhs = SET_DEST (body);
39047 gcc_assert (GET_CODE (lhs) == MEM);
39048 if (GET_CODE (XEXP (lhs, 0)) == AND)
39049 return 0;
39051 *special = SH_NOSWAP_ST;
39052 return 1;
39054 else
39055 return 0;
39058 /* A convert to single precision can be left as is provided that
39059 all of its uses are in xxspltw instructions that splat BE element
39060 zero. */
39061 if (GET_CODE (body) == SET
39062 && GET_CODE (SET_SRC (body)) == UNSPEC
39063 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
39065 df_ref def;
39066 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39068 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39070 struct df_link *link = DF_REF_CHAIN (def);
39071 if (!link)
39072 return 0;
39074 for (; link; link = link->next) {
39075 rtx use_insn = DF_REF_INSN (link->ref);
39076 rtx use_body = PATTERN (use_insn);
39077 if (GET_CODE (use_body) != SET
39078 || GET_CODE (SET_SRC (use_body)) != UNSPEC
39079 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
39080 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
39081 return 0;
39085 return 1;
39088 /* A concatenation of two doublewords is ok if we reverse the
39089 order of the inputs. */
39090 if (GET_CODE (body) == SET
39091 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
39092 && (GET_MODE (SET_SRC (body)) == V2DFmode
39093 || GET_MODE (SET_SRC (body)) == V2DImode))
39095 *special = SH_CONCAT;
39096 return 1;
39099 /* V2DF reductions are always swappable. */
39100 if (GET_CODE (body) == PARALLEL)
39102 rtx expr = XVECEXP (body, 0, 0);
39103 if (GET_CODE (expr) == SET
39104 && v2df_reduction_p (SET_SRC (expr)))
39105 return 1;
39108 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
39109 constant pool. */
39110 if (GET_CODE (body) == SET
39111 && GET_CODE (SET_SRC (body)) == UNSPEC
39112 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
39113 && XVECLEN (SET_SRC (body), 0) == 3
39114 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
39116 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
39117 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39118 df_ref use;
39119 FOR_EACH_INSN_INFO_USE (use, insn_info)
39120 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
39122 struct df_link *def_link = DF_REF_CHAIN (use);
39123 /* Punt if multiple definitions for this reg. */
39124 if (def_link && !def_link->next &&
39125 const_load_sequence_p (insn_entry,
39126 DF_REF_INSN (def_link->ref)))
39128 *special = SH_VPERM;
39129 return 1;
39134 /* Otherwise check the operands for vector lane violations. */
39135 return rtx_is_swappable_p (body, special);
39138 enum chain_purpose { FOR_LOADS, FOR_STORES };
39140 /* Return true if the UD or DU chain headed by LINK is non-empty,
39141 and every entry on the chain references an insn that is a
39142 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
39143 register swap must have only permuting loads as reaching defs.
39144 If PURPOSE is FOR_STORES, each such register swap must have only
39145 register swaps or permuting stores as reached uses. */
39146 static bool
39147 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
39148 enum chain_purpose purpose)
39150 if (!link)
39151 return false;
39153 for (; link; link = link->next)
39155 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
39156 continue;
39158 if (DF_REF_IS_ARTIFICIAL (link->ref))
39159 return false;
39161 rtx reached_insn = DF_REF_INSN (link->ref);
39162 unsigned uid = INSN_UID (reached_insn);
39163 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
39165 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
39166 || insn_entry[uid].is_store)
39167 return false;
39169 if (purpose == FOR_LOADS)
39171 df_ref use;
39172 FOR_EACH_INSN_INFO_USE (use, insn_info)
39174 struct df_link *swap_link = DF_REF_CHAIN (use);
39176 while (swap_link)
39178 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39179 return false;
39181 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
39182 unsigned uid2 = INSN_UID (swap_def_insn);
39184 /* Only permuting loads are allowed. */
39185 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
39186 return false;
39188 swap_link = swap_link->next;
39192 else if (purpose == FOR_STORES)
39194 df_ref def;
39195 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39197 struct df_link *swap_link = DF_REF_CHAIN (def);
39199 while (swap_link)
39201 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39202 return false;
39204 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
39205 unsigned uid2 = INSN_UID (swap_use_insn);
39207 /* Permuting stores or register swaps are allowed. */
39208 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
39209 return false;
39211 swap_link = swap_link->next;
39217 return true;
39220 /* Mark the xxswapdi instructions associated with permuting loads and
39221 stores for removal. Note that we only flag them for deletion here,
39222 as there is a possibility of a swap being reached from multiple
39223 loads, etc. */
39224 static void
39225 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
39227 rtx insn = insn_entry[i].insn;
39228 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39230 if (insn_entry[i].is_load)
39232 df_ref def;
39233 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39235 struct df_link *link = DF_REF_CHAIN (def);
39237 /* We know by now that these are swaps, so we can delete
39238 them confidently. */
39239 while (link)
39241 rtx use_insn = DF_REF_INSN (link->ref);
39242 insn_entry[INSN_UID (use_insn)].will_delete = 1;
39243 link = link->next;
39247 else if (insn_entry[i].is_store)
39249 df_ref use;
39250 FOR_EACH_INSN_INFO_USE (use, insn_info)
39252 /* Ignore uses for addressability. */
39253 machine_mode mode = GET_MODE (DF_REF_REG (use));
39254 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
39255 continue;
39257 struct df_link *link = DF_REF_CHAIN (use);
39259 /* We know by now that these are swaps, so we can delete
39260 them confidently. */
39261 while (link)
39263 rtx def_insn = DF_REF_INSN (link->ref);
39264 insn_entry[INSN_UID (def_insn)].will_delete = 1;
39265 link = link->next;
39271 /* OP is either a CONST_VECTOR or an expression containing one.
39272 Swap the first half of the vector with the second in the first
39273 case. Recurse to find it in the second. */
39274 static void
39275 swap_const_vector_halves (rtx op)
39277 int i;
39278 enum rtx_code code = GET_CODE (op);
39279 if (GET_CODE (op) == CONST_VECTOR)
39281 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
39282 for (i = 0; i < half_units; ++i)
39284 rtx temp = CONST_VECTOR_ELT (op, i);
39285 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
39286 CONST_VECTOR_ELT (op, i + half_units) = temp;
39289 else
39291 int j;
39292 const char *fmt = GET_RTX_FORMAT (code);
39293 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39294 if (fmt[i] == 'e' || fmt[i] == 'u')
39295 swap_const_vector_halves (XEXP (op, i));
39296 else if (fmt[i] == 'E')
39297 for (j = 0; j < XVECLEN (op, i); ++j)
39298 swap_const_vector_halves (XVECEXP (op, i, j));
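/* For example, a V4SI constant { 0, 1, 2, 3 } is rewritten in place as
   { 2, 3, 0, 1 }, so that its swapped in-memory image presents the
   elements in the original order.  */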
39302 /* Find all subregs of a vector expression that perform a narrowing,
39303 and adjust the subreg index to account for doubleword swapping. */
39304 static void
39305 adjust_subreg_index (rtx op)
39307 enum rtx_code code = GET_CODE (op);
39308 if (code == SUBREG
39309 && (GET_MODE_SIZE (GET_MODE (op))
39310 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
39312 unsigned int index = SUBREG_BYTE (op);
39313 if (index < 8)
39314 index += 8;
39315 else
39316 index -= 8;
39317 SUBREG_BYTE (op) = index;
39320 const char *fmt = GET_RTX_FORMAT (code);
39321 int i,j;
39322 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39323 if (fmt[i] == 'e' || fmt[i] == 'u')
39324 adjust_subreg_index (XEXP (op, i));
39325 else if (fmt[i] == 'E')
39326 for (j = 0; j < XVECLEN (op, i); ++j)
39327 adjust_subreg_index (XVECEXP (op, i, j));
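/* For example, (subreg:DF (reg:V2DF v) 0) becomes
   (subreg:DF (reg:V2DF v) 8) and vice versa, since the two doublewords
   trade places in the swapped layout.  */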
39330 /* Convert the non-permuting load INSN to a permuting one. */
39331 static void
39332 permute_load (rtx_insn *insn)
39334 rtx body = PATTERN (insn);
39335 rtx mem_op = SET_SRC (body);
39336 rtx tgt_reg = SET_DEST (body);
39337 machine_mode mode = GET_MODE (tgt_reg);
39338 int n_elts = GET_MODE_NUNITS (mode);
39339 int half_elts = n_elts / 2;
39340 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
39341 int i, j;
39342 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
39343 XVECEXP (par, 0, i) = GEN_INT (j);
39344 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
39345 XVECEXP (par, 0, i) = GEN_INT (j);
39346 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
39347 SET_SRC (body) = sel;
39348 INSN_CODE (insn) = -1; /* Force re-recognition. */
39349 df_insn_rescan (insn);
39351 if (dump_file)
39352 fprintf (dump_file, "Replacing load %d with permuted load\n",
39353 INSN_UID (insn));
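/* For example, a V4SI load

     (set (reg:V4SI d) (mem:V4SI addr))

   is rewritten above as

     (set (reg:V4SI d)
          (vec_select:V4SI (mem:V4SI addr)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   turning it into a permuting load that cancels the doubleword swap.  */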
39356 /* Convert the non-permuting store INSN to a permuting one. */
39357 static void
39358 permute_store (rtx_insn *insn)
39360 rtx body = PATTERN (insn);
39361 rtx src_reg = SET_SRC (body);
39362 machine_mode mode = GET_MODE (src_reg);
39363 int n_elts = GET_MODE_NUNITS (mode);
39364 int half_elts = n_elts / 2;
39365 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
39366 int i, j;
39367 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
39368 XVECEXP (par, 0, i) = GEN_INT (j);
39369 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
39370 XVECEXP (par, 0, i) = GEN_INT (j);
39371 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
39372 SET_SRC (body) = sel;
39373 INSN_CODE (insn) = -1; /* Force re-recognition. */
39374 df_insn_rescan (insn);
39376 if (dump_file)
39377 fprintf (dump_file, "Replacing store %d with permuted store\n",
39378 INSN_UID (insn));
39381 /* Given INSN that contains a vector extract operation, adjust the index
39382    of the extracted lane to account for the doubleword swap. */
39383 static void
39384 adjust_extract (rtx_insn *insn)
39386 rtx pattern = PATTERN (insn);
39387 if (GET_CODE (pattern) == PARALLEL)
39388 pattern = XVECEXP (pattern, 0, 0);
39389 rtx src = SET_SRC (pattern);
39390 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
39391 account for that. */
39392 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
39393 rtx par = XEXP (sel, 1);
39394 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
39395 int lane = INTVAL (XVECEXP (par, 0, 0));
39396 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
39397 XVECEXP (par, 0, 0) = GEN_INT (lane);
39398 INSN_CODE (insn) = -1; /* Force re-recognition. */
39399 df_insn_rescan (insn);
39401 if (dump_file)
39402 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
39405 /* Given INSN that contains a vector direct-splat operation, adjust the
39406    index of the source lane to account for the doubleword swap. */
39407 static void
39408 adjust_splat (rtx_insn *insn)
39410 rtx body = PATTERN (insn);
39411 rtx unspec = XEXP (body, 1);
39412 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
39413 int lane = INTVAL (XVECEXP (unspec, 0, 1));
39414 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
39415 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
39416 INSN_CODE (insn) = -1; /* Force re-recognition. */
39417 df_insn_rescan (insn);
39419 if (dump_file)
39420 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
39423 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
39424    swap), reverse the order of the source operands and adjust the indices
39425    of the source lanes to account for doubleword reversal. */
39426 static void
39427 adjust_xxpermdi (rtx_insn *insn)
39429 rtx set = PATTERN (insn);
39430 rtx select = XEXP (set, 1);
39431 rtx concat = XEXP (select, 0);
39432 rtx src0 = XEXP (concat, 0);
39433 XEXP (concat, 0) = XEXP (concat, 1);
39434 XEXP (concat, 1) = src0;
39435 rtx parallel = XEXP (select, 1);
39436 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
39437 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
39438 int new_lane0 = 3 - lane1;
39439 int new_lane1 = 3 - lane0;
39440 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
39441 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
39442 INSN_CODE (insn) = -1; /* Force re-recognition. */
39443 df_insn_rescan (insn);
39445 if (dump_file)
39446 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
39449 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
39450    reverse the order of those inputs. */
39451 static void
39452 adjust_concat (rtx_insn *insn)
39454 rtx set = PATTERN (insn);
39455 rtx concat = XEXP (set, 1);
39456 rtx src0 = XEXP (concat, 0);
39457 XEXP (concat, 0) = XEXP (concat, 1);
39458 XEXP (concat, 1) = src0;
39459 INSN_CODE (insn) = -1; /* Force re-recognition. */
39460 df_insn_rescan (insn);
39462 if (dump_file)
39463 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
39466 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
39467 constant pool to reflect swapped doublewords. */
39468 static void
39469 adjust_vperm (rtx_insn *insn)
39471 /* We previously determined that the UNSPEC_VPERM was fed by a
39472 swap of a swapping load of a TOC-relative constant pool symbol.
39473 Find the MEM in the swapping load and replace it with a MEM for
39474 the adjusted mask constant. */
39475 rtx set = PATTERN (insn);
39476 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
39478 /* Find the swap. */
39479 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39480 df_ref use;
39481 rtx_insn *swap_insn = 0;
39482 FOR_EACH_INSN_INFO_USE (use, insn_info)
39483 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
39485 struct df_link *def_link = DF_REF_CHAIN (use);
39486 gcc_assert (def_link && !def_link->next);
39487 swap_insn = DF_REF_INSN (def_link->ref);
39488 break;
39490 gcc_assert (swap_insn);
39492 /* Find the load. */
39493 insn_info = DF_INSN_INFO_GET (swap_insn);
39494 rtx_insn *load_insn = 0;
39495 FOR_EACH_INSN_INFO_USE (use, insn_info)
39497 struct df_link *def_link = DF_REF_CHAIN (use);
39498 gcc_assert (def_link && !def_link->next);
39499 load_insn = DF_REF_INSN (def_link->ref);
39500 break;
39502 gcc_assert (load_insn);
39504 /* Find the TOC-relative symbol access. */
39505 insn_info = DF_INSN_INFO_GET (load_insn);
39506 rtx_insn *tocrel_insn = 0;
39507 FOR_EACH_INSN_INFO_USE (use, insn_info)
39509 struct df_link *def_link = DF_REF_CHAIN (use);
39510 gcc_assert (def_link && !def_link->next);
39511 tocrel_insn = DF_REF_INSN (def_link->ref);
39512 break;
39514 gcc_assert (tocrel_insn);
39516 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
39517 to set tocrel_base; otherwise it would be unnecessary as we've
39518 already established it will return true. */
39519 rtx base, offset;
39520 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
39521 /* There is an extra level of indirection for small/large code models. */
39522 if (GET_CODE (tocrel_expr) == MEM)
39523 tocrel_expr = XEXP (tocrel_expr, 0);
39524 if (!toc_relative_expr_p (tocrel_expr, false))
39525 gcc_unreachable ();
39526 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
39527 rtx const_vector = get_pool_constant (base);
39528 /* With the extra indirection, get_pool_constant first yields the
39529    SYMBOL_REF recorded in the reg_equal expression, so call it again
39530    to obtain the real constant. */
39531 if (GET_CODE (const_vector) == SYMBOL_REF)
39532 const_vector = get_pool_constant (const_vector);
39533 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
39535 /* Create an adjusted mask from the initial mask. */
39536 unsigned int new_mask[16], i, val;
39537 for (i = 0; i < 16; ++i) {
39538 val = INTVAL (XVECEXP (const_vector, 0, i));
39539 if (val < 16)
39540 new_mask[i] = (val + 8) % 16;
39541 else
39542 new_mask[i] = ((val + 8) % 16) + 16;
39545 /* Create a new CONST_VECTOR and a MEM that references it. */
39546 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
39547 for (i = 0; i < 16; ++i)
39548 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
39549 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
39550 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
39551 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
39552 can't recognize. Force the SYMBOL_REF into a register. */
39553 if (!REG_P (XEXP (new_mem, 0))) {
39554 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
39555 XEXP (new_mem, 0) = base_reg;
39556 /* Move the newly created insn ahead of the load insn. */
39557 rtx_insn *force_insn = get_last_insn ();
39558 remove_insn (force_insn);
39559 rtx_insn *before_load_insn = PREV_INSN (load_insn);
39560 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
39561 df_insn_rescan (before_load_insn);
39562 df_insn_rescan (force_insn);
39565 /* Replace the MEM in the load instruction and rescan it. */
39566 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
39567 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
39568 df_insn_rescan (load_insn);
39570 if (dump_file)
39571 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
39574 /* The insn described by INSN_ENTRY[I] can be swapped, but only
39575 with special handling. Take care of that here. */
39576 static void
39577 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
39579 rtx_insn *insn = insn_entry[i].insn;
39580 rtx body = PATTERN (insn);
39582 switch (insn_entry[i].special_handling)
39584 default:
39585 gcc_unreachable ();
39586 case SH_CONST_VECTOR:
39588 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
39589 gcc_assert (GET_CODE (body) == SET);
39590 rtx rhs = SET_SRC (body);
39591 swap_const_vector_halves (rhs);
39592 if (dump_file)
39593 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
39594 break;
39596 case SH_SUBREG:
39597 /* A subreg of the same size is already safe. For subregs that
39598 select a smaller portion of a reg, adjust the index for
39599 swapped doublewords. */
39600 adjust_subreg_index (body);
39601 if (dump_file)
39602 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
39603 break;
39604 case SH_NOSWAP_LD:
39605 /* Convert a non-permuting load to a permuting one. */
39606 permute_load (insn);
39607 break;
39608 case SH_NOSWAP_ST:
39609 /* Convert a non-permuting store to a permuting one. */
39610 permute_store (insn);
39611 break;
39612 case SH_EXTRACT:
39613 /* Change the lane on an extract operation. */
39614 adjust_extract (insn);
39615 break;
39616 case SH_SPLAT:
39617 /* Change the lane on a direct-splat operation. */
39618 adjust_splat (insn);
39619 break;
39620 case SH_XXPERMDI:
39621 /* Change the lanes on an XXPERMDI operation. */
39622 adjust_xxpermdi (insn);
39623 break;
39624 case SH_CONCAT:
39625 /* Reverse the order of a concatenation operation. */
39626 adjust_concat (insn);
39627 break;
39628 case SH_VPERM:
39629 /* Change the mask loaded from the constant pool for a VPERM. */
39630 adjust_vperm (insn);
39631 break;
39635 /* Find the insn from the Ith table entry, which is known to be a
39636 register swap Y = SWAP(X). Replace it with a copy Y = X. */
39637 static void
39638 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
39640 rtx_insn *insn = insn_entry[i].insn;
39641 rtx body = PATTERN (insn);
39642 rtx src_reg = XEXP (SET_SRC (body), 0);
39643 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
39644 rtx_insn *new_insn = emit_insn_before (copy, insn);
39645 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
39646 df_insn_rescan (new_insn);
39648 if (dump_file)
39650 unsigned int new_uid = INSN_UID (new_insn);
39651 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
39654 df_insn_delete (insn);
39655 remove_insn (insn);
39656 insn->set_deleted ();
39659 /* Dump the swap table to DUMP_FILE. */
39660 static void
39661 dump_swap_insn_table (swap_web_entry *insn_entry)
39663 int e = get_max_uid ();
39664 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
39666 for (int i = 0; i < e; ++i)
39667 if (insn_entry[i].is_relevant)
39669 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
39670 fprintf (dump_file, "%6d %6d ", i,
39671 pred_entry && pred_entry->insn
39672 ? INSN_UID (pred_entry->insn) : 0);
39673 if (insn_entry[i].is_load)
39674 fputs ("load ", dump_file);
39675 if (insn_entry[i].is_store)
39676 fputs ("store ", dump_file);
39677 if (insn_entry[i].is_swap)
39678 fputs ("swap ", dump_file);
39679 if (insn_entry[i].is_live_in)
39680 fputs ("live-in ", dump_file);
39681 if (insn_entry[i].is_live_out)
39682 fputs ("live-out ", dump_file);
39683 if (insn_entry[i].contains_subreg)
39684 fputs ("subreg ", dump_file);
39685 if (insn_entry[i].is_128_int)
39686 fputs ("int128 ", dump_file);
39687 if (insn_entry[i].is_call)
39688 fputs ("call ", dump_file);
39689 if (insn_entry[i].is_swappable)
39691 fputs ("swappable ", dump_file);
39692 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
39693 fputs ("special:constvec ", dump_file);
39694 else if (insn_entry[i].special_handling == SH_SUBREG)
39695 fputs ("special:subreg ", dump_file);
39696 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
39697 fputs ("special:load ", dump_file);
39698 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
39699 fputs ("special:store ", dump_file);
39700 else if (insn_entry[i].special_handling == SH_EXTRACT)
39701 fputs ("special:extract ", dump_file);
39702 else if (insn_entry[i].special_handling == SH_SPLAT)
39703 fputs ("special:splat ", dump_file);
39704 else if (insn_entry[i].special_handling == SH_XXPERMDI)
39705 fputs ("special:xxpermdi ", dump_file);
39706 else if (insn_entry[i].special_handling == SH_CONCAT)
39707 fputs ("special:concat ", dump_file);
39708 else if (insn_entry[i].special_handling == SH_VPERM)
39709 fputs ("special:vperm ", dump_file);
39711 if (insn_entry[i].web_not_optimizable)
39712 fputs ("unoptimizable ", dump_file);
39713 if (insn_entry[i].will_delete)
39714 fputs ("delete ", dump_file);
39715 fputs ("\n", dump_file);
39717 fputs ("\n", dump_file);
39720 /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg).
39721    Here ALIGN is an (and addr (const_int -16)). Always return a new copy
39722    to avoid problems with combine. */
39723 static rtx
39724 alignment_with_canonical_addr (rtx align)
39726 rtx canon;
39727 rtx addr = XEXP (align, 0);
39729 if (REG_P (addr))
39730 canon = addr;
39732 else if (GET_CODE (addr) == PLUS)
39734 rtx addrop0 = XEXP (addr, 0);
39735 rtx addrop1 = XEXP (addr, 1);
39737 if (!REG_P (addrop0))
39738 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
39740 if (!REG_P (addrop1))
39741 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
39743 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
39746 else
39747 canon = force_reg (GET_MODE (addr), addr);
39749 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
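/* For example, (and (plus (reg r) (const_int 16)) (const_int -16)) is
   returned as a fresh (and (plus (reg r) (reg r')) (const_int -16)),
   where r' is a new register loaded with 16; a bare
   (and (reg r) (const_int -16)) is simply copied.  */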
39752 /* Check whether INSN computes an alignment mask (an AND with -16), and
39753    if so, return a fully-expanded rtx for the masking operation. */
39754 static rtx
39755 alignment_mask (rtx_insn *insn)
39757 rtx body = PATTERN (insn);
39759 if (GET_CODE (body) != SET
39760 || GET_CODE (SET_SRC (body)) != AND
39761 || !REG_P (XEXP (SET_SRC (body), 0)))
39762 return 0;
39764 rtx mask = XEXP (SET_SRC (body), 1);
39766 if (GET_CODE (mask) == CONST_INT)
39768 if (INTVAL (mask) == -16)
39769 return alignment_with_canonical_addr (SET_SRC (body));
39770 else
39771 return 0;
39774 if (!REG_P (mask))
39775 return 0;
39777 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39778 df_ref use;
39779 rtx real_mask = 0;
39781 FOR_EACH_INSN_INFO_USE (use, insn_info)
39783 if (!rtx_equal_p (DF_REF_REG (use), mask))
39784 continue;
39786 struct df_link *def_link = DF_REF_CHAIN (use);
39787 if (!def_link || def_link->next)
39788 return 0;
39790 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
39791 rtx const_body = PATTERN (const_insn);
39792 if (GET_CODE (const_body) != SET)
39793 return 0;
39795 real_mask = SET_SRC (const_body);
39797 if (GET_CODE (real_mask) != CONST_INT
39798 || INTVAL (real_mask) != -16)
39799 return 0;
39802 if (real_mask == 0)
39803 return 0;
39805 return alignment_with_canonical_addr (SET_SRC (body));
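/* Two forms are recognized above: the mask may be an immediate, as in

     (set (reg:DI m) (and:DI (reg:DI a) (const_int -16)))

   or a register whose unique reaching definition sets it to -16.  */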
39808 /* Given INSN that's a load or store based at BASE_REG, look for a
39809 feeding computation that aligns its address on a 16-byte boundary. */
39810 static rtx
39811 find_alignment_op (rtx_insn *insn, rtx base_reg)
39813 df_ref base_use;
39814 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39815 rtx and_operation = 0;
39817 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
39819 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
39820 continue;
39822 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
39823 if (!base_def_link || base_def_link->next)
39824 break;
39826 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
39827 and_operation = alignment_mask (and_insn);
39828 if (and_operation != 0)
39829 break;
39832 return and_operation;
39835 struct del_info { bool replace; rtx_insn *replace_insn; };
39837 /* If INSN is the load for an lvx pattern, put it in canonical form. */
39838 static void
39839 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
39841 rtx body = PATTERN (insn);
39842 gcc_assert (GET_CODE (body) == SET
39843 && GET_CODE (SET_SRC (body)) == VEC_SELECT
39844 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
39846 rtx mem = XEXP (SET_SRC (body), 0);
39847 rtx base_reg = XEXP (mem, 0);
39849 rtx and_operation = find_alignment_op (insn, base_reg);
39851 if (and_operation != 0)
39853 df_ref def;
39854 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39855 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39857 struct df_link *link = DF_REF_CHAIN (def);
39858 if (!link || link->next)
39859 break;
39861 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
39862 if (!insn_is_swap_p (swap_insn)
39863 || insn_is_load_p (swap_insn)
39864 || insn_is_store_p (swap_insn))
39865 break;
39867 /* Expected lvx pattern found. Change the swap to
39868 a copy, and propagate the AND operation into the
39869 load. */
39870 to_delete[INSN_UID (swap_insn)].replace = true;
39871 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
39873 XEXP (mem, 0) = and_operation;
39874 SET_SRC (body) = mem;
39875 INSN_CODE (insn) = -1; /* Force re-recognition. */
39876 df_insn_rescan (insn);
39878 if (dump_file)
39879 fprintf (dump_file, "lvx opportunity found at %d\n",
39880 INSN_UID (insn));
39885 /* If INSN is the store for an stvx pattern, put it in canonical form. */
39886 static void
39887 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
39889 rtx body = PATTERN (insn);
39890 gcc_assert (GET_CODE (body) == SET
39891 && GET_CODE (SET_DEST (body)) == MEM
39892 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
39893 rtx mem = SET_DEST (body);
39894 rtx base_reg = XEXP (mem, 0);
39896 rtx and_operation = find_alignment_op (insn, base_reg);
39898 if (and_operation != 0)
39900 rtx src_reg = XEXP (SET_SRC (body), 0);
39901 df_ref src_use;
39902 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39903 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
39905 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
39906 continue;
39908 struct df_link *link = DF_REF_CHAIN (src_use);
39909 if (!link || link->next)
39910 break;
39912 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
39913 if (!insn_is_swap_p (swap_insn)
39914 || insn_is_load_p (swap_insn)
39915 || insn_is_store_p (swap_insn))
39916 break;
39918 /* Expected stvx pattern found. Change the swap to
39919 a copy, and propagate the AND operation into the
39920 store. */
39921 to_delete[INSN_UID (swap_insn)].replace = true;
39922 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
39924 XEXP (mem, 0) = and_operation;
39925 SET_SRC (body) = src_reg;
39926 INSN_CODE (insn) = -1; /* Force re-recognition. */
39927 df_insn_rescan (insn);
39929 if (dump_file)
39930 fprintf (dump_file, "stvx opportunity found at %d\n",
39931 INSN_UID (insn));
39936 /* Look for patterns created from builtin lvx and stvx calls, and
39937 canonicalize them to be properly recognized as such. */
39938 static void
39939 recombine_lvx_stvx_patterns (function *fun)
39941 int i;
39942 basic_block bb;
39943 rtx_insn *insn;
39945 int num_insns = get_max_uid ();
39946 del_info *to_delete = XCNEWVEC (del_info, num_insns);
39948 FOR_ALL_BB_FN (bb, fun)
39949 FOR_BB_INSNS (bb, insn)
39951 if (!NONDEBUG_INSN_P (insn))
39952 continue;
39954 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
39955 recombine_lvx_pattern (insn, to_delete);
39956 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
39957 recombine_stvx_pattern (insn, to_delete);
39960 /* Turning swaps into copies is delayed until now, to avoid problems
39961 with deleting instructions during the insn walk. */
39962 for (i = 0; i < num_insns; i++)
39963 if (to_delete[i].replace)
39965 rtx swap_body = PATTERN (to_delete[i].replace_insn);
39966 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
39967 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
39968 rtx_insn *new_insn = emit_insn_before (copy,
39969 to_delete[i].replace_insn);
39970 set_block_for_insn (new_insn,
39971 BLOCK_FOR_INSN (to_delete[i].replace_insn));
39972 df_insn_rescan (new_insn);
39973 df_insn_delete (to_delete[i].replace_insn);
39974 remove_insn (to_delete[i].replace_insn);
39975 to_delete[i].replace_insn->set_deleted ();
39978 free (to_delete);
39981 /* Main entry point for this pass. */
39982 unsigned int
39983 rs6000_analyze_swaps (function *fun)
39985 swap_web_entry *insn_entry;
39986 basic_block bb;
39987 rtx_insn *insn, *curr_insn = 0;
39989 /* Dataflow analysis for use-def chains. */
39990 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
39991 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
39992 df_analyze ();
39993 df_set_flags (DF_DEFER_INSN_RESCAN);
39995 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
39996 recombine_lvx_stvx_patterns (fun);
39998 /* Allocate structure to represent webs of insns. */
39999 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
40001 /* Walk the insns to gather basic data. */
40002 FOR_ALL_BB_FN (bb, fun)
40003 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
40005 unsigned int uid = INSN_UID (insn);
40006 if (NONDEBUG_INSN_P (insn))
40008 insn_entry[uid].insn = insn;
40010 if (GET_CODE (insn) == CALL_INSN)
40011 insn_entry[uid].is_call = 1;
40013 /* Walk the uses and defs to see if we mention vector regs.
40014 Record any constraints on optimization of such mentions. */
40015 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40016 df_ref mention;
40017 FOR_EACH_INSN_INFO_USE (mention, insn_info)
40019 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40020 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
40022 /* If a use gets its value from a call insn, it will be
40023 a hard register and will look like (reg:V4SI 3 3).
40024 The df analysis creates two mentions for GPR3 and GPR4,
40025 both DImode. We must recognize this and treat it as a
40026 vector mention to ensure the call is unioned with this
40027 use. */
40028 if (mode == DImode && DF_REF_INSN_INFO (mention))
40030 rtx feeder = DF_REF_INSN (mention);
40031 /* FIXME: It is pretty hard to get from the df mention
40032 to the mode of the use in the insn. We arbitrarily
40033 pick a vector mode here, even though the use might
40034 be a real DImode. We can be too conservative
40035 (create a web larger than necessary) because of
40036 this, so consider eventually fixing this. */
40037 if (GET_CODE (feeder) == CALL_INSN)
40038 mode = V4SImode;
40041 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
40043 insn_entry[uid].is_relevant = 1;
40044 if (mode == TImode || mode == V1TImode
40045 || FLOAT128_VECTOR_P (mode))
40046 insn_entry[uid].is_128_int = 1;
40047 if (DF_REF_INSN_INFO (mention))
40048 insn_entry[uid].contains_subreg
40049 = !rtx_equal_p (DF_REF_REG (mention),
40050 DF_REF_REAL_REG (mention));
40051 union_defs (insn_entry, insn, mention);
40054 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
40056 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40057 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
40059 /* If we're loading up a hard vector register for a call,
40060 it looks like (set (reg:V4SI 9 9) (...)). The df
40061 analysis creates two mentions for GPR9 and GPR10, both
40062 DImode. So relying on the mode from the mentions
40063 isn't sufficient to ensure we union the call into the
40064 web with the parameter setup code. */
40065 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
40066 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
40067 mode = GET_MODE (SET_DEST (PATTERN (insn)));
40069 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
40071 insn_entry[uid].is_relevant = 1;
40072 if (mode == TImode || mode == V1TImode
40073 || FLOAT128_VECTOR_P (mode))
40074 insn_entry[uid].is_128_int = 1;
40075 if (DF_REF_INSN_INFO (mention))
40076 insn_entry[uid].contains_subreg
40077 = !rtx_equal_p (DF_REF_REG (mention),
40078 DF_REF_REAL_REG (mention));
40079 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
40080 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
40081 insn_entry[uid].is_live_out = 1;
40082 union_uses (insn_entry, insn, mention);
40086 if (insn_entry[uid].is_relevant)
40088 /* Determine if this is a load or store. */
40089 insn_entry[uid].is_load = insn_is_load_p (insn);
40090 insn_entry[uid].is_store = insn_is_store_p (insn);
40092 /* Determine if this is a doubleword swap. If not,
40093 determine whether it can legally be swapped. */
40094 if (insn_is_swap_p (insn))
40095 insn_entry[uid].is_swap = 1;
40096 else
40098 unsigned int special = SH_NONE;
40099 insn_entry[uid].is_swappable
40100 = insn_is_swappable_p (insn_entry, insn, &special);
40101 if (special != SH_NONE && insn_entry[uid].contains_subreg)
40102 insn_entry[uid].is_swappable = 0;
40103 else if (special != SH_NONE)
40104 insn_entry[uid].special_handling = special;
40105 else if (insn_entry[uid].contains_subreg)
40106 insn_entry[uid].special_handling = SH_SUBREG;
40112 if (dump_file)
40114 fprintf (dump_file, "\nSwap insn entry table when first built\n");
40115 dump_swap_insn_table (insn_entry);
40118 /* Record unoptimizable webs. */
40119 unsigned e = get_max_uid (), i;
40120 for (i = 0; i < e; ++i)
40122 if (!insn_entry[i].is_relevant)
40123 continue;
40125 swap_web_entry *root
40126 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
40128 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
40129 || (insn_entry[i].contains_subreg
40130 && insn_entry[i].special_handling != SH_SUBREG)
40131 || insn_entry[i].is_128_int || insn_entry[i].is_call
40132 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
40133 root->web_not_optimizable = 1;
40135 /* If we have loads or stores that aren't permuting then the
40136 optimization isn't appropriate. */
40137 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
40138 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
40139 root->web_not_optimizable = 1;
40141 /* If we have permuting loads or stores that are not accompanied
40142 by a register swap, the optimization isn't appropriate. */
40143 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
40145 rtx insn = insn_entry[i].insn;
40146 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40147 df_ref def;
40149 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40151 struct df_link *link = DF_REF_CHAIN (def);
40153 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
40155 root->web_not_optimizable = 1;
40156 break;
40160 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
40162 rtx insn = insn_entry[i].insn;
40163 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40164 df_ref use;
40166 FOR_EACH_INSN_INFO_USE (use, insn_info)
40168 struct df_link *link = DF_REF_CHAIN (use);
40170 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
40172 root->web_not_optimizable = 1;
40173 break;
40179 if (dump_file)
40181 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
40182 dump_swap_insn_table (insn_entry);
40185 /* For each load and store in an optimizable web (which implies
40186 the loads and stores are permuting), find the associated
40187 register swaps and mark them for removal. Due to various
40188 optimizations we may mark the same swap more than once. Also
40189 perform special handling for swappable insns that require it. */
40190 for (i = 0; i < e; ++i)
40191 if ((insn_entry[i].is_load || insn_entry[i].is_store)
40192 && insn_entry[i].is_swap)
40194 swap_web_entry* root_entry
40195 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
40196 if (!root_entry->web_not_optimizable)
40197 mark_swaps_for_removal (insn_entry, i);
40199 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
40201 swap_web_entry* root_entry
40202 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
40203 if (!root_entry->web_not_optimizable)
40204 handle_special_swappables (insn_entry, i);
40207 /* Now delete the swaps marked for removal. */
40208 for (i = 0; i < e; ++i)
40209 if (insn_entry[i].will_delete)
40210 replace_swap_with_copy (insn_entry, i);
40212 /* Clean up. */
40213 free (insn_entry);
40214 return 0;
40217 const pass_data pass_data_analyze_swaps =
40219 RTL_PASS, /* type */
40220 "swaps", /* name */
40221 OPTGROUP_NONE, /* optinfo_flags */
40222 TV_NONE, /* tv_id */
40223 0, /* properties_required */
40224 0, /* properties_provided */
40225 0, /* properties_destroyed */
40226 0, /* todo_flags_start */
40227 TODO_df_finish, /* todo_flags_finish */
40230 class pass_analyze_swaps : public rtl_opt_pass
40232 public:
40233 pass_analyze_swaps (gcc::context *ctxt)
40234 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
40237 /* opt_pass methods: */
40238 virtual bool gate (function *)
40240 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
40241 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
40244 virtual unsigned int execute (function *fun)
40246 return rs6000_analyze_swaps (fun);
40249 }; // class pass_analyze_swaps
40251 rtl_opt_pass *
40252 make_pass_analyze_swaps (gcc::context *ctxt)
40254 return new pass_analyze_swaps (ctxt);
40257 #ifdef RS6000_GLIBC_ATOMIC_FENV
40258 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
40259 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
40260 #endif
40262 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
40264 static void
40265 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
40267 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
40269 #ifdef RS6000_GLIBC_ATOMIC_FENV
40270 if (atomic_hold_decl == NULL_TREE)
40272 atomic_hold_decl
40273 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40274 get_identifier ("__atomic_feholdexcept"),
40275 build_function_type_list (void_type_node,
40276 double_ptr_type_node,
40277 NULL_TREE));
40278 TREE_PUBLIC (atomic_hold_decl) = 1;
40279 DECL_EXTERNAL (atomic_hold_decl) = 1;
40282 if (atomic_clear_decl == NULL_TREE)
40284 atomic_clear_decl
40285 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40286 get_identifier ("__atomic_feclearexcept"),
40287 build_function_type_list (void_type_node,
40288 NULL_TREE));
40289 TREE_PUBLIC (atomic_clear_decl) = 1;
40290 DECL_EXTERNAL (atomic_clear_decl) = 1;
40293 tree const_double = build_qualified_type (double_type_node,
40294 TYPE_QUAL_CONST);
40295 tree const_double_ptr = build_pointer_type (const_double);
40296 if (atomic_update_decl == NULL_TREE)
40298 atomic_update_decl
40299 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40300 get_identifier ("__atomic_feupdateenv"),
40301 build_function_type_list (void_type_node,
40302 const_double_ptr,
40303 NULL_TREE));
40304 TREE_PUBLIC (atomic_update_decl) = 1;
40305 DECL_EXTERNAL (atomic_update_decl) = 1;
40308 tree fenv_var = create_tmp_var_raw (double_type_node);
40309 TREE_ADDRESSABLE (fenv_var) = 1;
40310 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
40312 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
40313 *clear = build_call_expr (atomic_clear_decl, 0);
40314 *update = build_call_expr (atomic_update_decl, 1,
40315 fold_convert (const_double_ptr, fenv_addr));
40316 #endif
40317 return;
40320 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
40321 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
40322 tree call_mffs = build_call_expr (mffs, 0);
40324 /* Generates the equivalent of feholdexcept (&fenv_var):
40326 fenv_var = __builtin_mffs ();
40327 double fenv_hold;
40328 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
40329 __builtin_mtfsf (0xff, fenv_hold); */
40331 /* Mask to clear everything except for the rounding modes and non-IEEE
40332 arithmetic flag. */
40333 const unsigned HOST_WIDE_INT hold_exception_mask =
40334 HOST_WIDE_INT_C (0xffffffff00000007);
40336 tree fenv_var = create_tmp_var_raw (double_type_node);
40338 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
40340 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
40341 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
40342 build_int_cst (uint64_type_node,
40343 hold_exception_mask));
40345 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
40346 fenv_llu_and);
40348 tree hold_mtfsf = build_call_expr (mtfsf, 2,
40349 build_int_cst (unsigned_type_node, 0xff),
40350 fenv_hold_mtfsf);
40352 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
40354 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
40356 double fenv_clear = __builtin_mffs ();
40357 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
40358 __builtin_mtfsf (0xff, fenv_clear); */
40360 /* Mask to clear everything in the lower 32 bits of the FPSCR (status
40361 and enable bits, the non-IEEE flag, and the rounding modes). */
40362 const unsigned HOST_WIDE_INT clear_exception_mask =
40363 HOST_WIDE_INT_C (0xffffffff00000000);
40365 tree fenv_clear = create_tmp_var_raw (double_type_node);
40367 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
40369 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
40370 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
40371 fenv_clean_llu,
40372 build_int_cst (uint64_type_node,
40373 clear_exception_mask));
40375 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
40376 fenv_clear_llu_and);
40378 tree clear_mtfsf = build_call_expr (mtfsf, 2,
40379 build_int_cst (unsigned_type_node, 0xff),
40380 fenv_clear_mtfsf);
40382 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
40384 /* Generates the equivalent of feupdateenv (&fenv_var):
40386 double old_fenv = __builtin_mffs ();
40387 double fenv_update;
40388 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
40389 (*(uint64_t*)&fenv_var & 0x1ff80fff);
40390 __builtin_mtfsf (0xff, fenv_update); */
40392 const unsigned HOST_WIDE_INT update_exception_mask =
40393 HOST_WIDE_INT_C (0xffffffff1fffff00);
40394 const unsigned HOST_WIDE_INT new_exception_mask =
40395 HOST_WIDE_INT_C (0x1ff80fff);
40397 tree old_fenv = create_tmp_var_raw (double_type_node);
40398 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
40400 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
40401 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
40402 build_int_cst (uint64_type_node,
40403 update_exception_mask));
40405 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
40406 build_int_cst (uint64_type_node,
40407 new_exception_mask));
40409 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
40410 old_llu_and, new_llu_and);
40412 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
40413 new_llu_mask);
40415 tree update_mtfsf = build_call_expr (mtfsf, 2,
40416 build_int_cst (unsigned_type_node, 0xff),
40417 fenv_update_mtfsf);
40419 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
40422 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
40424 static bool
40425 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
40426 optimization_type opt_type)
40428 switch (op)
40430 case rsqrt_optab:
40431 return (opt_type == OPTIMIZE_FOR_SPEED
40432 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
40434 default:
40435 return true;
40439 struct gcc_target targetm = TARGET_INITIALIZER;
40441 #include "gt-rs6000.h"