/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; calling it lets us
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
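/* The enum above is laid out so that GPR_REG_TYPE, VSX_REG_TYPE,
   ALTIVEC_REG_TYPE, and FPR_REG_TYPE are contiguous: IS_STD_REG_TYPE
   accepts all four, while IS_FP_VECT_REG_TYPE accepts only the three
   floating point/vector types.  */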
/* Register classes we care about for secondary reload or when deciding
   whether an address is legitimate.  We only need to worry about GPR, FPR,
   and Altivec registers here, along with an ANY field that is the OR of the
   3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;			/* Register class name.  */
  int reg;				/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
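/* The mask for a mode is the OR of whichever bits above apply; for example,
   a mode that is valid in GPRs with both reg+reg and reg+offset addressing
   would carry RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET in
   its addr_mask[RELOAD_REG_GPR] entry.  */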
/* Masks of valid addressing modes, and reload insn codes, for each register
   type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
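/* These predicates are cheap table lookups into reg_addr; address
   legitimization and the reload handlers can, for example, test
   mode_supports_pre_incdec_p (mode) before accepting a PRE_INC or PRE_DEC
   address in that mode.  */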
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
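/* rs6000_cost is pointed at one of the cost tables below when the target
   options are processed, according to the processor being tuned for.  */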
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
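/* Each RS6000_BUILTIN_* macro now expands an entry of rs6000-builtin.def
   into a { NAME, ICODE, MASK, ATTR } initializer, so including that file
   inside the array below fills in one element per builtin.  */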
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
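/* Entries are hashed on KEY and KEY_MODE, so a constant that would be placed
   in the TOC more than once can share the label number of the first copy.  */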
/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr","ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9",  "10", "11", "12", "13", "14", "15",
      "16", "17", "18", "19", "20", "21", "22", "23",
      "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
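/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31).  */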
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST		\
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1681 #undef TARGET_FUNCTION_ARG_ADVANCE
1682 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1683 #undef TARGET_FUNCTION_ARG
1684 #define TARGET_FUNCTION_ARG rs6000_function_arg
1685 #undef TARGET_FUNCTION_ARG_BOUNDARY
1686 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1688 #undef TARGET_BUILD_BUILTIN_VA_LIST
1689 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1691 #undef TARGET_EXPAND_BUILTIN_VA_START
1692 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1694 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1695 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1697 #undef TARGET_EH_RETURN_FILTER_MODE
1698 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1700 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1701 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1703 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1704 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1706 #undef TARGET_FLOATN_MODE
1707 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1709 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1710 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1712 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1713 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1715 #undef TARGET_MD_ASM_ADJUST
1716 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1718 #undef TARGET_OPTION_OVERRIDE
1719 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1721 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1722 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1723 rs6000_builtin_vectorized_function
1725 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1726 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1727 rs6000_builtin_md_vectorized_function
1729 #if !TARGET_MACHO
1730 #undef TARGET_STACK_PROTECT_FAIL
1731 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1732 #endif
1734 #ifdef HAVE_AS_TLS
1735 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1736 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1737 #endif
1739 /* Use a 32-bit anchor range. This leads to sequences like:
1741 addis tmp,anchor,high
1742 add dest,tmp,low
1744 where tmp itself acts as an anchor, and can be shared between
1745 accesses to the same 64k page. */
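/* For example (a sketch, not taken from this file), two globals placed
   in the same anchor block,

     extern int a, b;
     int f (void) { return a + b; }

   can share one anchor register and address both variables as small
   offsets from it, instead of materializing two full addresses.  */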
1746 #undef TARGET_MIN_ANCHOR_OFFSET
1747 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1748 #undef TARGET_MAX_ANCHOR_OFFSET
1749 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1750 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1751 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1752 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1753 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1755 #undef TARGET_BUILTIN_RECIPROCAL
1756 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1758 #undef TARGET_EXPAND_TO_RTL_HOOK
1759 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1761 #undef TARGET_INSTANTIATE_DECLS
1762 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1764 #undef TARGET_SECONDARY_RELOAD
1765 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1767 #undef TARGET_LEGITIMATE_ADDRESS_P
1768 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1770 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1771 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1773 #undef TARGET_LRA_P
1774 #define TARGET_LRA_P rs6000_lra_p
1776 #undef TARGET_CAN_ELIMINATE
1777 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1779 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1780 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1782 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1783 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1785 #undef TARGET_TRAMPOLINE_INIT
1786 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1788 #undef TARGET_FUNCTION_VALUE
1789 #define TARGET_FUNCTION_VALUE rs6000_function_value
1791 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1792 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1794 #undef TARGET_OPTION_SAVE
1795 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1797 #undef TARGET_OPTION_RESTORE
1798 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1800 #undef TARGET_OPTION_PRINT
1801 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1803 #undef TARGET_CAN_INLINE_P
1804 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1806 #undef TARGET_SET_CURRENT_FUNCTION
1807 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1809 #undef TARGET_LEGITIMATE_CONSTANT_P
1810 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1812 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1813 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1815 #undef TARGET_CAN_USE_DOLOOP_P
1816 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1818 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1819 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1821 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1822 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1823 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1824 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1825 #undef TARGET_UNWIND_WORD_MODE
1826 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1828 #undef TARGET_OFFLOAD_OPTIONS
1829 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1831 #undef TARGET_C_MODE_FOR_SUFFIX
1832 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1834 #undef TARGET_INVALID_BINARY_OP
1835 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1837 #undef TARGET_OPTAB_SUPPORTED_P
1838 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1841 /* Processor table. */
1842 struct rs6000_ptt
1844 const char *const name; /* Canonical processor name. */
1845 const enum processor_type processor; /* Processor type enum value. */
1846 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1849 static struct rs6000_ptt const processor_target_table[] =
1851 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1852 #include "rs6000-cpus.def"
1853 #undef RS6000_CPU
1856 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1857 name is invalid. */
1859 static int
1860 rs6000_cpu_name_lookup (const char *name)
1862 size_t i;
1864 if (name != NULL)
1866 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1867 if (! strcmp (name, processor_target_table[i].name))
1868 return (int)i;
1871 return -1;
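/* A usage sketch (hypothetical call site; the real option handling is
   elsewhere in this file):

     int idx = rs6000_cpu_name_lookup ("power8");
     if (idx >= 0)
       rs6000_cpu = processor_target_table[idx].processor;
     else
       error ("unknown value for -mcpu");
*/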
1875 /* Return number of consecutive hard regs needed starting at reg REGNO
1876 to hold something of mode MODE.
1877 This is ordinarily the length in words of a value of mode MODE
1878 but can be less for certain modes in special long registers.
1880 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1881 scalar instructions. The upper 32 bits are only available to the
1882 SIMD instructions.
1884 POWER and PowerPC GPRs hold 32 bits worth;
1885 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1887 static int
1888 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1890 unsigned HOST_WIDE_INT reg_size;
1892 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1893 128-bit floating point that can go in vector registers, which has VSX
1894 memory addressing. */
1895 if (FP_REGNO_P (regno))
1896 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1897 ? UNITS_PER_VSX_WORD
1898 : UNITS_PER_FP_WORD);
1900 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1901 reg_size = UNITS_PER_SPE_WORD;
1903 else if (ALTIVEC_REGNO_P (regno))
1904 reg_size = UNITS_PER_ALTIVEC_WORD;
1906 /* The value returned for SCmode in the E500 double case is 2 for
1907 ABI compatibility; storing an SCmode value in a single register
1908 would require function_arg and rs6000_spe_function_arg to handle
1909 SCmode so as to pass the value correctly in a pair of
1910 registers. */
1911 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1912 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1913 reg_size = UNITS_PER_FP_WORD;
1915 else
1916 reg_size = UNITS_PER_WORD;
1918 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
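/* The division above rounds up: e.g. with 8-byte GPRs, a 16-byte TImode
   value needs (16 + 8 - 1) / 8 = 2 registers, while a 4-byte SImode
   value needs (4 + 8 - 1) / 8 = 1.  */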
1921 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1922 MODE. */
1923 static int
1924 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1926 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1928 if (COMPLEX_MODE_P (mode))
1929 mode = GET_MODE_INNER (mode);
1931 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1932 register combinations, and we use PTImode where we need to deal with
1933 them. Don't allow quad words in the argument or frame
1934 pointer registers, just registers 0..31. */
1935 if (mode == PTImode)
1936 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1937 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1938 && ((regno & 1) == 0));
1940 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1941 implementations. Don't allow an item to be split between a FP register
1942 and an Altivec register. Allow TImode in all VSX registers if the user
1943 asked for it. */
1944 if (TARGET_VSX && VSX_REGNO_P (regno)
1945 && (VECTOR_MEM_VSX_P (mode)
1946 || FLOAT128_VECTOR_P (mode)
1947 || reg_addr[mode].scalar_in_vmx_p
1948 || (TARGET_VSX_TIMODE && mode == TImode)
1949 || (TARGET_VADDUQM && mode == V1TImode)
1950 || (TARGET_UPPER_REGS_DI && mode == DImode)))
1952 if (FP_REGNO_P (regno))
1953 return FP_REGNO_P (last_regno);
1955 if (ALTIVEC_REGNO_P (regno))
1957 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1958 return 0;
1960 return ALTIVEC_REGNO_P (last_regno);
1964 /* The GPRs can hold any mode, but values bigger than one register
1965 cannot go past R31. */
1966 if (INT_REGNO_P (regno))
1967 return INT_REGNO_P (last_regno);
1969 /* The float registers (except for VSX vector modes) can only hold floating
1970 modes and DImode. */
1971 if (FP_REGNO_P (regno))
1973 if (FLOAT128_VECTOR_P (mode))
1974 return false;
1976 if (SCALAR_FLOAT_MODE_P (mode)
1977 && (mode != TDmode || (regno % 2) == 0)
1978 && FP_REGNO_P (last_regno))
1979 return 1;
1981 if (GET_MODE_CLASS (mode) == MODE_INT
1982 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1983 return 1;
1985 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1986 && PAIRED_VECTOR_MODE (mode))
1987 return 1;
1989 return 0;
1992 /* The CR register can only hold CC modes. */
1993 if (CR_REGNO_P (regno))
1994 return GET_MODE_CLASS (mode) == MODE_CC;
1996 if (CA_REGNO_P (regno))
1997 return mode == Pmode || mode == SImode;
2000 /* AltiVec only in AltiVec registers. */
2000 if (ALTIVEC_REGNO_P (regno))
2001 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2002 || mode == V1TImode);
2004 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2005 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2006 return 1;
2008 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2009 and the value must fit within the register set. */
2011 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
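/* Worked example: on a 64-bit target, DImode in GPR 31 has last_regno 31
   and is accepted, but TImode starting at GPR 31 would have last_regno 32
   (the first FPR), so the INT_REGNO_P (last_regno) check above rejects
   it.  */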
2014 /* Print interesting facts about registers. */
2015 static void
2016 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2018 int r, m;
2020 for (r = first_regno; r <= last_regno; ++r)
2022 const char *comma = "";
2023 int len;
2025 if (first_regno == last_regno)
2026 fprintf (stderr, "%s:\t", reg_name);
2027 else
2028 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2030 len = 8;
2031 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2032 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2034 if (len > 70)
2036 fprintf (stderr, ",\n\t");
2037 len = 8;
2038 comma = "";
2041 if (rs6000_hard_regno_nregs[m][r] > 1)
2042 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2043 rs6000_hard_regno_nregs[m][r]);
2044 else
2045 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2047 comma = ", ";
2050 if (call_used_regs[r])
2052 if (len > 70)
2054 fprintf (stderr, ",\n\t");
2055 len = 8;
2056 comma = "";
2059 len += fprintf (stderr, "%s%s", comma, "call-used");
2060 comma = ", ";
2063 if (fixed_regs[r])
2065 if (len > 70)
2067 fprintf (stderr, ",\n\t");
2068 len = 8;
2069 comma = "";
2072 len += fprintf (stderr, "%s%s", comma, "fixed");
2073 comma = ", ";
2076 if (len > 70)
2078 fprintf (stderr, ",\n\t");
2079 comma = "";
2082 len += fprintf (stderr, "%sreg-class = %s", comma,
2083 reg_class_names[(int)rs6000_regno_regclass[r]]);
2084 comma = ", ";
2086 if (len > 70)
2088 fprintf (stderr, ",\n\t");
2089 comma = "";
2092 fprintf (stderr, "%sregno = %d\n", comma, r);
2096 static const char *
2097 rs6000_debug_vector_unit (enum rs6000_vector v)
2099 const char *ret;
2101 switch (v)
2103 case VECTOR_NONE: ret = "none"; break;
2104 case VECTOR_ALTIVEC: ret = "altivec"; break;
2105 case VECTOR_VSX: ret = "vsx"; break;
2106 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2107 case VECTOR_PAIRED: ret = "paired"; break;
2108 case VECTOR_SPE: ret = "spe"; break;
2109 case VECTOR_OTHER: ret = "other"; break;
2110 default: ret = "unknown"; break;
2113 return ret;
2116 /* Inner function printing just the address mask for a particular reload
2117 register class. */
2118 DEBUG_FUNCTION char *
2119 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2121 static char ret[8];
2122 char *p = ret;
2124 if ((mask & RELOAD_REG_VALID) != 0)
2125 *p++ = 'v';
2126 else if (keep_spaces)
2127 *p++ = ' ';
2129 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2130 *p++ = 'm';
2131 else if (keep_spaces)
2132 *p++ = ' ';
2134 if ((mask & RELOAD_REG_INDEXED) != 0)
2135 *p++ = 'i';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2139 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2140 *p++ = 'O';
2141 else if ((mask & RELOAD_REG_OFFSET) != 0)
2142 *p++ = 'o';
2143 else if (keep_spaces)
2144 *p++ = ' ';
2146 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2147 *p++ = '+';
2148 else if (keep_spaces)
2149 *p++ = ' ';
2151 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2152 *p++ = '+';
2153 else if (keep_spaces)
2154 *p++ = ' ';
2156 if ((mask & RELOAD_REG_AND_M16) != 0)
2157 *p++ = '&';
2158 else if (keep_spaces)
2159 *p++ = ' ';
2161 *p = '\0';
2163 return ret;
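/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set prints as "vio", or as "v io   " when
   KEEP_SPACES pads the missing flags so the per-mode columns line up
   in the -mdebug=reg output.  */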
2166 /* Print the address masks in a human readable fashion. */
2167 DEBUG_FUNCTION void
2168 rs6000_debug_print_mode (ssize_t m)
2170 ssize_t rc;
2171 int spaces = 0;
2172 bool fuse_extra_p;
2174 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2175 for (rc = 0; rc < N_RELOAD_REG; rc++)
2176 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2177 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2179 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2180 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2181 fprintf (stderr, " Reload=%c%c",
2182 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2183 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2184 else
2185 spaces += sizeof (" Reload=sl") - 1;
2187 if (reg_addr[m].scalar_in_vmx_p)
2189 fprintf (stderr, "%*s Upper=y", spaces, "");
2190 spaces = 0;
2192 else
2193 spaces += sizeof (" Upper=y") - 1;
2195 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2196 || reg_addr[m].fused_toc);
2197 if (!fuse_extra_p)
2199 for (rc = 0; rc < N_RELOAD_REG; rc++)
2201 if (rc != RELOAD_REG_ANY)
2203 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2205 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2206 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2207 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2209 fuse_extra_p = true;
2210 break;
2216 if (fuse_extra_p)
2218 fprintf (stderr, "%*s Fuse:", spaces, "");
2219 spaces = 0;
2221 for (rc = 0; rc < N_RELOAD_REG; rc++)
2223 if (rc != RELOAD_REG_ANY)
2225 char load, store;
2227 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2228 load = 'l';
2229 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2230 load = 'L';
2231 else
2232 load = '-';
2234 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2235 store = 's';
2236 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2237 store = 'S';
2238 else
2239 store = '-';
2241 if (load == '-' && store == '-')
2242 spaces += 5;
2243 else
2245 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2246 reload_reg_map[rc].name[0], load, store);
2247 spaces = 0;
2252 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2254 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2255 spaces = 0;
2257 else
2258 spaces += sizeof (" P8gpr") - 1;
2260 if (reg_addr[m].fused_toc)
2262 fprintf (stderr, "%*sToc", (spaces + 1), "");
2263 spaces = 0;
2265 else
2266 spaces += sizeof (" Toc") - 1;
2268 else
2269 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2271 if (rs6000_vector_unit[m] != VECTOR_NONE
2272 || rs6000_vector_mem[m] != VECTOR_NONE)
2274 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2275 spaces, "",
2276 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2277 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2280 fputs ("\n", stderr);
2283 #define DEBUG_FMT_ID "%-32s= "
2284 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2285 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2286 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
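/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints the
   key left-justified in a 32-column field:

     abi                             = ELFv2
*/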
2288 /* Print various interesting information with -mdebug=reg. */
2289 static void
2290 rs6000_debug_reg_global (void)
2292 static const char *const tf[2] = { "false", "true" };
2293 const char *nl = (const char *)0;
2294 int m;
2295 size_t m1, m2, v;
2296 char costly_num[20];
2297 char nop_num[20];
2298 char flags_buffer[40];
2299 const char *costly_str;
2300 const char *nop_str;
2301 const char *trace_str;
2302 const char *abi_str;
2303 const char *cmodel_str;
2304 struct cl_target_option cl_opts;
2306 /* Modes we want tieable information on. */
2307 static const machine_mode print_tieable_modes[] = {
2308 QImode,
2309 HImode,
2310 SImode,
2311 DImode,
2312 TImode,
2313 PTImode,
2314 SFmode,
2315 DFmode,
2316 TFmode,
2317 IFmode,
2318 KFmode,
2319 SDmode,
2320 DDmode,
2321 TDmode,
2322 V8QImode,
2323 V4HImode,
2324 V2SImode,
2325 V16QImode,
2326 V8HImode,
2327 V4SImode,
2328 V2DImode,
2329 V1TImode,
2330 V32QImode,
2331 V16HImode,
2332 V8SImode,
2333 V4DImode,
2334 V2TImode,
2335 V2SFmode,
2336 V4SFmode,
2337 V2DFmode,
2338 V8SFmode,
2339 V4DFmode,
2340 CCmode,
2341 CCUNSmode,
2342 CCEQmode,
2345 /* Virtual regs we are interested in. */
2346 static const struct {
2347 int regno; /* register number. */
2348 const char *name; /* register name. */
2349 } virtual_regs[] = {
2350 { STACK_POINTER_REGNUM, "stack pointer:" },
2351 { TOC_REGNUM, "toc: " },
2352 { STATIC_CHAIN_REGNUM, "static chain: " },
2353 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2354 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2355 { ARG_POINTER_REGNUM, "arg pointer: " },
2356 { FRAME_POINTER_REGNUM, "frame pointer:" },
2357 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2358 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2359 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2360 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2361 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2362 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2363 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2364 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2365 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2368 fputs ("\nHard register information:\n", stderr);
2369 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2370 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2371 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2372 LAST_ALTIVEC_REGNO,
2373 "vs");
2374 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2375 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2376 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2377 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2378 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2379 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2380 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2381 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2383 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2384 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2385 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2387 fprintf (stderr,
2388 "\n"
2389 "d reg_class = %s\n"
2390 "f reg_class = %s\n"
2391 "v reg_class = %s\n"
2392 "wa reg_class = %s\n"
2393 "wb reg_class = %s\n"
2394 "wd reg_class = %s\n"
2395 "we reg_class = %s\n"
2396 "wf reg_class = %s\n"
2397 "wg reg_class = %s\n"
2398 "wh reg_class = %s\n"
2399 "wi reg_class = %s\n"
2400 "wj reg_class = %s\n"
2401 "wk reg_class = %s\n"
2402 "wl reg_class = %s\n"
2403 "wm reg_class = %s\n"
2404 "wo reg_class = %s\n"
2405 "wp reg_class = %s\n"
2406 "wq reg_class = %s\n"
2407 "wr reg_class = %s\n"
2408 "ws reg_class = %s\n"
2409 "wt reg_class = %s\n"
2410 "wu reg_class = %s\n"
2411 "wv reg_class = %s\n"
2412 "ww reg_class = %s\n"
2413 "wx reg_class = %s\n"
2414 "wy reg_class = %s\n"
2415 "wz reg_class = %s\n"
2416 "\n",
2417 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2418 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2419 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2420 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2421 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2422 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2423 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2424 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2425 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2426 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2427 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2428 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2429 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2430 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2431 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2432 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2433 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2434 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2435 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2436 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2437 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2438 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2439 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2441 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2442 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2443 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2445 nl = "\n";
2446 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2447 rs6000_debug_print_mode (m);
2449 fputs ("\n", stderr);
2451 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2453 machine_mode mode1 = print_tieable_modes[m1];
2454 bool first_time = true;
2456 nl = (const char *)0;
2457 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2459 machine_mode mode2 = print_tieable_modes[m2];
2460 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2462 if (first_time)
2464 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2465 nl = "\n";
2466 first_time = false;
2469 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2473 if (!first_time)
2474 fputs ("\n", stderr);
2477 if (nl)
2478 fputs (nl, stderr);
2480 if (rs6000_recip_control)
2482 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2484 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2485 if (rs6000_recip_bits[m])
2487 fprintf (stderr,
2488 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2489 GET_MODE_NAME (m),
2490 (RS6000_RECIP_AUTO_RE_P (m)
2491 ? "auto"
2492 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2493 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2494 ? "auto"
2495 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2498 fputs ("\n", stderr);
2501 if (rs6000_cpu_index >= 0)
2503 const char *name = processor_target_table[rs6000_cpu_index].name;
2504 HOST_WIDE_INT flags
2505 = processor_target_table[rs6000_cpu_index].target_enable;
2507 sprintf (flags_buffer, "-mcpu=%s flags", name);
2508 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2510 else
2511 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2513 if (rs6000_tune_index >= 0)
2515 const char *name = processor_target_table[rs6000_tune_index].name;
2516 HOST_WIDE_INT flags
2517 = processor_target_table[rs6000_tune_index].target_enable;
2519 sprintf (flags_buffer, "-mtune=%s flags", name);
2520 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2522 else
2523 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2525 cl_target_option_save (&cl_opts, &global_options);
2526 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2527 rs6000_isa_flags);
2529 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2530 rs6000_isa_flags_explicit);
2532 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2533 rs6000_builtin_mask);
2535 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2537 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2538 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2540 switch (rs6000_sched_costly_dep)
2542 case max_dep_latency:
2543 costly_str = "max_dep_latency";
2544 break;
2546 case no_dep_costly:
2547 costly_str = "no_dep_costly";
2548 break;
2550 case all_deps_costly:
2551 costly_str = "all_deps_costly";
2552 break;
2554 case true_store_to_load_dep_costly:
2555 costly_str = "true_store_to_load_dep_costly";
2556 break;
2558 case store_to_load_dep_costly:
2559 costly_str = "store_to_load_dep_costly";
2560 break;
2562 default:
2563 costly_str = costly_num;
2564 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2565 break;
2568 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2570 switch (rs6000_sched_insert_nops)
2572 case sched_finish_regroup_exact:
2573 nop_str = "sched_finish_regroup_exact";
2574 break;
2576 case sched_finish_pad_groups:
2577 nop_str = "sched_finish_pad_groups";
2578 break;
2580 case sched_finish_none:
2581 nop_str = "sched_finish_none";
2582 break;
2584 default:
2585 nop_str = nop_num;
2586 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2587 break;
2590 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2592 switch (rs6000_sdata)
2594 default:
2595 case SDATA_NONE:
2596 break;
2598 case SDATA_DATA:
2599 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2600 break;
2602 case SDATA_SYSV:
2603 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2604 break;
2606 case SDATA_EABI:
2607 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2608 break;
2612 switch (rs6000_traceback)
2614 case traceback_default: trace_str = "default"; break;
2615 case traceback_none: trace_str = "none"; break;
2616 case traceback_part: trace_str = "part"; break;
2617 case traceback_full: trace_str = "full"; break;
2618 default: trace_str = "unknown"; break;
2621 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2623 switch (rs6000_current_cmodel)
2625 case CMODEL_SMALL: cmodel_str = "small"; break;
2626 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2627 case CMODEL_LARGE: cmodel_str = "large"; break;
2628 default: cmodel_str = "unknown"; break;
2631 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2633 switch (rs6000_current_abi)
2635 case ABI_NONE: abi_str = "none"; break;
2636 case ABI_AIX: abi_str = "aix"; break;
2637 case ABI_ELFv2: abi_str = "ELFv2"; break;
2638 case ABI_V4: abi_str = "V4"; break;
2639 case ABI_DARWIN: abi_str = "darwin"; break;
2640 default: abi_str = "unknown"; break;
2643 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2645 if (rs6000_altivec_abi)
2646 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2648 if (rs6000_spe_abi)
2649 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2651 if (rs6000_darwin64_abi)
2652 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2654 if (rs6000_float_gprs)
2655 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2657 fprintf (stderr, DEBUG_FMT_S, "fprs",
2658 (TARGET_FPRS ? "true" : "false"));
2660 fprintf (stderr, DEBUG_FMT_S, "single_float",
2661 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2663 fprintf (stderr, DEBUG_FMT_S, "double_float",
2664 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2666 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2667 (TARGET_SOFT_FLOAT ? "true" : "false"));
2669 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2670 (TARGET_E500_SINGLE ? "true" : "false"));
2672 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2673 (TARGET_E500_DOUBLE ? "true" : "false"));
2675 if (TARGET_LINK_STACK)
2676 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2678 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2680 if (TARGET_P8_FUSION)
2682 char options[80];
2684 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2685 if (TARGET_TOC_FUSION)
2686 strcat (options, ", toc");
2688 if (TARGET_P8_FUSION_SIGN)
2689 strcat (options, ", sign");
2691 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2694 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2695 TARGET_SECURE_PLT ? "secure" : "bss");
2696 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2697 aix_struct_return ? "aix" : "sysv");
2698 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2699 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2700 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2701 tf[!!rs6000_align_branch_targets]);
2702 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2703 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2704 rs6000_long_double_type_size);
2705 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2706 (int)rs6000_sched_restricted_insns_priority);
2707 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2708 (int)END_BUILTINS);
2709 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2710 (int)RS6000_BUILTIN_COUNT);
2712 if (TARGET_VSX)
2713 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2714 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2716 if (TARGET_DIRECT_MOVE_128)
2717 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2718 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2722 /* Update the addr mask bits in reg_addr to help secondary reload and the
2723 legitimate address support figure out the appropriate addressing to
2724 use. */
2726 static void
2727 rs6000_setup_reg_addr_masks (void)
2729 ssize_t rc, reg, m, nregs;
2730 addr_mask_type any_addr_mask, addr_mask;
2732 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2734 machine_mode m2 = (machine_mode) m;
2735 bool complex_p = false;
2736 size_t msize;
2738 if (COMPLEX_MODE_P (m2))
2740 complex_p = true;
2741 m2 = GET_MODE_INNER (m2);
2744 msize = GET_MODE_SIZE (m2);
2746 /* SDmode is special in that we want to access it only via REG+REG
2747 addressing on power7 and above, since we want to use the LFIWZX and
2748 STFIWZX instructions to load it. */
2749 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2751 any_addr_mask = 0;
2752 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2754 addr_mask = 0;
2755 reg = reload_reg_map[rc].reg;
2757 /* Can mode values go in the GPR/FPR/Altivec registers? */
2758 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2760 nregs = rs6000_hard_regno_nregs[m][reg];
2761 addr_mask |= RELOAD_REG_VALID;
2763 /* Indicate if the mode takes more than 1 physical register. If
2764 it takes a single register, indicate it can do REG+REG
2765 addressing. */
2766 if (nregs > 1 || m == BLKmode || complex_p)
2767 addr_mask |= RELOAD_REG_MULTIPLE;
2768 else
2769 addr_mask |= RELOAD_REG_INDEXED;
2771 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2772 addressing. Restrict addressing on SPE for 64-bit types
2773 because of the SUBREG hackery used to address 64-bit floats in
2774 '32-bit' GPRs. If we allow scalars into Altivec registers,
2775 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2777 if (TARGET_UPDATE
2778 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2779 && msize <= 8
2780 && !VECTOR_MODE_P (m2)
2781 && !FLOAT128_VECTOR_P (m2)
2782 && !complex_p
2783 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2784 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2785 && !(TARGET_E500_DOUBLE && msize == 8))
2787 addr_mask |= RELOAD_REG_PRE_INCDEC;
2789 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2790 we don't allow PRE_MODIFY for some multi-register
2791 operations. */
2792 switch (m)
2794 default:
2795 addr_mask |= RELOAD_REG_PRE_MODIFY;
2796 break;
2798 case DImode:
2799 if (TARGET_POWERPC64)
2800 addr_mask |= RELOAD_REG_PRE_MODIFY;
2801 break;
2803 case DFmode:
2804 case DDmode:
2805 if (TARGET_DF_INSN)
2806 addr_mask |= RELOAD_REG_PRE_MODIFY;
2807 break;
2812 /* GPR and FPR registers can do REG+OFFSET addressing, except
2813 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2814 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2815 if ((addr_mask != 0) && !indexed_only_p
2816 && msize <= 8
2817 && (rc == RELOAD_REG_GPR
2818 || ((msize == 8 || m2 == SFmode)
2819 && (rc == RELOAD_REG_FPR
2820 || (rc == RELOAD_REG_VMX
2821 && TARGET_P9_DFORM_SCALAR)))))
2822 addr_mask |= RELOAD_REG_OFFSET;
2824 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2825 instructions are enabled. The offset for 128-bit VSX registers is
2826 only 12-bits. While GPRs can handle the full offset range, VSX
2827 registers can only handle the restricted range. */
2828 else if ((addr_mask != 0) && !indexed_only_p
2829 && msize == 16 && TARGET_P9_DFORM_VECTOR
2830 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2831 || (m2 == TImode && TARGET_VSX_TIMODE)))
2833 addr_mask |= RELOAD_REG_OFFSET;
2834 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2835 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2838 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2839 addressing on 128-bit types. */
2840 if (rc == RELOAD_REG_VMX && msize == 16
2841 && (addr_mask & RELOAD_REG_VALID) != 0)
2842 addr_mask |= RELOAD_REG_AND_M16;
2844 reg_addr[m].addr_mask[rc] = addr_mask;
2845 any_addr_mask |= addr_mask;
2848 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
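/* A consumer sketch (hypothetical helper, not part of this file): once
   the masks are precomputed, a test such as

     static bool
     gpr_offset_ok_p (machine_mode mode)
     {
       return (reg_addr[mode].addr_mask[RELOAD_REG_GPR]
               & RELOAD_REG_OFFSET) != 0;
     }

   answers whether a D-form (REG+OFFSET) address is usable for MODE in a
   GPR without re-deriving the conditions above.  */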
2853 /* Initialize the various global tables that are based on register size. */
2854 static void
2855 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2857 ssize_t r, m, c;
2858 int align64;
2859 int align32;
2861 /* Precalculate REGNO_REG_CLASS. */
2862 rs6000_regno_regclass[0] = GENERAL_REGS;
2863 for (r = 1; r < 32; ++r)
2864 rs6000_regno_regclass[r] = BASE_REGS;
2866 for (r = 32; r < 64; ++r)
2867 rs6000_regno_regclass[r] = FLOAT_REGS;
2869 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2870 rs6000_regno_regclass[r] = NO_REGS;
2872 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2873 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2875 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2876 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2877 rs6000_regno_regclass[r] = CR_REGS;
2879 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2880 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2881 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2882 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2883 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2884 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2885 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2886 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2887 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2888 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2889 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2890 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2892 /* Precalculate the mapping from register class to the simpler reload register class. We don't
2893 need all of the register classes that are combinations of different
2894 classes, just the simple ones that have constraint letters. */
2895 for (c = 0; c < N_REG_CLASSES; c++)
2896 reg_class_to_reg_type[c] = NO_REG_TYPE;
2898 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2899 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2900 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2901 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2902 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2903 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2904 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2905 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2906 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2907 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2908 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2909 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2911 if (TARGET_VSX)
2913 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2914 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2916 else
2918 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2919 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2922 /* Precalculate the valid memory formats as well as the vector information;
2923 this must be set up before the rs6000_hard_regno_nregs_internal calls
2924 below. */
2925 gcc_assert ((int)VECTOR_NONE == 0);
2926 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2927 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2929 gcc_assert ((int)CODE_FOR_nothing == 0);
2930 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2932 gcc_assert ((int)NO_REGS == 0);
2933 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2935 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128 controls
2936 whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2937 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2939 align64 = 64;
2940 align32 = 32;
2942 else
2944 align64 = 128;
2945 align32 = 128;
2948 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2949 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2950 if (TARGET_FLOAT128)
2952 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2953 rs6000_vector_align[KFmode] = 128;
2955 if (FLOAT128_IEEE_P (TFmode))
2957 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2958 rs6000_vector_align[TFmode] = 128;
2962 /* V2DF mode, VSX only. */
2963 if (TARGET_VSX)
2965 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2966 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2967 rs6000_vector_align[V2DFmode] = align64;
2970 /* V4SF mode, either VSX or Altivec. */
2971 if (TARGET_VSX)
2973 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2974 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2975 rs6000_vector_align[V4SFmode] = align32;
2977 else if (TARGET_ALTIVEC)
2979 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2980 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2981 rs6000_vector_align[V4SFmode] = align32;
2984 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2985 and stores. */
2986 if (TARGET_ALTIVEC)
2988 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2989 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2990 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2991 rs6000_vector_align[V4SImode] = align32;
2992 rs6000_vector_align[V8HImode] = align32;
2993 rs6000_vector_align[V16QImode] = align32;
2995 if (TARGET_VSX)
2997 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2998 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2999 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3001 else
3003 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3004 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3005 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3009 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3010 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3011 if (TARGET_VSX)
3013 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3014 rs6000_vector_unit[V2DImode]
3015 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3016 rs6000_vector_align[V2DImode] = align64;
3018 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3019 rs6000_vector_unit[V1TImode]
3020 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3021 rs6000_vector_align[V1TImode] = 128;
3024 /* DFmode, see if we want to use the VSX unit. Memory is handled
3025 differently, so don't set rs6000_vector_mem. */
3026 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3028 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3029 rs6000_vector_align[DFmode] = 64;
3032 /* SFmode, see if we want to use the VSX unit. */
3033 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3035 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3036 rs6000_vector_align[SFmode] = 32;
3039 /* Allow TImode in VSX register and set the VSX memory macros. */
3040 if (TARGET_VSX && TARGET_VSX_TIMODE)
3042 rs6000_vector_mem[TImode] = VECTOR_VSX;
3043 rs6000_vector_align[TImode] = align64;
3046 /* TODO add SPE and paired floating point vector support. */
3048 /* Register class constraints for the constraints that depend on compile
3049 switches. When the VSX code was added, different constraints were added
3050 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3051 of the VSX registers are used. The register class for scalar floating
3052 point types is set based on whether we allow that type into the upper
3053 (Altivec) registers. GCC has register classes to target the Altivec
3054 registers for load/store operations, to select using a VSX memory
3055 operation instead of the traditional floating point operation. The
3056 constraints are:
3058 d - Register class to use with traditional DFmode instructions.
3059 f - Register class to use with traditional SFmode instructions.
3060 v - Altivec register.
3061 wa - Any VSX register.
wb - Altivec register if D-form scalar instructions (ISA 3.0) are enabled.
3062 wc - Reserved to represent individual CR bits (used in LLVM).
3063 wd - Preferred register class for V2DFmode.
we - VSX register if the ISA 3.0 128-bit direct move instructions are enabled.
3064 wf - Preferred register class for V4SFmode.
3065 wg - Float register for power6x move insns.
3066 wh - FP register for direct move instructions.
3067 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3068 wj - FP or VSX register to hold 64-bit integers for direct moves.
3069 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3070 wl - Float register if we can do 32-bit signed int loads.
3071 wm - VSX register for ISA 2.07 direct move operations.
3072 wn - always NO_REGS.
wo - VSX register for ISA 3.0 (power9) vector instructions.
wp - VSX register to use for IEEE 128-bit floating point (TFmode).
wq - VSX register to use for IEEE 128-bit floating point (KFmode).
3073 wr - GPR if 64-bit mode is permitted.
3074 ws - Register class to do ISA 2.06 DF operations.
3075 wt - VSX register for TImode in VSX registers.
3076 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3077 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3078 ww - Register class to use for SF conversions with VSX operations.
3079 wx - Float register if we can do 32-bit int stores.
3080 wy - Register class to do ISA 2.07 SF operations.
3081 wz - Float register if we can do 32-bit unsigned int loads. */
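/* As a usage sketch (assuming VSX is enabled; not from this file), these
   letters appear as inline-asm operand constraints:

     vector double
     vsum (vector double a, vector double b)
     {
       vector double r;
       __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));
       return r;
     }

   Here "wa" requests any VSX register and the %x modifier prints the
   full VSX register number.  */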
3083 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3084 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3086 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3087 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3089 if (TARGET_VSX)
3091 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3092 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3093 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3095 if (TARGET_VSX_TIMODE)
3096 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3098 if (TARGET_UPPER_REGS_DF) /* DFmode */
3100 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3101 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3103 else
3104 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3106 if (TARGET_UPPER_REGS_DF) /* DImode */
3107 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3108 else
3109 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3112 /* Add conditional constraints based on various options, to allow us to
3113 collapse multiple insn patterns. */
3114 if (TARGET_ALTIVEC)
3115 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3117 if (TARGET_MFPGPR) /* DFmode */
3118 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3120 if (TARGET_LFIWAX)
3121 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3123 if (TARGET_DIRECT_MOVE)
3125 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3126 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3127 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3128 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3129 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3130 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3133 if (TARGET_POWERPC64)
3134 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3136 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3138 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3139 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3140 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3142 else if (TARGET_P8_VECTOR)
3144 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3145 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3147 else if (TARGET_VSX)
3148 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3150 if (TARGET_STFIWX)
3151 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3153 if (TARGET_LFIWZX)
3154 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3156 if (TARGET_FLOAT128)
3158 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3159 if (FLOAT128_IEEE_P (TFmode))
3160 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3163 /* Support for new D-form instructions. */
3164 if (TARGET_P9_DFORM_SCALAR)
3165 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3167 /* Support for ISA 3.0 (power9) vectors. */
3168 if (TARGET_P9_VECTOR)
3169 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3171 /* Support for new direct moves (ISA 3.0 + 64bit). */
3172 if (TARGET_DIRECT_MOVE_128)
3173 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3175 /* Set up the reload helper and direct move functions. */
3176 if (TARGET_VSX || TARGET_ALTIVEC)
3178 if (TARGET_64BIT)
3180 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3181 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3182 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3183 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3184 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3185 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3186 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3187 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3188 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3189 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3190 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3191 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3192 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3193 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3194 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3195 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3196 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3197 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3198 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3199 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3201 if (FLOAT128_VECTOR_P (KFmode))
3203 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3204 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3207 if (FLOAT128_VECTOR_P (TFmode))
3209 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3210 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3213 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3214 available. */
3215 if (TARGET_NO_SDMODE_STACK)
3217 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3218 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3221 if (TARGET_VSX_TIMODE)
3223 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3224 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3227 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3229 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3230 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3231 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3232 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3233 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3234 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3235 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3236 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3237 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3239 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3240 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3241 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3242 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3243 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3244 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3245 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3246 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3247 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3249 if (FLOAT128_VECTOR_P (KFmode))
3251 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3252 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3255 if (FLOAT128_VECTOR_P (TFmode))
3257 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3258 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3262 else
3264 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3265 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3266 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3267 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3268 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3269 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3270 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3271 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3272 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3273 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3274 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3275 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3276 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3277 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3278 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3279 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3280 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3281 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3282 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3283 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3285 if (FLOAT128_VECTOR_P (KFmode))
3287 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3288 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3291 if (FLOAT128_IEEE_P (TFmode))
3293 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3294 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3297 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3298 available. */
3299 if (TARGET_NO_SDMODE_STACK)
3301 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3302 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3305 if (TARGET_VSX_TIMODE)
3307 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3308 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3311 if (TARGET_DIRECT_MOVE)
3313 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3314 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3315 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3319 if (TARGET_UPPER_REGS_DF)
3320 reg_addr[DFmode].scalar_in_vmx_p = true;
3322 if (TARGET_UPPER_REGS_DI)
3323 reg_addr[DImode].scalar_in_vmx_p = true;
3325 if (TARGET_UPPER_REGS_SF)
3326 reg_addr[SFmode].scalar_in_vmx_p = true;
3329 /* Setup the fusion operations. */
3330 if (TARGET_P8_FUSION)
3332 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3333 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3334 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3335 if (TARGET_64BIT)
3336 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3339 if (TARGET_P9_FUSION)
3341 struct fuse_insns {
3342 enum machine_mode mode; /* mode of the fused type. */
3343 enum machine_mode pmode; /* pointer mode. */
3344 enum rs6000_reload_reg_type rtype; /* register type. */
3345 enum insn_code load; /* load insn. */
3346 enum insn_code store; /* store insn. */
3349 static const struct fuse_insns addis_insns[] = {
3350 { SFmode, DImode, RELOAD_REG_FPR,
3351 CODE_FOR_fusion_fpr_di_sf_load,
3352 CODE_FOR_fusion_fpr_di_sf_store },
3354 { SFmode, SImode, RELOAD_REG_FPR,
3355 CODE_FOR_fusion_fpr_si_sf_load,
3356 CODE_FOR_fusion_fpr_si_sf_store },
3358 { DFmode, DImode, RELOAD_REG_FPR,
3359 CODE_FOR_fusion_fpr_di_df_load,
3360 CODE_FOR_fusion_fpr_di_df_store },
3362 { DFmode, SImode, RELOAD_REG_FPR,
3363 CODE_FOR_fusion_fpr_si_df_load,
3364 CODE_FOR_fusion_fpr_si_df_store },
3366 { DImode, DImode, RELOAD_REG_FPR,
3367 CODE_FOR_fusion_fpr_di_di_load,
3368 CODE_FOR_fusion_fpr_di_di_store },
3370 { DImode, SImode, RELOAD_REG_FPR,
3371 CODE_FOR_fusion_fpr_si_di_load,
3372 CODE_FOR_fusion_fpr_si_di_store },
3374 { QImode, DImode, RELOAD_REG_GPR,
3375 CODE_FOR_fusion_gpr_di_qi_load,
3376 CODE_FOR_fusion_gpr_di_qi_store },
3378 { QImode, SImode, RELOAD_REG_GPR,
3379 CODE_FOR_fusion_gpr_si_qi_load,
3380 CODE_FOR_fusion_gpr_si_qi_store },
3382 { HImode, DImode, RELOAD_REG_GPR,
3383 CODE_FOR_fusion_gpr_di_hi_load,
3384 CODE_FOR_fusion_gpr_di_hi_store },
3386 { HImode, SImode, RELOAD_REG_GPR,
3387 CODE_FOR_fusion_gpr_si_hi_load,
3388 CODE_FOR_fusion_gpr_si_hi_store },
3390 { SImode, DImode, RELOAD_REG_GPR,
3391 CODE_FOR_fusion_gpr_di_si_load,
3392 CODE_FOR_fusion_gpr_di_si_store },
3394 { SImode, SImode, RELOAD_REG_GPR,
3395 CODE_FOR_fusion_gpr_si_si_load,
3396 CODE_FOR_fusion_gpr_si_si_store },
3398 { SFmode, DImode, RELOAD_REG_GPR,
3399 CODE_FOR_fusion_gpr_di_sf_load,
3400 CODE_FOR_fusion_gpr_di_sf_store },
3402 { SFmode, SImode, RELOAD_REG_GPR,
3403 CODE_FOR_fusion_gpr_si_sf_load,
3404 CODE_FOR_fusion_gpr_si_sf_store },
3406 { DImode, DImode, RELOAD_REG_GPR,
3407 CODE_FOR_fusion_gpr_di_di_load,
3408 CODE_FOR_fusion_gpr_di_di_store },
3410 { DFmode, DImode, RELOAD_REG_GPR,
3411 CODE_FOR_fusion_gpr_di_df_load,
3412 CODE_FOR_fusion_gpr_di_df_store },
3415 enum machine_mode cur_pmode = Pmode;
3416 size_t i;
3418 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3420 enum machine_mode xmode = addis_insns[i].mode;
3421 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3423 if (addis_insns[i].pmode != cur_pmode)
3424 continue;
3426 if (rtype == RELOAD_REG_FPR
3427 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3428 continue;
3430 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3431 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
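/* A sketch of the kind of fused pair these patterns target (assumed
   typical assembly, not taken from this file): an addis forming the
   high part of an address, immediately followed by the dependent
   memory access, e.g.

       addis 9,2,var@toc@ha
       lwz   3,var@toc@l(9)

   which the processor can treat as a single internal operation.  */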
3435 /* Note which types we support for fusing a TOC setup plus a memory insn. We
3436 only do fused TOCs for medium/large code models. */
3437 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3438 && (TARGET_CMODEL != CMODEL_SMALL))
3440 reg_addr[QImode].fused_toc = true;
3441 reg_addr[HImode].fused_toc = true;
3442 reg_addr[SImode].fused_toc = true;
3443 reg_addr[DImode].fused_toc = true;
3444 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3446 if (TARGET_SINGLE_FLOAT)
3447 reg_addr[SFmode].fused_toc = true;
3448 if (TARGET_DOUBLE_FLOAT)
3449 reg_addr[DFmode].fused_toc = true;
3453 /* Precalculate HARD_REGNO_NREGS. */
3454 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3455 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3456 rs6000_hard_regno_nregs[m][r]
3457 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3459 /* Precalculate HARD_REGNO_MODE_OK. */
3460 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3461 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3462 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3463 rs6000_hard_regno_mode_ok_p[m][r] = true;
3465 /* Precalculate CLASS_MAX_NREGS sizes. */
3466 for (c = 0; c < LIM_REG_CLASSES; ++c)
3468 int reg_size;
3470 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3471 reg_size = UNITS_PER_VSX_WORD;
3473 else if (c == ALTIVEC_REGS)
3474 reg_size = UNITS_PER_ALTIVEC_WORD;
3476 else if (c == FLOAT_REGS)
3477 reg_size = UNITS_PER_FP_WORD;
3479 else
3480 reg_size = UNITS_PER_WORD;
3482 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3484 machine_mode m2 = (machine_mode)m;
3485 int reg_size2 = reg_size;
3487 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3488 in VSX. */
3489 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3490 reg_size2 = UNITS_PER_FP_WORD;
3492 rs6000_class_max_nregs[m][c]
3493 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
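/* A worked example of the ceiling division above (illustrative only):
   V4SImode is 16 bytes, so in FLOAT_REGS with 8-byte FP words it needs
   (16 + 8 - 1) / 8 = 2 registers, while 4-byte SFmode needs
   (4 + 8 - 1) / 8 = 1.  */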
3497 if (TARGET_E500_DOUBLE)
3498 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3500 /* Calculate which modes to automatically generate code to use the
3501 reciprocal divide and square root instructions. In the future, possibly
3502 automatically generate the instructions even if the user did not specify
3503 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3504 not accurate enough. */
3505 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3506 if (TARGET_FRES)
3507 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3508 if (TARGET_FRE)
3509 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3510 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3511 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3512 if (VECTOR_UNIT_VSX_P (V2DFmode))
3513 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3515 if (TARGET_FRSQRTES)
3516 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3517 if (TARGET_FRSQRTE)
3518 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3519 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3520 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3521 if (VECTOR_UNIT_VSX_P (V2DFmode))
3522 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3524 if (rs6000_recip_control)
3526 if (!flag_finite_math_only)
3527 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3528 if (flag_trapping_math)
3529 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3530 if (!flag_reciprocal_math)
3531 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3532 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3534 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3535 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3536 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3538 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3539 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3540 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3542 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3543 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3544 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3546 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3547 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3548 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3550 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3551 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3552 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3554 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3555 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3556 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3558 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3559 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3560 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3562 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3563 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3564 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
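/* Usage sketch (an assumed command line, not from this file): with
   -O3 -ffast-math -mrecip=rsqrtd, the DFmode AUTO_RSQRTE bit above is
   set, so a sqrt feeding a divide can be expanded with frsqrte plus
   Newton-Raphson refinement instead of fsqrt and fdiv.  */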
3568 /* Update the addr mask bits in reg_addr to help secondary reload and the
3569 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
3570 use. */
3571 rs6000_setup_reg_addr_masks ();
3573 if (global_init_p || TARGET_DEBUG_TARGET)
3575 if (TARGET_DEBUG_REG)
3576 rs6000_debug_reg_global ();
3578 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3579 fprintf (stderr,
3580 "SImode variable mult cost = %d\n"
3581 "SImode constant mult cost = %d\n"
3582 "SImode short constant mult cost = %d\n"
3583 "DImode multipliciation cost = %d\n"
3584 "SImode division cost = %d\n"
3585 "DImode division cost = %d\n"
3586 "Simple fp operation cost = %d\n"
3587 "DFmode multiplication cost = %d\n"
3588 "SFmode division cost = %d\n"
3589 "DFmode division cost = %d\n"
3590 "cache line size = %d\n"
3591 "l1 cache size = %d\n"
3592 "l2 cache size = %d\n"
3593 "simultaneous prefetches = %d\n"
3594 "\n",
3595 rs6000_cost->mulsi,
3596 rs6000_cost->mulsi_const,
3597 rs6000_cost->mulsi_const9,
3598 rs6000_cost->muldi,
3599 rs6000_cost->divsi,
3600 rs6000_cost->divdi,
3601 rs6000_cost->fp,
3602 rs6000_cost->dmul,
3603 rs6000_cost->sdiv,
3604 rs6000_cost->ddiv,
3605 rs6000_cost->cache_line_size,
3606 rs6000_cost->l1_cache_size,
3607 rs6000_cost->l2_cache_size,
3608 rs6000_cost->simultaneous_prefetches);
3612 #if TARGET_MACHO
3613 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3615 static void
3616 darwin_rs6000_override_options (void)
3618 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3619 off. */
3620 rs6000_altivec_abi = 1;
3621 TARGET_ALTIVEC_VRSAVE = 1;
3622 rs6000_current_abi = ABI_DARWIN;
3624 if (DEFAULT_ABI == ABI_DARWIN
3625 && TARGET_64BIT)
3626 darwin_one_byte_bool = 1;
3628 if (TARGET_64BIT && ! TARGET_POWERPC64)
3630 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3631 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3633 if (flag_mkernel)
3635 rs6000_default_long_calls = 1;
3636 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3639 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3640 Altivec. */
3641 if (!flag_mkernel && !flag_apple_kext
3642 && TARGET_64BIT
3643 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3644 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3646 /* Unless the user (not the configurer) has explicitly overridden
3647 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3648 G4 unless targeting the kernel. */
3649 if (!flag_mkernel
3650 && !flag_apple_kext
3651 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3652 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3653 && ! global_options_set.x_rs6000_cpu_index)
3655 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3658 #endif
3660 /* If not otherwise specified by a target, make 'long double' equivalent to
3661 'double'. */
3663 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3664 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3665 #endif
3667 /* Return the builtin mask of the various options that could affect which
3668 builtins are available. In the past we used target_flags, but we've run out
3669 of bits, and some options like SPE and PAIRED are no longer in
3670 target_flags. */
3672 HOST_WIDE_INT
3673 rs6000_builtin_mask_calculate (void)
3675 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3676 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3677 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3678 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3679 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3680 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3681 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3682 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3683 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3684 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3685 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3686 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3687 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3688 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3689 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3690 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3691 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3692 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3693 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3694 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3695 | ((TARGET_FLOAT128) ? RS6000_BTM_FLOAT128 : 0));
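/* Sketch of how the mask is consumed (hypothetical check; the real
   test lives in the builtin machinery elsewhere in this file): a
   builtin whose table entry requires
       fnmask = RS6000_BTM_ALTIVEC | RS6000_BTM_64BIT
   is only usable when (fnmask & rs6000_builtin_mask) == fnmask,
   i.e. when every ISA bit it needs is currently enabled.  */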
3698 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3699 to clobber the XER[CA] bit because clobbering that bit without telling
3700 the compiler worked just fine with versions of GCC before GCC 5, and
3701 breaking a lot of older code in ways that are hard to track down is
3702 not such a great idea. */
3704 static rtx_insn *
3705 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3706 vec<const char *> &/*constraints*/,
3707 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3709 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3710 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3711 return NULL;
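/* Example of the kind of pre-GCC-5 code this keeps working
   (illustrative, not from this file): an asm that sets the carry bit
   without declaring it, such as

       asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   Because XER[CA] is implicitly clobbered here, no source change is
   needed.  */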
3714 /* Override command line options. Mostly we process the processor type and
3715 sometimes adjust other TARGET_ options. */
3717 static bool
3718 rs6000_option_override_internal (bool global_init_p)
3720 bool ret = true;
3721 bool have_cpu = false;
3723 /* The default cpu requested at configure time, if any. */
3724 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3726 HOST_WIDE_INT set_masks;
3727 int cpu_index;
3728 int tune_index;
3729 struct cl_target_option *main_target_opt
3730 = ((global_init_p || target_option_default_node == NULL)
3731 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3733 /* Print defaults. */
3734 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3735 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3737 /* Remember the explicit arguments. */
3738 if (global_init_p)
3739 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3741 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3742 library functions, so warn about it. The flag may be useful for
3743 performance studies from time to time though, so don't disable it
3744 entirely. */
3745 if (global_options_set.x_rs6000_alignment_flags
3746 && rs6000_alignment_flags == MASK_ALIGN_POWER
3747 && DEFAULT_ABI == ABI_DARWIN
3748 && TARGET_64BIT)
3749 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3750 " it is incompatible with the installed C and C++ libraries");
3752 /* Numerous experiments show that IRA-based loop pressure
3753 calculation works better for RTL loop-invariant motion on targets
3754 with enough (>= 32) registers. It is an expensive optimization,
3755 so it is enabled only when optimizing for peak performance. */
3756 if (optimize >= 3 && global_init_p
3757 && !global_options_set.x_flag_ira_loop_pressure)
3758 flag_ira_loop_pressure = 1;
3760 /* Set the pointer size. */
3761 if (TARGET_64BIT)
3763 rs6000_pmode = (int)DImode;
3764 rs6000_pointer_size = 64;
3766 else
3768 rs6000_pmode = (int)SImode;
3769 rs6000_pointer_size = 32;
3772 /* Some OSs don't support saving the high part of 64-bit registers on context
3773 switch. Other OSs don't support saving Altivec registers. On those OSs,
3774 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3775 if the user wants either, the user must explicitly specify them and we
3776 won't interfere with the user's specification. */
3778 set_masks = POWERPC_MASKS;
3779 #ifdef OS_MISSING_POWERPC64
3780 if (OS_MISSING_POWERPC64)
3781 set_masks &= ~OPTION_MASK_POWERPC64;
3782 #endif
3783 #ifdef OS_MISSING_ALTIVEC
3784 if (OS_MISSING_ALTIVEC)
3785 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3786 #endif
3788 /* Don't override by the processor default if given explicitly. */
3789 set_masks &= ~rs6000_isa_flags_explicit;
3791 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3792 the cpu in a target attribute or pragma, but did not specify a tuning
3793 option, use the cpu for the tuning option rather than the option specified
3794 with -mtune on the command line. Process a '--with-cpu' configuration
3795 request as an implicit --cpu. */
3796 if (rs6000_cpu_index >= 0)
3798 cpu_index = rs6000_cpu_index;
3799 have_cpu = true;
3801 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3803 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3804 have_cpu = true;
3806 else if (implicit_cpu)
3808 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3809 have_cpu = true;
3811 else
3813 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3814 const char *default_cpu = ((!TARGET_POWERPC64)
3815 ? "powerpc"
3816 : ((BYTES_BIG_ENDIAN)
3817 ? "powerpc64"
3818 : "powerpc64le"));
3820 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3821 have_cpu = false;
3824 gcc_assert (cpu_index >= 0);
3826 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3827 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3828 with those from the cpu, except for options that were explicitly set. If
3829 we don't have a cpu, do not override the target bits set in
3830 TARGET_DEFAULT. */
3831 if (have_cpu)
3833 rs6000_isa_flags &= ~set_masks;
3834 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3835 & set_masks);
3837 else
3839 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3840 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3841 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3842 to using rs6000_isa_flags, we need to do the initialization here.
3844 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3845 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3846 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3847 : processor_target_table[cpu_index].target_enable);
3848 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3851 if (rs6000_tune_index >= 0)
3852 tune_index = rs6000_tune_index;
3853 else if (have_cpu)
3854 rs6000_tune_index = tune_index = cpu_index;
3855 else
3857 size_t i;
3858 enum processor_type tune_proc
3859 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3861 tune_index = -1;
3862 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3863 if (processor_target_table[i].processor == tune_proc)
3865 rs6000_tune_index = tune_index = i;
3866 break;
3870 gcc_assert (tune_index >= 0);
3871 rs6000_cpu = processor_target_table[tune_index].processor;
3873 /* Pick defaults for SPE related control flags. Do this early to make sure
3874 that the TARGET_ macros are representative ASAP. */
3876 int spe_capable_cpu =
3877 (rs6000_cpu == PROCESSOR_PPC8540
3878 || rs6000_cpu == PROCESSOR_PPC8548);
3880 if (!global_options_set.x_rs6000_spe_abi)
3881 rs6000_spe_abi = spe_capable_cpu;
3883 if (!global_options_set.x_rs6000_spe)
3884 rs6000_spe = spe_capable_cpu;
3886 if (!global_options_set.x_rs6000_float_gprs)
3887 rs6000_float_gprs =
3888 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3889 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3890 : 0);
3893 if (global_options_set.x_rs6000_spe_abi
3894 && rs6000_spe_abi
3895 && !TARGET_SPE_ABI)
3896 error ("not configured for SPE ABI");
3898 if (global_options_set.x_rs6000_spe
3899 && rs6000_spe
3900 && !TARGET_SPE)
3901 error ("not configured for SPE instruction set");
3903 if (main_target_opt != NULL
3904 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3905 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3906 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3907 error ("target attribute or pragma changes SPE ABI");
3909 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3910 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3911 || rs6000_cpu == PROCESSOR_PPCE5500)
3913 if (TARGET_ALTIVEC)
3914 error ("AltiVec not supported in this target");
3915 if (TARGET_SPE)
3916 error ("SPE not supported in this target");
3918 if (rs6000_cpu == PROCESSOR_PPCE6500)
3920 if (TARGET_SPE)
3921 error ("SPE not supported in this target");
3924 /* Disable Cell microcode if we are optimizing for the Cell
3925 and not optimizing for size. */
3926 if (rs6000_gen_cell_microcode == -1)
3927 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3928 && !optimize_size);
3930 /* If we are optimizing big endian systems for space and it's OK to
3931 use instructions that would be microcoded on the Cell, use the
3932 load/store multiple and string instructions. */
3933 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3934 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3935 | OPTION_MASK_STRING);
3937 /* Don't allow -mmultiple or -mstring on little endian systems
3938 unless the cpu is a 750, because the hardware doesn't support the
3939 instructions used in little endian mode and they cause an alignment
3940 trap. The 750 does not cause an alignment trap (except when the
3941 target address is unaligned). */
3943 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3945 if (TARGET_MULTIPLE)
3947 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3948 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3949 warning (0, "-mmultiple is not supported on little endian systems");
3952 if (TARGET_STRING)
3954 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3955 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3956 warning (0, "-mstring is not supported on little endian systems");
3960 /* If little-endian, default to -mstrict-align on older processors.
3961 Testing for htm matches power8 and later. */
3962 if (!BYTES_BIG_ENDIAN
3963 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3964 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3966 /* -maltivec={le,be} implies -maltivec. */
3967 if (rs6000_altivec_element_order != 0)
3968 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3970 /* Disallow -maltivec=le in big endian mode for now. This is not
3971 known to be useful for anyone. */
3972 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3974 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3975 rs6000_altivec_element_order = 0;
3978 /* Add some warnings for VSX. */
3979 if (TARGET_VSX)
3981 const char *msg = NULL;
3982 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3983 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3985 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3986 msg = N_("-mvsx requires hardware floating point");
3987 else
3989 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3990 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3993 else if (TARGET_PAIRED_FLOAT)
3994 msg = N_("-mvsx and -mpaired are incompatible");
3995 else if (TARGET_AVOID_XFORM > 0)
3996 msg = N_("-mvsx needs indexed addressing");
3997 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3998 & OPTION_MASK_ALTIVEC))
4000 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4001 msg = N_("-mvsx and -mno-altivec are incompatible");
4002 else
4003 msg = N_("-mno-altivec disables vsx");
4006 if (msg)
4008 warning (0, msg);
4009 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4010 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4014 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4015 the -mcpu setting to enable options that conflict. */
4016 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4017 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4018 | OPTION_MASK_ALTIVEC
4019 | OPTION_MASK_VSX)) != 0)
4020 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4021 | OPTION_MASK_DIRECT_MOVE)
4022 & ~rs6000_isa_flags_explicit);
4024 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4025 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4027 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4028 unless the user explicitly used the -mno-<option> to disable the code. */
4029 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4030 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0 || TARGET_P9_MINMAX)
4031 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4032 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4033 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4034 else if (TARGET_VSX)
4035 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4036 else if (TARGET_POPCNTD)
4037 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4038 else if (TARGET_DFP)
4039 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
4040 else if (TARGET_CMPB)
4041 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
4042 else if (TARGET_FPRND)
4043 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
4044 else if (TARGET_POPCNTB)
4045 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
4046 else if (TARGET_ALTIVEC)
4047 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
4049 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4051 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4052 error ("-mcrypto requires -maltivec");
4053 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4056 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4058 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4059 error ("-mdirect-move requires -mvsx");
4060 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4063 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4065 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4066 error ("-mpower8-vector requires -maltivec");
4067 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4070 if (TARGET_P8_VECTOR && !TARGET_VSX)
4072 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4073 error ("-mpower8-vector requires -mvsx");
4074 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4077 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4079 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4080 error ("-mvsx-timode requires -mvsx");
4081 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4084 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4086 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4087 error ("-mhard-dfp requires -mhard-float");
4088 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4091 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4092 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4093 set the individual option. */
4094 if (TARGET_UPPER_REGS > 0)
4096 if (TARGET_VSX
4097 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4099 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4100 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4102 if (TARGET_VSX
4103 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4105 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4106 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4108 if (TARGET_P8_VECTOR
4109 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4111 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4112 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4115 else if (TARGET_UPPER_REGS == 0)
4117 if (TARGET_VSX
4118 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4120 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4121 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4123 if (TARGET_VSX
4124 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4126 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4127 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4129 if (TARGET_P8_VECTOR
4130 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4132 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4133 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4137 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4139 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4140 error ("-mupper-regs-df requires -mvsx");
4141 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4144 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4146 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4147 error ("-mupper-regs-di requires -mvsx");
4148 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4151 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4153 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4154 error ("-mupper-regs-sf requires -mpower8-vector");
4155 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4158 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4159 silently turn off quad memory mode. */
4160 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4162 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4163 warning (0, N_("-mquad-memory requires 64-bit mode"));
4165 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4166 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4168 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4169 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4172 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4173 the words are reversed, but atomic operations can still be done by
4174 swapping the words. */
4175 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4177 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4178 warning (0, N_("-mquad-memory is not available in little endian mode"));
4180 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4183 /* Assume that if the user asked for normal quad memory instructions, they
4184 also want the atomic versions, unless they explicitly told us not to use
4185 quad-word atomic instructions. */
4186 if (TARGET_QUAD_MEMORY
4187 && !TARGET_QUAD_MEMORY_ATOMIC
4188 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4189 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4191 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4192 generating power8 instructions. */
4193 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4194 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4195 & OPTION_MASK_P8_FUSION);
4197 /* Setting additional fusion flags turns on base fusion. */
4198 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4200 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4202 if (TARGET_P8_FUSION_SIGN)
4203 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4205 if (TARGET_TOC_FUSION)
4206 error ("-mtoc-fusion requires -mpower8-fusion");
4208 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4210 else
4211 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4214 /* Power9 fusion is a superset over power8 fusion. */
4215 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4217 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4219 /* We prefer to not mention undocumented options in
4220 error messages. However, if users have managed to select
4221 power9-fusion without selecting power8-fusion, they
4222 already know about undocumented flags. */
4223 error ("-mpower9-fusion requires -mpower8-fusion");
4224 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4226 else
4227 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4230 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4231 generating power9 instructions. */
4232 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4233 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4234 & OPTION_MASK_P9_FUSION);
4236 /* Power8 does not fuse sign extended loads with the addis. If we are
4237 optimizing at high levels for speed, convert a sign extended load into a
4238 zero extending load, and an explicit sign extension. */
4239 if (TARGET_P8_FUSION
4240 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4241 && optimize_function_for_speed_p (cfun)
4242 && optimize >= 3)
4243 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4245 /* TOC fusion requires 64-bit and medium/large code model. */
4246 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4248 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4249 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4250 warning (0, N_("-mtoc-fusion requires 64-bit"));
4253 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4255 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4256 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4257 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4260 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4261 model. */
4262 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4263 && (TARGET_CMODEL != CMODEL_SMALL)
4264 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4265 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4267 /* ISA 3.0 vector instructions include ISA 2.07. */
4268 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4270 /* We prefer to not mention undocumented options in
4271 error messages. However, if users have managed to select
4272 power9-vector without selecting power8-vector, they
4273 already know about undocumented flags. */
4274 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4275 error ("-mpower9-vector requires -mpower8-vector");
4276 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4279 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4280 -mpower9-dform-vector. */
4281 if (TARGET_P9_DFORM_BOTH > 0)
4283 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4284 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4286 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4287 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4289 else if (TARGET_P9_DFORM_BOTH == 0)
4291 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4292 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4294 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4295 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4298 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4299 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4301 /* We prefer to not mention undocumented options in
4302 error messages. However, if users have managed to select
4303 power9-dform without selecting power9-vector, they
4304 already know about undocumented flags. */
4305 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4306 error ("-mpower9-dform requires -mpower9-vector");
4307 rs6000_isa_flags &= ~(OPTION_MASK_P9_DFORM_SCALAR
4308 | OPTION_MASK_P9_DFORM_VECTOR);
4311 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4313 /* We prefer to not mention undocumented options in
4314 error messages. However, if users have managed to select
4315 power9-dform without selecting upper-regs-df, they
4316 already know about undocumented flags. */
4317 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4318 error ("-mpower9-dform requires -mupper-regs-df");
4319 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4322 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4324 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4325 error ("-mpower9-dform requires -mupper-regs-sf");
4326 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4329 /* Enable LRA by default. */
4330 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4331 rs6000_isa_flags |= OPTION_MASK_LRA;
4333 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4334 but do show up with -mno-lra. Given -mlra will become the default once
4335 PR 69847 is fixed, turn off the options with problems by default if
4336 -mno-lra was used, and warn if the user explicitly asked for the option.
4338 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4339 Enable -mvsx-timode by default if LRA and VSX. */
4340 if (!TARGET_LRA)
4342 if (TARGET_VSX_TIMODE)
4344 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4345 warning (0, "-mvsx-timode might need -mlra");
4347 else
4348 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4352 else
4354 if (TARGET_VSX && !TARGET_VSX_TIMODE
4355 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4356 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4359 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4360 support. If we only have ISA 2.06 support, and the user did not specify
4361 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4362 but we don't enable the full vectorization support. */
4363 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4364 TARGET_ALLOW_MOVMISALIGN = 1;
4366 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4368 if (TARGET_ALLOW_MOVMISALIGN > 0
4369 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4370 error ("-mallow-movmisalign requires -mvsx");
4372 TARGET_ALLOW_MOVMISALIGN = 0;
4375 /* Determine when unaligned vector accesses are permitted, and when
4376 they are preferred over masked Altivec loads. Note that if
4377 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4378 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4379 not true. */
4380 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4382 if (!TARGET_VSX)
4384 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4385 error ("-mefficient-unaligned-vsx requires -mvsx");
4387 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4390 else if (!TARGET_ALLOW_MOVMISALIGN)
4392 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4393 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4395 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4399 /* __float128 requires VSX support. */
4400 if (TARGET_FLOAT128 && !TARGET_VSX)
4402 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4403 error ("-mfloat128 requires VSX support");
4405 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4408 /* If we have -mfloat128 and full ISA 3.0 support, enable -mfloat128-hardware
4409 by default. */
4410 if (TARGET_FLOAT128 && !TARGET_FLOAT128_HW
4411 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4412 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4414 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4415 if ((rs6000_isa_flags & OPTION_MASK_FLOAT128) != 0)
4416 rs6000_isa_flags_explicit |= OPTION_MASK_FLOAT128_HW;
4419 /* IEEE 128-bit floating point hardware instructions imply enabling
4420 __float128. */
4421 if (TARGET_FLOAT128_HW
4422 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4424 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4425 error ("-mfloat128-hardware requires full ISA 3.0 support");
4427 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4430 if (TARGET_FLOAT128_HW
4431 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4432 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4434 /* Print the options after updating the defaults. */
4435 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4436 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4438 /* E500mc does "better" if we inline more aggressively. Respect the
4439 user's opinion, though. */
4440 if (rs6000_block_move_inline_limit == 0
4441 && (rs6000_cpu == PROCESSOR_PPCE500MC
4442 || rs6000_cpu == PROCESSOR_PPCE500MC64
4443 || rs6000_cpu == PROCESSOR_PPCE5500
4444 || rs6000_cpu == PROCESSOR_PPCE6500))
4445 rs6000_block_move_inline_limit = 128;
4447 /* store_one_arg depends on expand_block_move to handle at least the
4448 size of reg_parm_stack_space. */
4449 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4450 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
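/* Consequence sketch: an explicit request such as
   -mblock-move-inline-limit=8 is silently raised to 32 (64 when
   TARGET_POWERPC64) by the clamp above, since store_one_arg relies on
   expand_block_move handling at least reg_parm_stack_space bytes.  */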
4452 if (global_init_p)
4454 /* If the appropriate debug option is enabled, replace the target hooks
4455 with debug versions that call the real version and then prints
4456 debugging information. */
4457 if (TARGET_DEBUG_COST)
4459 targetm.rtx_costs = rs6000_debug_rtx_costs;
4460 targetm.address_cost = rs6000_debug_address_cost;
4461 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4464 if (TARGET_DEBUG_ADDR)
4466 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4467 targetm.legitimize_address = rs6000_debug_legitimize_address;
4468 rs6000_secondary_reload_class_ptr
4469 = rs6000_debug_secondary_reload_class;
4470 rs6000_secondary_memory_needed_ptr
4471 = rs6000_debug_secondary_memory_needed;
4472 rs6000_cannot_change_mode_class_ptr
4473 = rs6000_debug_cannot_change_mode_class;
4474 rs6000_preferred_reload_class_ptr
4475 = rs6000_debug_preferred_reload_class;
4476 rs6000_legitimize_reload_address_ptr
4477 = rs6000_debug_legitimize_reload_address;
4478 rs6000_mode_dependent_address_ptr
4479 = rs6000_debug_mode_dependent_address;
4482 if (rs6000_veclibabi_name)
4484 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4485 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4486 else
4488 error ("unknown vectorization library ABI type (%s) for "
4489 "-mveclibabi= switch", rs6000_veclibabi_name);
4490 ret = false;
4495 if (!global_options_set.x_rs6000_long_double_type_size)
4497 if (main_target_opt != NULL
4498 && (main_target_opt->x_rs6000_long_double_type_size
4499 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4500 error ("target attribute or pragma changes long double size");
4501 else
4502 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4505 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4506 if (!global_options_set.x_rs6000_ieeequad)
4507 rs6000_ieeequad = 1;
4508 #endif
4510 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4511 target attribute or pragma which automatically enables both options,
4512 unless the altivec ABI was set. This is set by default for 64-bit, but
4513 not for 32-bit. */
4514 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4515 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4516 | OPTION_MASK_FLOAT128)
4517 & ~rs6000_isa_flags_explicit);
4519 /* Enable Altivec ABI for AIX -maltivec. */
4520 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4522 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4523 error ("target attribute or pragma changes AltiVec ABI");
4524 else
4525 rs6000_altivec_abi = 1;
4528 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4529 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4530 be explicitly overridden in either case. */
4531 if (TARGET_ELF)
4533 if (!global_options_set.x_rs6000_altivec_abi
4534 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4536 if (main_target_opt != NULL &&
4537 !main_target_opt->x_rs6000_altivec_abi)
4538 error ("target attribute or pragma changes AltiVec ABI");
4539 else
4540 rs6000_altivec_abi = 1;
4544 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4545 So far, the only darwin64 targets are also MACH-O. */
4546 if (TARGET_MACHO
4547 && DEFAULT_ABI == ABI_DARWIN
4548 && TARGET_64BIT)
4550 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4551 error ("target attribute or pragma changes darwin64 ABI");
4552 else
4554 rs6000_darwin64_abi = 1;
4555 /* Default to natural alignment, for better performance. */
4556 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4560 /* Place FP constants in the constant pool instead of TOC
4561 if section anchors enabled. */
4562 if (flag_section_anchors
4563 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4564 TARGET_NO_FP_IN_TOC = 1;
4566 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4567 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4569 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4570 SUBTARGET_OVERRIDE_OPTIONS;
4571 #endif
4572 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4573 SUBSUBTARGET_OVERRIDE_OPTIONS;
4574 #endif
4575 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4576 SUB3TARGET_OVERRIDE_OPTIONS;
4577 #endif
4579 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4580 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4582 /* For the E500 family of cores, reset the single/double FP flags to let us
4583 check that they remain constant across attributes or pragmas. Also,
4584 clear any request for string instructions, which are not supported there
4585 and which we might have silently enabled above for -Os.
4587 For other families, clear ISEL in case it was set implicitly.
4590 switch (rs6000_cpu)
4592 case PROCESSOR_PPC8540:
4593 case PROCESSOR_PPC8548:
4594 case PROCESSOR_PPCE500MC:
4595 case PROCESSOR_PPCE500MC64:
4596 case PROCESSOR_PPCE5500:
4597 case PROCESSOR_PPCE6500:
4599 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4600 rs6000_double_float = TARGET_E500_DOUBLE;
4602 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4604 break;
4606 default:
4608 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4609 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4611 break;
4614 if (main_target_opt)
4616 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4617 error ("target attribute or pragma changes single precision floating "
4618 "point");
4619 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4620 error ("target attribute or pragma changes double precision floating "
4621 "point");
4624 /* Detect invalid option combinations with E500. */
4625 CHECK_E500_OPTIONS;
4627 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4628 && rs6000_cpu != PROCESSOR_POWER5
4629 && rs6000_cpu != PROCESSOR_POWER6
4630 && rs6000_cpu != PROCESSOR_POWER7
4631 && rs6000_cpu != PROCESSOR_POWER8
4632 && rs6000_cpu != PROCESSOR_POWER9
4633 && rs6000_cpu != PROCESSOR_PPCA2
4634 && rs6000_cpu != PROCESSOR_CELL
4635 && rs6000_cpu != PROCESSOR_PPC476);
4636 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4637 || rs6000_cpu == PROCESSOR_POWER5
4638 || rs6000_cpu == PROCESSOR_POWER7
4639 || rs6000_cpu == PROCESSOR_POWER8);
4640 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4641 || rs6000_cpu == PROCESSOR_POWER5
4642 || rs6000_cpu == PROCESSOR_POWER6
4643 || rs6000_cpu == PROCESSOR_POWER7
4644 || rs6000_cpu == PROCESSOR_POWER8
4645 || rs6000_cpu == PROCESSOR_POWER9
4646 || rs6000_cpu == PROCESSOR_PPCE500MC
4647 || rs6000_cpu == PROCESSOR_PPCE500MC64
4648 || rs6000_cpu == PROCESSOR_PPCE5500
4649 || rs6000_cpu == PROCESSOR_PPCE6500);
4651 /* Allow debug switches to override the above settings. These are set to -1
4652 in rs6000.opt to indicate the user hasn't directly set the switch. */
4653 if (TARGET_ALWAYS_HINT >= 0)
4654 rs6000_always_hint = TARGET_ALWAYS_HINT;
4656 if (TARGET_SCHED_GROUPS >= 0)
4657 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4659 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4660 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4662 rs6000_sched_restricted_insns_priority
4663 = (rs6000_sched_groups ? 1 : 0);
4665 /* Handle -msched-costly-dep option. */
4666 rs6000_sched_costly_dep
4667 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4669 if (rs6000_sched_costly_dep_str)
4671 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4672 rs6000_sched_costly_dep = no_dep_costly;
4673 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4674 rs6000_sched_costly_dep = all_deps_costly;
4675 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4676 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4677 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4678 rs6000_sched_costly_dep = store_to_load_dep_costly;
4679 else
4680 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4681 atoi (rs6000_sched_costly_dep_str));
4684 /* Handle -minsert-sched-nops option. */
4685 rs6000_sched_insert_nops
4686 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4688 if (rs6000_sched_insert_nops_str)
4690 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4691 rs6000_sched_insert_nops = sched_finish_none;
4692 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4693 rs6000_sched_insert_nops = sched_finish_pad_groups;
4694 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4695 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4696 else
4697 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4698 atoi (rs6000_sched_insert_nops_str));
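/* Accepted spellings for the two scheduling options parsed above (a
   usage sketch): -msched-costly-dep takes no, all, true_store_to_load,
   store_to_load, or a bare integer threshold such as
   -msched-costly-dep=3; -minsert-sched-nops takes no, pad,
   regroup_exact, or an integer handed to atoi.  */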
4701 if (global_init_p)
4703 #ifdef TARGET_REGNAMES
4704 /* If the user desires alternate register names, copy in the
4705 alternate names now. */
4706 if (TARGET_REGNAMES)
4707 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4708 #endif
4710 /* Set aix_struct_return last, after the ABI is determined.
4711 If -maix-struct-return or -msvr4-struct-return was explicitly
4712 used, don't override with the ABI default. */
4713 if (!global_options_set.x_aix_struct_return)
4714 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4716 #if 0
4717 /* IBM XL compiler defaults to unsigned bitfields. */
4718 if (TARGET_XL_COMPAT)
4719 flag_signed_bitfields = 0;
4720 #endif
4722 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4723 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4725 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4727 /* We can only guarantee the availability of DI pseudo-ops when
4728 assembling for 64-bit targets. */
4729 if (!TARGET_64BIT)
4731 targetm.asm_out.aligned_op.di = NULL;
4732 targetm.asm_out.unaligned_op.di = NULL;
4736 /* Set branch target alignment, if not optimizing for size. */
4737 if (!optimize_size)
4739 /* Cell wants 8-byte alignment for dual issue. Titan wants 8-byte
4740 alignment to avoid misprediction by the branch predictor. */
4741 if (rs6000_cpu == PROCESSOR_TITAN
4742 || rs6000_cpu == PROCESSOR_CELL)
4744 if (align_functions <= 0)
4745 align_functions = 8;
4746 if (align_jumps <= 0)
4747 align_jumps = 8;
4748 if (align_loops <= 0)
4749 align_loops = 8;
4751 if (rs6000_align_branch_targets)
4753 if (align_functions <= 0)
4754 align_functions = 16;
4755 if (align_jumps <= 0)
4756 align_jumps = 16;
4757 if (align_loops <= 0)
4759 can_override_loop_align = 1;
4760 align_loops = 16;
4763 if (align_jumps_max_skip <= 0)
4764 align_jumps_max_skip = 15;
4765 if (align_loops_max_skip <= 0)
4766 align_loops_max_skip = 15;
4769 /* Arrange to save and restore machine status around nested functions. */
4770 init_machine_status = rs6000_init_machine_status;
4772 /* We should always be splitting complex arguments, but we can't break
4773 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4774 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4775 targetm.calls.split_complex_arg = NULL;
4778 /* Initialize rs6000_cost with the appropriate target costs. */
4779 if (optimize_size)
4780 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4781 else
4782 switch (rs6000_cpu)
4784 case PROCESSOR_RS64A:
4785 rs6000_cost = &rs64a_cost;
4786 break;
4788 case PROCESSOR_MPCCORE:
4789 rs6000_cost = &mpccore_cost;
4790 break;
4792 case PROCESSOR_PPC403:
4793 rs6000_cost = &ppc403_cost;
4794 break;
4796 case PROCESSOR_PPC405:
4797 rs6000_cost = &ppc405_cost;
4798 break;
4800 case PROCESSOR_PPC440:
4801 rs6000_cost = &ppc440_cost;
4802 break;
4804 case PROCESSOR_PPC476:
4805 rs6000_cost = &ppc476_cost;
4806 break;
4808 case PROCESSOR_PPC601:
4809 rs6000_cost = &ppc601_cost;
4810 break;
4812 case PROCESSOR_PPC603:
4813 rs6000_cost = &ppc603_cost;
4814 break;
4816 case PROCESSOR_PPC604:
4817 rs6000_cost = &ppc604_cost;
4818 break;
4820 case PROCESSOR_PPC604e:
4821 rs6000_cost = &ppc604e_cost;
4822 break;
4824 case PROCESSOR_PPC620:
4825 rs6000_cost = &ppc620_cost;
4826 break;
4828 case PROCESSOR_PPC630:
4829 rs6000_cost = &ppc630_cost;
4830 break;
4832 case PROCESSOR_CELL:
4833 rs6000_cost = &ppccell_cost;
4834 break;
4836 case PROCESSOR_PPC750:
4837 case PROCESSOR_PPC7400:
4838 rs6000_cost = &ppc750_cost;
4839 break;
4841 case PROCESSOR_PPC7450:
4842 rs6000_cost = &ppc7450_cost;
4843 break;
4845 case PROCESSOR_PPC8540:
4846 case PROCESSOR_PPC8548:
4847 rs6000_cost = &ppc8540_cost;
4848 break;
4850 case PROCESSOR_PPCE300C2:
4851 case PROCESSOR_PPCE300C3:
4852 rs6000_cost = &ppce300c2c3_cost;
4853 break;
4855 case PROCESSOR_PPCE500MC:
4856 rs6000_cost = &ppce500mc_cost;
4857 break;
4859 case PROCESSOR_PPCE500MC64:
4860 rs6000_cost = &ppce500mc64_cost;
4861 break;
4863 case PROCESSOR_PPCE5500:
4864 rs6000_cost = &ppce5500_cost;
4865 break;
4867 case PROCESSOR_PPCE6500:
4868 rs6000_cost = &ppce6500_cost;
4869 break;
4871 case PROCESSOR_TITAN:
4872 rs6000_cost = &titan_cost;
4873 break;
4875 case PROCESSOR_POWER4:
4876 case PROCESSOR_POWER5:
4877 rs6000_cost = &power4_cost;
4878 break;
4880 case PROCESSOR_POWER6:
4881 rs6000_cost = &power6_cost;
4882 break;
4884 case PROCESSOR_POWER7:
4885 rs6000_cost = &power7_cost;
4886 break;
4888 case PROCESSOR_POWER8:
4889 rs6000_cost = &power8_cost;
4890 break;
4892 case PROCESSOR_POWER9:
4893 rs6000_cost = &power9_cost;
4894 break;
4896 case PROCESSOR_PPCA2:
4897 rs6000_cost = &ppca2_cost;
4898 break;
4900 default:
4901 gcc_unreachable ();
4904 if (global_init_p)
4906 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4907 rs6000_cost->simultaneous_prefetches,
4908 global_options.x_param_values,
4909 global_options_set.x_param_values);
4910 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4911 global_options.x_param_values,
4912 global_options_set.x_param_values);
4913 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4914 rs6000_cost->cache_line_size,
4915 global_options.x_param_values,
4916 global_options_set.x_param_values);
4917 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4918 global_options.x_param_values,
4919 global_options_set.x_param_values);
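/* These stay user-overridable (an assumed command line): because
   maybe_set_param_value only fills in parameters not recorded in
   global_options_set, an explicit
       gcc -O2 --param l1-cache-size=64 ...
   still wins over the CPU-derived value supplied here.  */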
4921 /* Increase loop peeling limits based on performance analysis. */
4922 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4923 global_options.x_param_values,
4924 global_options_set.x_param_values);
4925 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4926 global_options.x_param_values,
4927 global_options_set.x_param_values);
4929 /* If using typedef char *va_list, signal that
4930 __builtin_va_start (&ap, 0) can be optimized to
4931 ap = __builtin_next_arg (0). */
4932 if (DEFAULT_ABI != ABI_V4)
4933 targetm.expand_builtin_va_start = NULL;
4936 /* Set up single/double float flags.
4937 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4938 then set both flags. */
4939 if (TARGET_HARD_FLOAT && TARGET_FPRS
4940 && rs6000_single_float == 0 && rs6000_double_float == 0)
4941 rs6000_single_float = rs6000_double_float = 1;
4943 /* If not explicitly specified via option, decide whether to generate indexed
4944 load/store instructions. */
4945 if (TARGET_AVOID_XFORM == -1)
4946 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4947 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4948 need indexed accesses and the type used is the scalar type of the element
4949 being loaded or stored. */
4950 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4951 && !TARGET_ALTIVEC);
4953 /* Set the -mrecip options. */
4954 if (rs6000_recip_name)
4956 char *p = ASTRDUP (rs6000_recip_name);
4957 char *q;
4958 unsigned int mask, i;
4959 bool invert;
4961 while ((q = strtok (p, ",")) != NULL)
4963 p = NULL;
4964 if (*q == '!')
4966 invert = true;
4967 q++;
4969 else
4970 invert = false;
4972 if (!strcmp (q, "default"))
4973 mask = ((TARGET_RECIP_PRECISION)
4974 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4975 else
4977 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4978 if (!strcmp (q, recip_options[i].string))
4980 mask = recip_options[i].mask;
4981 break;
4984 if (i == ARRAY_SIZE (recip_options))
4986 error ("unknown option for -mrecip=%s", q);
4987 invert = false;
4988 mask = 0;
4989 ret = false;
4993 if (invert)
4994 rs6000_recip_control &= ~mask;
4995 else
4996 rs6000_recip_control |= mask;
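/* Parsing sketch (option names assumed to match the recip_options
   table defined earlier in this file): -mrecip=default picks the
   precision-appropriate preset, while a list such as
   -mrecip=all,!rsqrtd enables every estimate except double-precision
   rsqrt; the '!' prefix handled above inverts the named mask.  */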
5000 /* Set the builtin mask of the various options used that could affect which
5001 builtins were used. In the past we used target_flags, but we've run out
5002 of bits, and some options like SPE and PAIRED are no longer in
5003 target_flags. */
5004 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5005 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5006 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5007 rs6000_builtin_mask);
5009 /* Initialize all of the registers. */
5010 rs6000_init_hard_regno_mode_ok (global_init_p);
5012 /* Save the initial options in case the user uses function-specific options. */
5013 if (global_init_p)
5014 target_option_default_node = target_option_current_node
5015 = build_target_option_node (&global_options);
5017 /* If not explicitly specified via option, decide whether to generate the
5018 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5019 if (TARGET_LINK_STACK == -1)
5020 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5022 return ret;
5025 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5026 define the target cpu type. */
5028 static void
5029 rs6000_option_override (void)
5031 (void) rs6000_option_override_internal (true);
5033 /* Register machine-specific passes. This needs to be done at start-up.
5034 It's convenient to do it here (like i386 does). */
5035 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
5037 struct register_pass_info analyze_swaps_info
5038 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
5040 register_pass (&analyze_swaps_info);
5044 /* Implement targetm.vectorize.builtin_mask_for_load. */
5045 static tree
5046 rs6000_builtin_mask_for_load (void)
5048 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5049 if ((TARGET_ALTIVEC && !TARGET_VSX)
5050 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5051 return altivec_builtin_mask_for_load;
5052 else
5053 return 0;
5056 /* Implement LOOP_ALIGN. */
5057 int
5058 rs6000_loop_align (rtx label)
5060 basic_block bb;
5061 int ninsns;
5063 /* Don't override loop alignment if -falign-loops was specified. */
5064 if (!can_override_loop_align)
5065 return align_loops_log;
5067 bb = BLOCK_FOR_INSN (label);
5068 ninsns = num_loop_insns(bb->loop_father);
5070 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5071 if (ninsns > 4 && ninsns <= 8
5072 && (rs6000_cpu == PROCESSOR_POWER4
5073 || rs6000_cpu == PROCESSOR_POWER5
5074 || rs6000_cpu == PROCESSOR_POWER6
5075 || rs6000_cpu == PROCESSOR_POWER7
5076 || rs6000_cpu == PROCESSOR_POWER8
5077 || rs6000_cpu == PROCESSOR_POWER9))
5078 return 5;
5079 else
5080 return align_loops_log;
5083 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5084 static int
5085 rs6000_loop_align_max_skip (rtx_insn *label)
5087 return (1 << rs6000_loop_align (label)) - 1;
5090 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5091 after applying N iterations.  This routine does not determine how many
5092 iterations are required to reach the desired alignment. */
5094 static bool
5095 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5097 if (is_packed)
5098 return false;
5100 if (TARGET_32BIT)
5102 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5103 return true;
5105 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5106 return true;
5108 return false;
5110 else
5112 if (TARGET_MACHO)
5113 return false;
5115 /* Assume that all other types are naturally aligned.  CHECKME! */
5116 return true;
5120 /* Return true if the vector misalignment factor is supported by the
5121 target. */
5122 static bool
5123 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5124 const_tree type,
5125 int misalignment,
5126 bool is_packed)
5128 if (TARGET_VSX)
5130 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5131 return true;
5133 /* Return false if the movmisalign pattern is not supported for this mode. */
5134 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5135 return false;
5137 if (misalignment == -1)
5139 /* Misalignment factor is unknown at compile time but we know
5140 it's word aligned. */
5141 if (rs6000_vector_alignment_reachable (type, is_packed))
5143 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5145 if (element_size == 64 || element_size == 32)
5146 return true;
5149 return false;
5152 /* VSX supports word-aligned vectors. */
5153 if (misalignment % 4 == 0)
5154 return true;
5156 return false;
5159 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5160 static int
5161 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5162 tree vectype, int misalign)
5164 unsigned elements;
5165 tree elem_type;
5167 switch (type_of_cost)
5169 case scalar_stmt:
5170 case scalar_load:
5171 case scalar_store:
5172 case vector_stmt:
5173 case vector_load:
5174 case vector_store:
5175 case vec_to_scalar:
5176 case scalar_to_vec:
5177 case cond_branch_not_taken:
5178 return 1;
5180 case vec_perm:
5181 if (TARGET_VSX)
5182 return 3;
5183 else
5184 return 1;
5186 case vec_promote_demote:
5187 if (TARGET_VSX)
5188 return 4;
5189 else
5190 return 1;
5192 case cond_branch_taken:
5193 return 3;
5195 case unaligned_load:
5196 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5197 return 1;
5199 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5201 elements = TYPE_VECTOR_SUBPARTS (vectype);
5202 if (elements == 2)
5203 /* Double word aligned. */
5204 return 2;
5206 if (elements == 4)
5208 switch (misalign)
5210 case 8:
5211 /* Double word aligned. */
5212 return 2;
5214 case -1:
5215 /* Unknown misalignment. */
5216 case 4:
5217 case 12:
5218 /* Word aligned. */
5219 return 22;
5221 default:
5222 gcc_unreachable ();
5227 if (TARGET_ALTIVEC)
5228 /* Misaligned loads are not supported. */
5229 gcc_unreachable ();
5231 return 2;
5233 case unaligned_store:
5234 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5235 return 1;
5237 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5239 elements = TYPE_VECTOR_SUBPARTS (vectype);
5240 if (elements == 2)
5241 /* Double word aligned. */
5242 return 2;
5244 if (elements == 4)
5246 switch (misalign)
5248 case 8:
5249 /* Double word aligned. */
5250 return 2;
5252 case -1:
5253 /* Unknown misalignment. */
5254 case 4:
5255 case 12:
5256 /* Word aligned. */
5257 return 23;
5259 default:
5260 gcc_unreachable ();
5265 if (TARGET_ALTIVEC)
5266 /* Misaligned stores are not supported. */
5267 gcc_unreachable ();
5269 return 2;
5271 case vec_construct:
5272 /* This is a rough approximation assuming non-constant elements
5273 constructed into a vector via element insertion. FIXME:
5274 vec_construct is not granular enough for uniformly good
5275 decisions. If the initialization is a splat, this is
5276 cheaper than we estimate. Improve this someday. */
5277 elem_type = TREE_TYPE (vectype);
5278 /* 32-bit vectors loaded into registers are stored as double
5279 precision, so we need 2 permutes, 2 converts, and 1 merge
5280 to construct a vector of short floats from them. */
5281 if (SCALAR_FLOAT_TYPE_P (elem_type)
5282 && TYPE_PRECISION (elem_type) == 32)
5283 return 5;
5284 else
5285 return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
5287 default:
5288 gcc_unreachable ();
5292 /* Implement targetm.vectorize.preferred_simd_mode. */
5294 static machine_mode
5295 rs6000_preferred_simd_mode (machine_mode mode)
5297 if (TARGET_VSX)
5298 switch (mode)
5300 case DFmode:
5301 return V2DFmode;
5302 default:;
5304 if (TARGET_ALTIVEC || TARGET_VSX)
5305 switch (mode)
5307 case SFmode:
5308 return V4SFmode;
5309 case TImode:
5310 return V1TImode;
5311 case DImode:
5312 return V2DImode;
5313 case SImode:
5314 return V4SImode;
5315 case HImode:
5316 return V8HImode;
5317 case QImode:
5318 return V16QImode;
5319 default:;
5321 if (TARGET_SPE)
5322 switch (mode)
5324 case SFmode:
5325 return V2SFmode;
5326 case SImode:
5327 return V2SImode;
5328 default:;
5330 if (TARGET_PAIRED_FLOAT
5331 && mode == SFmode)
5332 return V2SFmode;
5333 return word_mode;
5336 typedef struct _rs6000_cost_data
5338 struct loop *loop_info;
5339 unsigned cost[3];
5340 } rs6000_cost_data;
5342 /* Test for likely overcommitment of vector hardware resources. If a
5343 loop iteration is relatively large, and too large a percentage of
5344 instructions in the loop are vectorized, the cost model may not
5345 adequately reflect delays from unavailable vector resources.
5346 Penalize the loop body cost for this case. */
5348 static void
5349 rs6000_density_test (rs6000_cost_data *data)
5351 const int DENSITY_PCT_THRESHOLD = 85;
5352 const int DENSITY_SIZE_THRESHOLD = 70;
5353 const int DENSITY_PENALTY = 10;
5354 struct loop *loop = data->loop_info;
5355 basic_block *bbs = get_loop_body (loop);
5356 int nbbs = loop->num_nodes;
5357 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5358 int i, density_pct;
5360 for (i = 0; i < nbbs; i++)
5362 basic_block bb = bbs[i];
5363 gimple_stmt_iterator gsi;
5365 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5367 gimple *stmt = gsi_stmt (gsi);
5368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5370 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5371 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5372 not_vec_cost++;
5376 free (bbs);
5377 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5379 if (density_pct > DENSITY_PCT_THRESHOLD
5380 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5382 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5383 if (dump_enabled_p ())
5384 dump_printf_loc (MSG_NOTE, vect_location,
5385 "density %d%%, cost %d exceeds threshold, penalizing "
5386 "loop body cost by %d%%", density_pct,
5387 vec_cost + not_vec_cost, DENSITY_PENALTY);
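/* Worked example (illustrative): with vec_cost = 90 and
   not_vec_cost = 10, density_pct is 90, exceeding
   DENSITY_PCT_THRESHOLD (85); the total of 100 also exceeds
   DENSITY_SIZE_THRESHOLD (70), so the body cost is scaled to
   90 * 110 / 100 = 99. */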
5391 /* Implement targetm.vectorize.init_cost. */
5393 static void *
5394 rs6000_init_cost (struct loop *loop_info)
5396 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5397 data->loop_info = loop_info;
5398 data->cost[vect_prologue] = 0;
5399 data->cost[vect_body] = 0;
5400 data->cost[vect_epilogue] = 0;
5401 return data;
5404 /* Implement targetm.vectorize.add_stmt_cost. */
5406 static unsigned
5407 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5408 struct _stmt_vec_info *stmt_info, int misalign,
5409 enum vect_cost_model_location where)
5411 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5412 unsigned retval = 0;
5414 if (flag_vect_cost_model)
5416 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5417 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5418 misalign);
5419 /* Statements in an inner loop relative to the loop being
5420 vectorized are weighted more heavily. The value here is
5421 arbitrary and could potentially be improved with analysis. */
5422 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5423 count *= 50; /* FIXME. */
5425 retval = (unsigned) (count * stmt_cost);
5426 cost_data->cost[where] += retval;
5429 return retval;
5432 /* Implement targetm.vectorize.finish_cost. */
5434 static void
5435 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5436 unsigned *body_cost, unsigned *epilogue_cost)
5438 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5440 if (cost_data->loop_info)
5441 rs6000_density_test (cost_data);
5443 *prologue_cost = cost_data->cost[vect_prologue];
5444 *body_cost = cost_data->cost[vect_body];
5445 *epilogue_cost = cost_data->cost[vect_epilogue];
5448 /* Implement targetm.vectorize.destroy_cost_data. */
5450 static void
5451 rs6000_destroy_cost_data (void *data)
5453 free (data);
5456 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5457 library with vectorized intrinsics. */
5459 static tree
5460 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5461 tree type_in)
5463 char name[32];
5464 const char *suffix = NULL;
5465 tree fntype, new_fndecl, bdecl = NULL_TREE;
5466 int n_args = 1;
5467 const char *bname;
5468 machine_mode el_mode, in_mode;
5469 int n, in_n;
5471 /* Libmass is suitable for unsafe math only as it does not correctly support
5472 parts of IEEE with the required precision such as denormals. Only support
5473 it if we have VSX to use the simd d2 or f4 functions.
5474 XXX: Add variable length support. */
5475 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5476 return NULL_TREE;
5478 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5479 n = TYPE_VECTOR_SUBPARTS (type_out);
5480 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5481 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5482 if (el_mode != in_mode
5483 || n != in_n)
5484 return NULL_TREE;
5486 switch (fn)
5488 CASE_CFN_ATAN2:
5489 CASE_CFN_HYPOT:
5490 CASE_CFN_POW:
5491 n_args = 2;
5492 /* fall through */
5494 CASE_CFN_ACOS:
5495 CASE_CFN_ACOSH:
5496 CASE_CFN_ASIN:
5497 CASE_CFN_ASINH:
5498 CASE_CFN_ATAN:
5499 CASE_CFN_ATANH:
5500 CASE_CFN_CBRT:
5501 CASE_CFN_COS:
5502 CASE_CFN_COSH:
5503 CASE_CFN_ERF:
5504 CASE_CFN_ERFC:
5505 CASE_CFN_EXP2:
5506 CASE_CFN_EXP:
5507 CASE_CFN_EXPM1:
5508 CASE_CFN_LGAMMA:
5509 CASE_CFN_LOG10:
5510 CASE_CFN_LOG1P:
5511 CASE_CFN_LOG2:
5512 CASE_CFN_LOG:
5513 CASE_CFN_SIN:
5514 CASE_CFN_SINH:
5515 CASE_CFN_SQRT:
5516 CASE_CFN_TAN:
5517 CASE_CFN_TANH:
5518 if (el_mode == DFmode && n == 2)
5520 bdecl = mathfn_built_in (double_type_node, fn);
5521 suffix = "d2"; /* pow -> powd2 */
5523 else if (el_mode == SFmode && n == 4)
5525 bdecl = mathfn_built_in (float_type_node, fn);
5526 suffix = "4"; /* powf -> powf4 */
5528 else
5529 return NULL_TREE;
5530 if (!bdecl)
5531 return NULL_TREE;
5532 break;
5534 default:
5535 return NULL_TREE;
5538 gcc_assert (suffix != NULL);
5539 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5540 if (!bname)
5541 return NULL_TREE;
5543 strcpy (name, bname + sizeof ("__builtin_") - 1);
5544 strcat (name, suffix);
5546 if (n_args == 1)
5547 fntype = build_function_type_list (type_out, type_in, NULL);
5548 else if (n_args == 2)
5549 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5550 else
5551 gcc_unreachable ();
5553 /* Build a function declaration for the vectorized function. */
5554 new_fndecl = build_decl (BUILTINS_LOCATION,
5555 FUNCTION_DECL, get_identifier (name), fntype);
5556 TREE_PUBLIC (new_fndecl) = 1;
5557 DECL_EXTERNAL (new_fndecl) = 1;
5558 DECL_IS_NOVOPS (new_fndecl) = 1;
5559 TREE_READONLY (new_fndecl) = 1;
5561 return new_fndecl;
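/* Example of the name construction above (illustrative): for a V2DF
   pow, bdecl is __builtin_pow, so bname is "__builtin_pow"; stripping
   the "__builtin_" prefix and appending the "d2" suffix yields
   "powd2", the MASS SIMD routine operating on vectors of two
   doubles. */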
5564 /* Returns a function decl for a vectorized version of the builtin function
5565 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5566 if it is not available. */
5568 static tree
5569 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5570 tree type_in)
5572 machine_mode in_mode, out_mode;
5573 int in_n, out_n;
5575 if (TARGET_DEBUG_BUILTIN)
5576 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5577 combined_fn_name (combined_fn (fn)),
5578 GET_MODE_NAME (TYPE_MODE (type_out)),
5579 GET_MODE_NAME (TYPE_MODE (type_in)));
5581 if (TREE_CODE (type_out) != VECTOR_TYPE
5582 || TREE_CODE (type_in) != VECTOR_TYPE
5583 || !TARGET_VECTORIZE_BUILTINS)
5584 return NULL_TREE;
5586 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5587 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5588 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5589 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5591 switch (fn)
5593 CASE_CFN_COPYSIGN:
5594 if (VECTOR_UNIT_VSX_P (V2DFmode)
5595 && out_mode == DFmode && out_n == 2
5596 && in_mode == DFmode && in_n == 2)
5597 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5598 if (VECTOR_UNIT_VSX_P (V4SFmode)
5599 && out_mode == SFmode && out_n == 4
5600 && in_mode == SFmode && in_n == 4)
5601 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5602 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5603 && out_mode == SFmode && out_n == 4
5604 && in_mode == SFmode && in_n == 4)
5605 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5606 break;
5607 CASE_CFN_CEIL:
5608 if (VECTOR_UNIT_VSX_P (V2DFmode)
5609 && out_mode == DFmode && out_n == 2
5610 && in_mode == DFmode && in_n == 2)
5611 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5612 if (VECTOR_UNIT_VSX_P (V4SFmode)
5613 && out_mode == SFmode && out_n == 4
5614 && in_mode == SFmode && in_n == 4)
5615 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5616 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5617 && out_mode == SFmode && out_n == 4
5618 && in_mode == SFmode && in_n == 4)
5619 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5620 break;
5621 CASE_CFN_FLOOR:
5622 if (VECTOR_UNIT_VSX_P (V2DFmode)
5623 && out_mode == DFmode && out_n == 2
5624 && in_mode == DFmode && in_n == 2)
5625 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5626 if (VECTOR_UNIT_VSX_P (V4SFmode)
5627 && out_mode == SFmode && out_n == 4
5628 && in_mode == SFmode && in_n == 4)
5629 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5630 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5631 && out_mode == SFmode && out_n == 4
5632 && in_mode == SFmode && in_n == 4)
5633 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5634 break;
5635 CASE_CFN_FMA:
5636 if (VECTOR_UNIT_VSX_P (V2DFmode)
5637 && out_mode == DFmode && out_n == 2
5638 && in_mode == DFmode && in_n == 2)
5639 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5640 if (VECTOR_UNIT_VSX_P (V4SFmode)
5641 && out_mode == SFmode && out_n == 4
5642 && in_mode == SFmode && in_n == 4)
5643 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5644 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5645 && out_mode == SFmode && out_n == 4
5646 && in_mode == SFmode && in_n == 4)
5647 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5648 break;
5649 CASE_CFN_TRUNC:
5650 if (VECTOR_UNIT_VSX_P (V2DFmode)
5651 && out_mode == DFmode && out_n == 2
5652 && in_mode == DFmode && in_n == 2)
5653 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5654 if (VECTOR_UNIT_VSX_P (V4SFmode)
5655 && out_mode == SFmode && out_n == 4
5656 && in_mode == SFmode && in_n == 4)
5657 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5658 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5659 && out_mode == SFmode && out_n == 4
5660 && in_mode == SFmode && in_n == 4)
5661 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5662 break;
5663 CASE_CFN_NEARBYINT:
5664 if (VECTOR_UNIT_VSX_P (V2DFmode)
5665 && flag_unsafe_math_optimizations
5666 && out_mode == DFmode && out_n == 2
5667 && in_mode == DFmode && in_n == 2)
5668 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5669 if (VECTOR_UNIT_VSX_P (V4SFmode)
5670 && flag_unsafe_math_optimizations
5671 && out_mode == SFmode && out_n == 4
5672 && in_mode == SFmode && in_n == 4)
5673 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5674 break;
5675 CASE_CFN_RINT:
5676 if (VECTOR_UNIT_VSX_P (V2DFmode)
5677 && !flag_trapping_math
5678 && out_mode == DFmode && out_n == 2
5679 && in_mode == DFmode && in_n == 2)
5680 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5681 if (VECTOR_UNIT_VSX_P (V4SFmode)
5682 && !flag_trapping_math
5683 && out_mode == SFmode && out_n == 4
5684 && in_mode == SFmode && in_n == 4)
5685 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5686 break;
5687 default:
5688 break;
5691 /* Generate calls to libmass if appropriate. */
5692 if (rs6000_veclib_handler)
5693 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5695 return NULL_TREE;
5698 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5700 static tree
5701 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5702 tree type_in)
5704 machine_mode in_mode, out_mode;
5705 int in_n, out_n;
5707 if (TARGET_DEBUG_BUILTIN)
5708 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5709 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5710 GET_MODE_NAME (TYPE_MODE (type_out)),
5711 GET_MODE_NAME (TYPE_MODE (type_in)));
5713 if (TREE_CODE (type_out) != VECTOR_TYPE
5714 || TREE_CODE (type_in) != VECTOR_TYPE
5715 || !TARGET_VECTORIZE_BUILTINS)
5716 return NULL_TREE;
5718 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5719 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5720 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5721 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5723 enum rs6000_builtins fn
5724 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5725 switch (fn)
5727 case RS6000_BUILTIN_RSQRTF:
5728 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5729 && out_mode == SFmode && out_n == 4
5730 && in_mode == SFmode && in_n == 4)
5731 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5732 break;
5733 case RS6000_BUILTIN_RSQRT:
5734 if (VECTOR_UNIT_VSX_P (V2DFmode)
5735 && out_mode == DFmode && out_n == 2
5736 && in_mode == DFmode && in_n == 2)
5737 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5738 break;
5739 case RS6000_BUILTIN_RECIPF:
5740 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5741 && out_mode == SFmode && out_n == 4
5742 && in_mode == SFmode && in_n == 4)
5743 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5744 break;
5745 case RS6000_BUILTIN_RECIP:
5746 if (VECTOR_UNIT_VSX_P (V2DFmode)
5747 && out_mode == DFmode && out_n == 2
5748 && in_mode == DFmode && in_n == 2)
5749 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5750 break;
5751 default:
5752 break;
5754 return NULL_TREE;
5757 /* Default CPU string for rs6000*_file_start functions. */
5758 static const char *rs6000_default_cpu;
5760 /* Do anything needed at the start of the asm file. */
5762 static void
5763 rs6000_file_start (void)
5765 char buffer[80];
5766 const char *start = buffer;
5767 FILE *file = asm_out_file;
5769 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5771 default_file_start ();
5773 if (flag_verbose_asm)
5775 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5777 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5779 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5780 start = "";
5783 if (global_options_set.x_rs6000_cpu_index)
5785 fprintf (file, "%s -mcpu=%s", start,
5786 processor_target_table[rs6000_cpu_index].name);
5787 start = "";
5790 if (global_options_set.x_rs6000_tune_index)
5792 fprintf (file, "%s -mtune=%s", start,
5793 processor_target_table[rs6000_tune_index].name);
5794 start = "";
5797 if (PPC405_ERRATUM77)
5799 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5800 start = "";
5803 #ifdef USING_ELFOS_H
5804 switch (rs6000_sdata)
5806 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5807 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5808 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5809 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5812 if (rs6000_sdata && g_switch_value)
5814 fprintf (file, "%s -G %d", start,
5815 g_switch_value);
5816 start = "";
5818 #endif
5820 if (*start == '\0')
5821 putc ('\n', file);
5824 #ifdef USING_ELFOS_H
5825 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5826 && !global_options_set.x_rs6000_cpu_index)
5828 fputs ("\t.machine ", asm_out_file);
5829 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5830 fputs ("power9\n", asm_out_file);
5831 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5832 fputs ("power8\n", asm_out_file);
5833 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5834 fputs ("power7\n", asm_out_file);
5835 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5836 fputs ("power6\n", asm_out_file);
5837 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5838 fputs ("power5\n", asm_out_file);
5839 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5840 fputs ("power4\n", asm_out_file);
5841 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5842 fputs ("ppc64\n", asm_out_file);
5843 else
5844 fputs ("ppc\n", asm_out_file);
5846 #endif
5848 if (DEFAULT_ABI == ABI_ELFv2)
5849 fprintf (file, "\t.abiversion 2\n");
5853 /* Return nonzero if this function is known to have a null epilogue. */
5855 int
5856 direct_return (void)
5858 if (reload_completed)
5860 rs6000_stack_t *info = rs6000_stack_info ();
5862 if (info->first_gp_reg_save == 32
5863 && info->first_fp_reg_save == 64
5864 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5865 && ! info->lr_save_p
5866 && ! info->cr_save_p
5867 && info->vrsave_size == 0
5868 && ! info->push_p)
5869 return 1;
5872 return 0;
5875 /* Return the number of instructions it takes to form a constant in an
5876 integer register. */
5878 static int
5879 num_insns_constant_wide (HOST_WIDE_INT value)
5881 /* signed constant loadable with addi */
5882 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5883 return 1;
5885 /* constant loadable with addis */
5886 else if ((value & 0xffff) == 0
5887 && (value >> 31 == -1 || value >> 31 == 0))
5888 return 1;
5890 else if (TARGET_POWERPC64)
5892 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5893 HOST_WIDE_INT high = value >> 31;
5895 if (high == 0 || high == -1)
5896 return 2;
5898 high >>= 1;
5900 if (low == 0)
5901 return num_insns_constant_wide (high) + 1;
5902 else if (high == 0)
5903 return num_insns_constant_wide (low) + 1;
5904 else
5905 return (num_insns_constant_wide (high)
5906 + num_insns_constant_wide (low) + 1);
5909 else
5910 return 2;
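/* Worked example (illustrative): on a 64-bit target, the constant
   0x123456789abcdef0 splits into high = 0x12345678 (2 insns) and the
   sign-extended low part 0xffffffff9abcdef0 (2 insns), plus 1 insn to
   combine them, for a total of 5; a 16-bit signed value such as -42
   needs a single addi. */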
5913 int
5914 num_insns_constant (rtx op, machine_mode mode)
5916 HOST_WIDE_INT low, high;
5918 switch (GET_CODE (op))
5920 case CONST_INT:
5921 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5922 && rs6000_is_valid_and_mask (op, mode))
5923 return 2;
5924 else
5925 return num_insns_constant_wide (INTVAL (op));
5927 case CONST_WIDE_INT:
5929 int i;
5930 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5931 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5932 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5933 return ins;
5936 case CONST_DOUBLE:
5937 if (mode == SFmode || mode == SDmode)
5939 long l;
5941 if (DECIMAL_FLOAT_MODE_P (mode))
5942 REAL_VALUE_TO_TARGET_DECIMAL32
5943 (*CONST_DOUBLE_REAL_VALUE (op), l);
5944 else
5945 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5946 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5949 long l[2];
5950 if (DECIMAL_FLOAT_MODE_P (mode))
5951 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5952 else
5953 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5954 high = l[WORDS_BIG_ENDIAN == 0];
5955 low = l[WORDS_BIG_ENDIAN != 0];
5957 if (TARGET_32BIT)
5958 return (num_insns_constant_wide (low)
5959 + num_insns_constant_wide (high));
5960 else
5962 if ((high == 0 && low >= 0)
5963 || (high == -1 && low < 0))
5964 return num_insns_constant_wide (low);
5966 else if (rs6000_is_valid_and_mask (op, mode))
5967 return 2;
5969 else if (low == 0)
5970 return num_insns_constant_wide (high) + 1;
5972 else
5973 return (num_insns_constant_wide (high)
5974 + num_insns_constant_wide (low) + 1);
5977 default:
5978 gcc_unreachable ();
5982 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5983 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5984 corresponding element of the vector, but for V4SFmode and V2SFmode,
5985 the corresponding "float" is interpreted as an SImode integer. */
5987 HOST_WIDE_INT
5988 const_vector_elt_as_int (rtx op, unsigned int elt)
5990 rtx tmp;
5992 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5993 gcc_assert (GET_MODE (op) != V2DImode
5994 && GET_MODE (op) != V2DFmode);
5996 tmp = CONST_VECTOR_ELT (op, elt);
5997 if (GET_MODE (op) == V4SFmode
5998 || GET_MODE (op) == V2SFmode)
5999 tmp = gen_lowpart (SImode, tmp);
6000 return INTVAL (tmp);
6003 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6004 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6005 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6006 all items are set to the same value and contain COPIES replicas of the
6007 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6008 operand and the others are set to the value of the operand's msb. */
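/* Illustrative examples (not part of the original source): a V8HImode
   vector with every element 0x0505 matches with COPIES = 2, each
   halfword holding two replicas of the byte 5, and is emitted as
   "vspltisb 5".  A big-endian V8HImode vector { 0, 2, 0, 2, 0, 2, 0, 2 }
   matches with STEP = 2, since viewed as words it is a splat of 2
   ("vspltisw 2") with the remaining halfwords holding the msb (0) of
   the positive operand. */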
6010 static bool
6011 vspltis_constant (rtx op, unsigned step, unsigned copies)
6013 machine_mode mode = GET_MODE (op);
6014 machine_mode inner = GET_MODE_INNER (mode);
6016 unsigned i;
6017 unsigned nunits;
6018 unsigned bitsize;
6019 unsigned mask;
6021 HOST_WIDE_INT val;
6022 HOST_WIDE_INT splat_val;
6023 HOST_WIDE_INT msb_val;
6025 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6026 return false;
6028 nunits = GET_MODE_NUNITS (mode);
6029 bitsize = GET_MODE_BITSIZE (inner);
6030 mask = GET_MODE_MASK (inner);
6032 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6033 splat_val = val;
6034 msb_val = val >= 0 ? 0 : -1;
6036 /* Construct the value to be splatted, if possible. If not, return 0. */
6037 for (i = 2; i <= copies; i *= 2)
6039 HOST_WIDE_INT small_val;
6040 bitsize /= 2;
6041 small_val = splat_val >> bitsize;
6042 mask >>= bitsize;
6043 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
6044 return false;
6045 splat_val = small_val;
6048 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6049 if (EASY_VECTOR_15 (splat_val))
6052 /* Also check if we can splat, and then add the result to itself. Do so if
6053 the value is positive, or if the splat instruction is using OP's mode;
6054 for splat_val < 0, the splat and the add should use the same mode. */
6055 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6056 && (splat_val >= 0 || (step == 1 && copies == 1)))
6059 /* Also check if we are loading up the most significant bit, which can be
6060 done by loading up -1 and shifting the value left by -1. */
6061 else if (EASY_VECTOR_MSB (splat_val, inner))
6064 else
6065 return false;
6067 /* Check if VAL is present in every STEP-th element, and the
6068 other elements are filled with its most significant bit. */
6069 for (i = 1; i < nunits; ++i)
6071 HOST_WIDE_INT desired_val;
6072 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6073 if ((i & (step - 1)) == 0)
6074 desired_val = val;
6075 else
6076 desired_val = msb_val;
6078 if (desired_val != const_vector_elt_as_int (op, elt))
6079 return false;
6082 return true;
6085 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6086 instruction, filling in the bottom elements with 0 or -1.
6088 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6089 for the number of zeroes to shift in, or negative for the number of 0xff
6090 bytes to shift in.
6092 OP is a CONST_VECTOR. */
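/* Illustrative example (not part of the original source): the
   big-endian V4SImode constant { 3, 0, 0, 0 } holds the splattable
   value 3 in its first element with zeros after it, so this returns
   (4 - 1) * 4 = 12: a "vspltisw 3" followed by a VSLDOI that shifts
   in 12 zero bytes. */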
6094 int
6095 vspltis_shifted (rtx op)
6097 machine_mode mode = GET_MODE (op);
6098 machine_mode inner = GET_MODE_INNER (mode);
6100 unsigned i, j;
6101 unsigned nunits;
6102 unsigned mask;
6104 HOST_WIDE_INT val;
6106 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6107 return 0;
6109 /* We need to create pseudo registers to do the shift, so don't recognize
6110 shift vector constants after reload. */
6111 if (!can_create_pseudo_p ())
6112 return false;
6114 nunits = GET_MODE_NUNITS (mode);
6115 mask = GET_MODE_MASK (inner);
6117 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6119 /* Check if the value can really be the operand of a vspltis[bhw]. */
6120 if (EASY_VECTOR_15 (val))
6123 /* Also check if we are loading up the most significant bit which can be done
6124 by loading up -1 and shifting the value left by -1. */
6125 else if (EASY_VECTOR_MSB (val, inner))
6128 else
6129 return 0;
6131 /* Check if VAL is present in every STEP-th element until we find elements
6132 that are 0 or all 1 bits. */
6133 for (i = 1; i < nunits; ++i)
6135 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6136 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6138 /* If the value isn't the splat value, check for the remaining elements
6139 being 0/-1. */
6140 if (val != elt_val)
6142 if (elt_val == 0)
6144 for (j = i+1; j < nunits; ++j)
6146 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6147 if (const_vector_elt_as_int (op, elt2) != 0)
6148 return 0;
6151 return (nunits - i) * GET_MODE_SIZE (inner);
6154 else if ((elt_val & mask) == mask)
6156 for (j = i+1; j < nunits; ++j)
6158 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6159 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6160 return 0;
6163 return -((nunits - i) * GET_MODE_SIZE (inner));
6166 else
6167 return 0;
6171 /* If all elements are equal, we don't need to do VSLDOI. */
6172 return 0;
6176 /* Return true if OP is of the given MODE and can be synthesized
6177 with a vspltisb, vspltish or vspltisw. */
6179 bool
6180 easy_altivec_constant (rtx op, machine_mode mode)
6182 unsigned step, copies;
6184 if (mode == VOIDmode)
6185 mode = GET_MODE (op);
6186 else if (mode != GET_MODE (op))
6187 return false;
6189 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6190 constants. */
6191 if (mode == V2DFmode)
6192 return zero_constant (op, mode);
6194 else if (mode == V2DImode)
6196 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6197 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6198 return false;
6200 if (zero_constant (op, mode))
6201 return true;
6203 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6204 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6205 return true;
6207 return false;
6210 /* V1TImode is a special container for TImode. Ignore for now. */
6211 else if (mode == V1TImode)
6212 return false;
6214 /* Start with a vspltisw. */
6215 step = GET_MODE_NUNITS (mode) / 4;
6216 copies = 1;
6218 if (vspltis_constant (op, step, copies))
6219 return true;
6221 /* Then try with a vspltish. */
6222 if (step == 1)
6223 copies <<= 1;
6224 else
6225 step >>= 1;
6227 if (vspltis_constant (op, step, copies))
6228 return true;
6230 /* And finally a vspltisb. */
6231 if (step == 1)
6232 copies <<= 1;
6233 else
6234 step >>= 1;
6236 if (vspltis_constant (op, step, copies))
6237 return true;
6239 if (vspltis_shifted (op) != 0)
6240 return true;
6242 return false;
6245 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6246 result is OP. Abort if it is not possible. */
6248 rtx
6249 gen_easy_altivec_constant (rtx op)
6251 machine_mode mode = GET_MODE (op);
6252 int nunits = GET_MODE_NUNITS (mode);
6253 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6254 unsigned step = nunits / 4;
6255 unsigned copies = 1;
6257 /* Start with a vspltisw. */
6258 if (vspltis_constant (op, step, copies))
6259 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6261 /* Then try with a vspltish. */
6262 if (step == 1)
6263 copies <<= 1;
6264 else
6265 step >>= 1;
6267 if (vspltis_constant (op, step, copies))
6268 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6270 /* And finally a vspltisb. */
6271 if (step == 1)
6272 copies <<= 1;
6273 else
6274 step >>= 1;
6276 if (vspltis_constant (op, step, copies))
6277 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6279 gcc_unreachable ();
6282 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6283 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6285 Return the number of instructions needed (1 or 2) via the address pointed
6286 to by NUM_INSNS_PTR.
6288 Return the constant that is being split via CONSTANT_PTR. */
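/* Usage sketch (illustrative): a V16QImode vector with every byte
   equal to 23 yields true with *NUM_INSNS_PTR = 1 and
   *CONSTANT_PTR = 23, i.e. a single "xxspltib vr,23".  A V4SImode
   splat of 7 yields false, since "vspltisw 7" is preferred, while a
   V4SImode splat of 100 yields true with *NUM_INSNS_PTR = 2
   (xxspltib plus a vextsb2w sign extension). */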
6290 bool
6291 xxspltib_constant_p (rtx op,
6292 machine_mode mode,
6293 int *num_insns_ptr,
6294 int *constant_ptr)
6296 size_t nunits = GET_MODE_NUNITS (mode);
6297 size_t i;
6298 HOST_WIDE_INT value;
6299 rtx element;
6301 /* Set the returned values to out of bound values. */
6302 *num_insns_ptr = -1;
6303 *constant_ptr = 256;
6305 if (!TARGET_P9_VECTOR)
6306 return false;
6308 if (mode == VOIDmode)
6309 mode = GET_MODE (op);
6311 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6312 return false;
6314 /* Handle (vec_duplicate <constant>). */
6315 if (GET_CODE (op) == VEC_DUPLICATE)
6317 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6318 && mode != V2DImode)
6319 return false;
6321 element = XEXP (op, 0);
6322 if (!CONST_INT_P (element))
6323 return false;
6325 value = INTVAL (element);
6326 if (!IN_RANGE (value, -128, 127))
6327 return false;
6330 /* Handle (const_vector [...]). */
6331 else if (GET_CODE (op) == CONST_VECTOR)
6333 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6334 && mode != V2DImode)
6335 return false;
6337 element = CONST_VECTOR_ELT (op, 0);
6338 if (!CONST_INT_P (element))
6339 return false;
6341 value = INTVAL (element);
6342 if (!IN_RANGE (value, -128, 127))
6343 return false;
6345 for (i = 1; i < nunits; i++)
6347 element = CONST_VECTOR_ELT (op, i);
6348 if (!CONST_INT_P (element))
6349 return false;
6351 if (value != INTVAL (element))
6352 return false;
6356 /* Handle integer constants being loaded into the upper part of the VSX
6357 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6358 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6359 else if (CONST_INT_P (op))
6361 if (!SCALAR_INT_MODE_P (mode))
6362 return false;
6364 value = INTVAL (op);
6365 if (!IN_RANGE (value, -128, 127))
6366 return false;
6368 if (!IN_RANGE (value, -1, 0))
6370 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6371 return false;
6373 if (EASY_VECTOR_15 (value))
6374 return false;
6378 else
6379 return false;
6381 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6382 sign extend. Special case 0/-1 to allow getting any VSX register instead
6383 of an Altivec register. */
6384 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6385 && EASY_VECTOR_15 (value))
6386 return false;
6388 /* Return # of instructions and the constant byte for XXSPLTIB. */
6389 if (mode == V16QImode)
6390 *num_insns_ptr = 1;
6392 else if (IN_RANGE (value, -1, 0))
6393 *num_insns_ptr = 1;
6395 else
6396 *num_insns_ptr = 2;
6398 *constant_ptr = (int) value;
6399 return true;
6402 const char *
6403 output_vec_const_move (rtx *operands)
6405 int cst, cst2, shift;
6406 machine_mode mode;
6407 rtx dest, vec;
6409 dest = operands[0];
6410 vec = operands[1];
6411 mode = GET_MODE (dest);
6413 if (TARGET_VSX)
6415 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6416 int xxspltib_value = 256;
6417 int num_insns = -1;
6419 if (zero_constant (vec, mode))
6421 if (TARGET_P9_VECTOR)
6422 return "xxspltib %x0,0";
6424 else if (dest_vmx_p)
6425 return "vspltisw %0,0";
6427 else
6428 return "xxlxor %x0,%x0,%x0";
6431 if (all_ones_constant (vec, mode))
6433 if (TARGET_P9_VECTOR)
6434 return "xxspltib %x0,255";
6436 else if (dest_vmx_p)
6437 return "vspltisw %0,-1";
6439 else if (TARGET_P8_VECTOR)
6440 return "xxlorc %x0,%x0,%x0";
6442 else
6443 gcc_unreachable ();
6446 if (TARGET_P9_VECTOR
6447 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6449 if (num_insns == 1)
6451 operands[2] = GEN_INT (xxspltib_value & 0xff);
6452 return "xxspltib %x0,%2";
6455 return "#";
6459 if (TARGET_ALTIVEC)
6461 rtx splat_vec;
6463 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6464 if (zero_constant (vec, mode))
6465 return "vspltisw %0,0";
6467 if (all_ones_constant (vec, mode))
6468 return "vspltisw %0,-1";
6470 /* Do we need to construct a value using VSLDOI? */
6471 shift = vspltis_shifted (vec);
6472 if (shift != 0)
6473 return "#";
6475 splat_vec = gen_easy_altivec_constant (vec);
6476 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6477 operands[1] = XEXP (splat_vec, 0);
6478 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6479 return "#";
6481 switch (GET_MODE (splat_vec))
6483 case V4SImode:
6484 return "vspltisw %0,%1";
6486 case V8HImode:
6487 return "vspltish %0,%1";
6489 case V16QImode:
6490 return "vspltisb %0,%1";
6492 default:
6493 gcc_unreachable ();
6497 gcc_assert (TARGET_SPE);
6499 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6500 pattern of V1DI, V4HI, and V2SF.
6502 FIXME: We should probably return # and add post reload
6503 splitters for these, but this way is so easy ;-). */
6504 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6505 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6506 operands[1] = CONST_VECTOR_ELT (vec, 0);
6507 operands[2] = CONST_VECTOR_ELT (vec, 1);
6508 if (cst == cst2)
6509 return "li %0,%1\n\tevmergelo %0,%0,%0";
6510 else if (WORDS_BIG_ENDIAN)
6511 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6512 else
6513 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6516 /* Initialize the PAIRED vector TARGET to VALS. */
6518 void
6519 paired_expand_vector_init (rtx target, rtx vals)
6521 machine_mode mode = GET_MODE (target);
6522 int n_elts = GET_MODE_NUNITS (mode);
6523 int n_var = 0;
6524 rtx x, new_rtx, tmp, constant_op, op1, op2;
6525 int i;
6527 for (i = 0; i < n_elts; ++i)
6529 x = XVECEXP (vals, 0, i);
6530 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6531 ++n_var;
6533 if (n_var == 0)
6535 /* Load from constant pool. */
6536 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6537 return;
6540 if (n_var == 2)
6542 /* The vector is initialized only with non-constants. */
6543 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6544 XVECEXP (vals, 0, 1));
6546 emit_move_insn (target, new_rtx);
6547 return;
6550 /* One field is non-constant and the other one is a constant. Load the
6551 constant from the constant pool and use ps_merge instruction to
6552 construct the whole vector. */
6553 op1 = XVECEXP (vals, 0, 0);
6554 op2 = XVECEXP (vals, 0, 1);
6556 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6558 tmp = gen_reg_rtx (GET_MODE (constant_op));
6559 emit_move_insn (tmp, constant_op);
6561 if (CONSTANT_P (op1))
6562 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6563 else
6564 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6566 emit_move_insn (target, new_rtx);
6569 void
6570 paired_expand_vector_move (rtx operands[])
6572 rtx op0 = operands[0], op1 = operands[1];
6574 emit_move_insn (op0, op1);
6577 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6578 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6579 operands for the relation operation COND. This is a recursive
6580 function. */
6582 static void
6583 paired_emit_vector_compare (enum rtx_code rcode,
6584 rtx dest, rtx op0, rtx op1,
6585 rtx cc_op0, rtx cc_op1)
6587 rtx tmp = gen_reg_rtx (V2SFmode);
6588 rtx tmp1, max, min;
6590 gcc_assert (TARGET_PAIRED_FLOAT);
6591 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6593 switch (rcode)
6595 case LT:
6596 case LTU:
6597 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6598 return;
6599 case GE:
6600 case GEU:
6601 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6602 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6603 return;
6604 case LE:
6605 case LEU:
6606 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6607 return;
6608 case GT:
6609 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6610 return;
6611 case EQ:
6612 tmp1 = gen_reg_rtx (V2SFmode);
6613 max = gen_reg_rtx (V2SFmode);
6614 min = gen_reg_rtx (V2SFmode);
6615 gen_reg_rtx (V2SFmode);
6617 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6618 emit_insn (gen_selv2sf4
6619 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6620 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6621 emit_insn (gen_selv2sf4
6622 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6623 emit_insn (gen_subv2sf3 (tmp1, min, max));
6624 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6625 return;
6626 case NE:
6627 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6628 return;
6629 case UNLE:
6630 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6631 return;
6632 case UNLT:
6633 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6634 return;
6635 case UNGE:
6636 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6637 return;
6638 case UNGT:
6639 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6640 return;
6641 default:
6642 gcc_unreachable ();
6645 return;
6648 /* Emit vector conditional expression.
6649 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6650 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6652 int
6653 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6654 rtx cond, rtx cc_op0, rtx cc_op1)
6656 enum rtx_code rcode = GET_CODE (cond);
6658 if (!TARGET_PAIRED_FLOAT)
6659 return 0;
6661 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6663 return 1;
6666 /* Initialize vector TARGET to VALS. */
6668 void
6669 rs6000_expand_vector_init (rtx target, rtx vals)
6671 machine_mode mode = GET_MODE (target);
6672 machine_mode inner_mode = GET_MODE_INNER (mode);
6673 int n_elts = GET_MODE_NUNITS (mode);
6674 int n_var = 0, one_var = -1;
6675 bool all_same = true, all_const_zero = true;
6676 rtx x, mem;
6677 int i;
6679 for (i = 0; i < n_elts; ++i)
6681 x = XVECEXP (vals, 0, i);
6682 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6683 ++n_var, one_var = i;
6684 else if (x != CONST0_RTX (inner_mode))
6685 all_const_zero = false;
6687 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6688 all_same = false;
6691 if (n_var == 0)
6693 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6694 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6695 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6697 /* Zero register. */
6698 emit_move_insn (target, CONST0_RTX (mode));
6699 return;
6701 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6703 /* Splat immediate. */
6704 emit_insn (gen_rtx_SET (target, const_vec));
6705 return;
6707 else
6709 /* Load from constant pool. */
6710 emit_move_insn (target, const_vec);
6711 return;
6715 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6716 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6718 rtx op0 = XVECEXP (vals, 0, 0);
6719 rtx op1 = XVECEXP (vals, 0, 1);
6720 if (all_same)
6722 if (!MEM_P (op0) && !REG_P (op0))
6723 op0 = force_reg (inner_mode, op0);
6724 if (mode == V2DFmode)
6725 emit_insn (gen_vsx_splat_v2df (target, op0));
6726 else
6727 emit_insn (gen_vsx_splat_v2di (target, op0));
6729 else
6731 op0 = force_reg (inner_mode, op0);
6732 op1 = force_reg (inner_mode, op1);
6733 if (mode == V2DFmode)
6734 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6735 else
6736 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6738 return;
6741 /* Special case initializing vector int if we are on 64-bit systems with
6742 direct move or we have the ISA 3.0 instructions. */
6743 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6744 && TARGET_DIRECT_MOVE_64BIT)
6746 if (all_same)
6748 rtx element0 = XVECEXP (vals, 0, 0);
6749 if (MEM_P (element0))
6750 element0 = rs6000_address_for_fpconvert (element0);
6751 else
6752 element0 = force_reg (SImode, element0);
6754 if (TARGET_P9_VECTOR)
6755 emit_insn (gen_vsx_splat_v4si (target, element0));
6756 else
6758 rtx tmp = gen_reg_rtx (DImode);
6759 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6760 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6762 return;
6764 else
6766 rtx elements[4];
6767 size_t i;
6769 for (i = 0; i < 4; i++)
6771 elements[i] = XVECEXP (vals, 0, i);
6772 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6773 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6776 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6777 elements[2], elements[3]));
6778 return;
6782 /* With single-precision floating point on VSX, note that internally single
6783 precision is actually represented as a double, so either make two V2DF
6784 vectors and convert those to single precision, or do one conversion and
6785 splat the result to the other elements. */
6786 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6788 if (all_same)
6790 rtx element0 = XVECEXP (vals, 0, 0);
6792 if (TARGET_P9_VECTOR)
6794 if (MEM_P (element0))
6795 element0 = rs6000_address_for_fpconvert (element0);
6797 emit_insn (gen_vsx_splat_v4sf (target, element0));
6800 else
6802 rtx freg = gen_reg_rtx (V4SFmode);
6803 rtx sreg = force_reg (SFmode, element0);
6804 rtx cvt = (TARGET_XSCVDPSPN
6805 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6806 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6808 emit_insn (cvt);
6809 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6810 const0_rtx));
6813 else
6815 rtx dbl_even = gen_reg_rtx (V2DFmode);
6816 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6817 rtx flt_even = gen_reg_rtx (V4SFmode);
6818 rtx flt_odd = gen_reg_rtx (V4SFmode);
6819 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6820 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6821 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6822 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6824 /* Use VMRGEW if we can instead of doing a permute. */
6825 if (TARGET_P8_VECTOR)
6827 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6828 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6829 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6830 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6831 if (BYTES_BIG_ENDIAN)
6832 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6833 else
6834 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6836 else
6838 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6839 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6840 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6841 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6842 rs6000_expand_extract_even (target, flt_even, flt_odd);
6845 return;
6848 /* Special case initializing vector short/char that are splats if we are on
6849 64-bit systems with direct move. */
6850 if (all_same && TARGET_DIRECT_MOVE_64BIT
6851 && (mode == V16QImode || mode == V8HImode))
6853 rtx op0 = XVECEXP (vals, 0, 0);
6854 rtx di_tmp = gen_reg_rtx (DImode);
6856 if (!REG_P (op0))
6857 op0 = force_reg (GET_MODE_INNER (mode), op0);
6859 if (mode == V16QImode)
6861 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6862 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6863 return;
6866 if (mode == V8HImode)
6868 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6869 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6870 return;
6874 /* Store value to stack temp. Load vector element. Splat. However, splat
6875 of 64-bit items is not supported on Altivec. */
6876 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6878 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6879 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6880 XVECEXP (vals, 0, 0));
6881 x = gen_rtx_UNSPEC (VOIDmode,
6882 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6883 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6884 gen_rtvec (2,
6885 gen_rtx_SET (target, mem),
6886 x)));
6887 x = gen_rtx_VEC_SELECT (inner_mode, target,
6888 gen_rtx_PARALLEL (VOIDmode,
6889 gen_rtvec (1, const0_rtx)));
6890 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6891 return;
6894 /* One field is non-constant. Load constant then overwrite
6895 varying field. */
6896 if (n_var == 1)
6898 rtx copy = copy_rtx (vals);
6900 /* Load constant part of vector, substitute neighboring value for
6901 varying element. */
6902 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6903 rs6000_expand_vector_init (target, copy);
6905 /* Insert variable. */
6906 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6907 return;
6910 /* Construct the vector in memory one field at a time
6911 and load the whole vector. */
6912 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6913 for (i = 0; i < n_elts; i++)
6914 emit_move_insn (adjust_address_nv (mem, inner_mode,
6915 i * GET_MODE_SIZE (inner_mode)),
6916 XVECEXP (vals, 0, i));
6917 emit_move_insn (target, mem);
6920 /* Set field ELT of TARGET to VAL. */
6922 void
6923 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6925 machine_mode mode = GET_MODE (target);
6926 machine_mode inner_mode = GET_MODE_INNER (mode);
6927 rtx reg = gen_reg_rtx (mode);
6928 rtx mask, mem, x;
6929 int width = GET_MODE_SIZE (inner_mode);
6930 int i;
6932 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6934 rtx (*set_func) (rtx, rtx, rtx, rtx)
6935 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6936 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6937 return;
6940 /* Simplify setting single element vectors like V1TImode. */
6941 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6943 emit_move_insn (target, gen_lowpart (mode, val));
6944 return;
6947 /* Load single variable value. */
6948 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6949 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6950 x = gen_rtx_UNSPEC (VOIDmode,
6951 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6952 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6953 gen_rtvec (2,
6954 gen_rtx_SET (reg, mem),
6955 x)));
6957 /* Linear sequence. */
6958 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6959 for (i = 0; i < 16; ++i)
6960 XVECEXP (mask, 0, i) = GEN_INT (i);
6962 /* Set permute mask to insert element into target. */
6963 for (i = 0; i < width; ++i)
6964 XVECEXP (mask, 0, elt*width + i)
6965 = GEN_INT (i + 0x10);
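/* Illustrative example (not part of the original source): inserting
   element 1 of a V4SImode vector (width = 4) yields the selector
   { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 }, so
   bytes 4..7 of the result come from the second vperm input (the
   freshly loaded value in REG) and the rest from TARGET. */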
6966 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6968 if (BYTES_BIG_ENDIAN)
6969 x = gen_rtx_UNSPEC (mode,
6970 gen_rtvec (3, target, reg,
6971 force_reg (V16QImode, x)),
6972 UNSPEC_VPERM);
6973 else
6975 if (TARGET_P9_VECTOR)
6976 x = gen_rtx_UNSPEC (mode,
6977 gen_rtvec (3, target, reg,
6978 force_reg (V16QImode, x)),
6979 UNSPEC_VPERMR);
6980 else
6982 /* Invert selector. We prefer to generate VNAND on P8 so
6983 that future fusion opportunities can kick in, but must
6984 generate VNOR elsewhere. */
6985 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6986 rtx iorx = (TARGET_P8_VECTOR
6987 ? gen_rtx_IOR (V16QImode, notx, notx)
6988 : gen_rtx_AND (V16QImode, notx, notx));
6989 rtx tmp = gen_reg_rtx (V16QImode);
6990 emit_insn (gen_rtx_SET (tmp, iorx));
6992 /* Permute with operands reversed and adjusted selector. */
6993 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6994 UNSPEC_VPERM);
6998 emit_insn (gen_rtx_SET (target, x));
7001 /* Extract field ELT from VEC into TARGET. */
7003 void
7004 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7006 machine_mode mode = GET_MODE (vec);
7007 machine_mode inner_mode = GET_MODE_INNER (mode);
7008 rtx mem;
7010 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7012 switch (mode)
7014 default:
7015 break;
7016 case V1TImode:
7017 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7018 emit_move_insn (target, gen_lowpart (TImode, vec));
7019 break;
7020 case V2DFmode:
7021 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7022 return;
7023 case V2DImode:
7024 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7025 return;
7026 case V4SFmode:
7027 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7028 return;
7029 case V16QImode:
7030 if (TARGET_DIRECT_MOVE_64BIT)
7032 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7033 return;
7035 else
7036 break;
7037 case V8HImode:
7038 if (TARGET_DIRECT_MOVE_64BIT)
7040 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7041 return;
7043 else
7044 break;
7045 case V4SImode:
7046 if (TARGET_DIRECT_MOVE_64BIT)
7048 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7049 return;
7051 break;
7054 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7055 && TARGET_DIRECT_MOVE_64BIT)
7057 if (GET_MODE (elt) != DImode)
7059 rtx tmp = gen_reg_rtx (DImode);
7060 convert_move (tmp, elt, 0);
7061 elt = tmp;
7064 switch (mode)
7066 case V2DFmode:
7067 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7068 return;
7070 case V2DImode:
7071 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7072 return;
7074 case V4SFmode:
7075 if (TARGET_UPPER_REGS_SF)
7077 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7078 return;
7080 break;
7082 case V4SImode:
7083 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7084 return;
7086 case V8HImode:
7087 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7088 return;
7090 case V16QImode:
7091 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7092 return;
7094 default:
7095 gcc_unreachable ();
7099 gcc_assert (CONST_INT_P (elt));
7101 /* Allocate mode-sized buffer. */
7102 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7104 emit_move_insn (mem, vec);
7106 /* Add offset to field within buffer matching vector element. */
7107 mem = adjust_address_nv (mem, inner_mode,
7108 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7110 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7113 /* Helper function to return the register number of an RTX. */
7114 static inline int
7115 regno_or_subregno (rtx op)
7117 if (REG_P (op))
7118 return REGNO (op);
7119 else if (SUBREG_P (op))
7120 return subreg_regno (op);
7121 else
7122 gcc_unreachable ();
7125 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7126 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7127 temporary (BASE_TMP) to fixup the address. Return the new memory address
7128 that is valid for reads or writes to a given register (SCALAR_REG). */
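/* Usage sketch (illustrative): extracting constant element 3 of a
   V4SImode vector held at 16(r9) simply folds the offsets, producing
   an SImode memory reference at 28(r9); for a variable element number,
   the index is shifted left by log2 of the scalar size into BASE_TMP
   and added to the base address instead. */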
7130 static rtx
7131 rs6000_adjust_vec_address (rtx scalar_reg,
7132 rtx mem,
7133 rtx element,
7134 rtx base_tmp,
7135 machine_mode scalar_mode)
7137 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7138 rtx addr = XEXP (mem, 0);
7139 rtx element_offset;
7140 rtx new_addr;
7141 bool valid_addr_p;
7143 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7144 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7146 /* Calculate what we need to add to the address to get the element
7147 address. */
7148 if (CONST_INT_P (element))
7149 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7150 else
7152 int byte_shift = exact_log2 (scalar_size);
7153 gcc_assert (byte_shift >= 0);
7155 if (byte_shift == 0)
7156 element_offset = element;
7158 else
7160 if (TARGET_POWERPC64)
7161 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7162 else
7163 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7165 element_offset = base_tmp;
7169 /* Create the new address pointing to the element within the vector. If we
7170 are adding 0, we don't have to change the address. */
7171 if (element_offset == const0_rtx)
7172 new_addr = addr;
7174 /* A simple indirect address can be converted into a reg + offset
7175 address. */
7176 else if (REG_P (addr) || SUBREG_P (addr))
7177 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7179 /* Optimize D-FORM addresses with constant offset with a constant element, to
7180 include the element offset in the address directly. */
7181 else if (GET_CODE (addr) == PLUS)
7183 rtx op0 = XEXP (addr, 0);
7184 rtx op1 = XEXP (addr, 1);
7185 rtx insn;
7187 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7188 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7190 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7191 rtx offset_rtx = GEN_INT (offset);
7193 if (IN_RANGE (offset, -32768, 32767)
7194 && (scalar_size < 8 || (offset & 0x3) == 0))
7195 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7196 else
7198 emit_move_insn (base_tmp, offset_rtx);
7199 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
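/* Worked example (illustrative): a V2DFmode vector at reg+16 with
   constant element 1 gives offset = 16 + 1*8 = 24, which is in the
   signed 16-bit range and a multiple of 4, so the folded d-form
   address reg+24 is used directly.  A combined offset outside that
   range is instead loaded into BASE_TMP and added as reg+reg.  */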
7202 else
7204 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7205 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7207 /* Note, ADDI requires the register being added to be a base
7208 register. If the register was R0, load it up into the temporary
7209 and do the add. */
7210 if (op1_reg_p
7211 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7213 insn = gen_add3_insn (base_tmp, op1, element_offset);
7214 gcc_assert (insn != NULL_RTX);
7215 emit_insn (insn);
7218 else if (ele_reg_p
7219 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7221 insn = gen_add3_insn (base_tmp, element_offset, op1);
7222 gcc_assert (insn != NULL_RTX);
7223 emit_insn (insn);
7226 else
7228 emit_move_insn (base_tmp, op1);
7229 emit_insn (gen_add2_insn (base_tmp, element_offset));
7232 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7236 else
7238 emit_move_insn (base_tmp, addr);
7239 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7242 /* If we have a PLUS, we need to see whether the particular register class
7243 allows for D-FORM or X-FORM addressing. */
7244 if (GET_CODE (new_addr) == PLUS)
7246 rtx op1 = XEXP (new_addr, 1);
7247 addr_mask_type addr_mask;
7248 int scalar_regno = regno_or_subregno (scalar_reg);
7250 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7251 if (INT_REGNO_P (scalar_regno))
7252 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7254 else if (FP_REGNO_P (scalar_regno))
7255 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7257 else if (ALTIVEC_REGNO_P (scalar_regno))
7258 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7260 else
7261 gcc_unreachable ();
7263 if (REG_P (op1) || SUBREG_P (op1))
7264 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7265 else
7266 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7269 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7270 valid_addr_p = true;
7272 else
7273 valid_addr_p = false;
7275 if (!valid_addr_p)
7277 emit_move_insn (base_tmp, new_addr);
7278 new_addr = base_tmp;
7281 return change_address (mem, scalar_mode, new_addr);
7284 /* Split a variable vec_extract operation into the component instructions. */
7286 void
7287 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7288 rtx tmp_altivec)
7290 machine_mode mode = GET_MODE (src);
7291 machine_mode scalar_mode = GET_MODE (dest);
7292 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7293 int byte_shift = exact_log2 (scalar_size);
7295 gcc_assert (byte_shift >= 0);
7297 /* If we are given a memory address, optimize to load just the element. We
7298 don't have to adjust the vector element number on little endian
7299 systems. */
7300 if (MEM_P (src))
7302 gcc_assert (REG_P (tmp_gpr));
7303 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7304 tmp_gpr, scalar_mode));
7305 return;
7308 else if (REG_P (src) || SUBREG_P (src))
7310 int bit_shift = byte_shift + 3;
7311 rtx element2;
7313 gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
7315 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7316 an XOR, otherwise we need to subtract. The shift amount is chosen so
7317 that VSLO will shift the element into the upper position (adding 3
7318 converts a byte shift into a bit shift). */
7319 if (scalar_size == 8)
7321 if (!VECTOR_ELT_ORDER_BIG)
7323 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7324 element2 = tmp_gpr;
7326 else
7327 element2 = element;
7329 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7330 bit. */
7331 emit_insn (gen_rtx_SET (tmp_gpr,
7332 gen_rtx_AND (DImode,
7333 gen_rtx_ASHIFT (DImode,
7334 element2,
7335 GEN_INT (6)),
7336 GEN_INT (64))));
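/* Worked example (illustrative): element2 is 0 or 1 here, so
   (element2 << 6) & 64 yields 0 or 64.  A low byte of 64 (0x40)
   puts 8 in bits 121:124, which VSLO reads as a shift count of 8
   bytes, moving the second doubleword to the top.  */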
7338 else
7340 if (!VECTOR_ELT_ORDER_BIG)
7342 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7344 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7345 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7346 element2 = tmp_gpr;
7348 else
7349 element2 = element;
7351 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
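/* Worked example (illustrative): for V16QImode on little endian
   with element 5, element2 = 15 - 5 = 10 and tmp_gpr = 10 << 3 =
   80, i.e. a 10-byte VSLO shift that brings the requested element
   into the most significant position of the register.  */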
7354 /* Get the value into the lower byte of the Altivec register where VSLO
7355 expects it. */
7356 if (TARGET_P9_VECTOR)
7357 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7358 else if (can_create_pseudo_p ())
7359 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7360 else
7362 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7363 emit_move_insn (tmp_di, tmp_gpr);
7364 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7367 /* Do the VSLO to get the value into the final location. */
7368 switch (mode)
7370 case V2DFmode:
7371 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7372 return;
7374 case V2DImode:
7375 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7376 return;
7378 case V4SFmode:
7380 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7381 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7382 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7383 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7384 tmp_altivec));
7386 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7387 return;
7390 case V4SImode:
7391 case V8HImode:
7392 case V16QImode:
7394 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7395 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7396 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7397 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7398 tmp_altivec));
7399 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7400 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7401 GEN_INT (64 - (8 * scalar_size))));
7402 return;
7405 default:
7406 gcc_unreachable ();
7409 return;
7411 else
7412 gcc_unreachable ();
7415 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7416 two SImode values. */
7418 static void
7419 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7421 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7423 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7425 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7426 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7428 emit_move_insn (dest, GEN_INT (const1 | const2));
7429 return;
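/* Worked example (illustrative): si1 = 0x12345678 and si2 =
   0x9abcdef0 combine into the single constant 0x123456789abcdef0,
   loaded with one move.  */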
7432 /* Put si1 into the upper 32 bits of dest. */
7433 if (CONST_INT_P (si1))
7434 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7435 else
7437 /* Generate RLDIC. */
7438 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7439 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7440 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7441 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7442 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7443 emit_insn (gen_rtx_SET (dest, and_rtx));
7446 /* Put si2 into the temporary. */
7447 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7448 if (CONST_INT_P (si2))
7449 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7450 else
7451 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7453 /* Combine the two parts. */
7454 emit_insn (gen_iordi3 (dest, dest, tmp));
7455 return;
7458 /* Split a V4SI initialization. */
7460 void
7461 rs6000_split_v4si_init (rtx operands[])
7463 rtx dest = operands[0];
7465 /* Destination is a GPR, build up the two DImode parts in place. */
7466 if (REG_P (dest) || SUBREG_P (dest))
7468 int d_regno = regno_or_subregno (dest);
7469 rtx scalar1 = operands[1];
7470 rtx scalar2 = operands[2];
7471 rtx scalar3 = operands[3];
7472 rtx scalar4 = operands[4];
7473 rtx tmp1 = operands[5];
7474 rtx tmp2 = operands[6];
7476 /* Even though we only need one temporary (plus the destination, which
7477 has an early clobber constraint), try to use two temporaries, one for
7478 each double word created. That way the 2nd insn scheduling pass can
7479 rearrange things so the two parts are done in parallel. */
7480 if (BYTES_BIG_ENDIAN)
7482 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7483 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7484 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7485 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7487 else
7489 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7490 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7491 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7492 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7493 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7495 return;
7498 else
7499 gcc_unreachable ();
7502 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
7504 bool
7505 invalid_e500_subreg (rtx op, machine_mode mode)
7507 if (TARGET_E500_DOUBLE)
7509 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
7510 subreg:TI and reg:TF. Decimal float modes are like integer
7511 modes (only low part of each register used) for this
7512 purpose. */
7513 if (GET_CODE (op) == SUBREG
7514 && (mode == SImode || mode == DImode || mode == TImode
7515 || mode == DDmode || mode == TDmode || mode == PTImode)
7516 && REG_P (SUBREG_REG (op))
7517 && (GET_MODE (SUBREG_REG (op)) == DFmode
7518 || GET_MODE (SUBREG_REG (op)) == TFmode
7519 || GET_MODE (SUBREG_REG (op)) == IFmode
7520 || GET_MODE (SUBREG_REG (op)) == KFmode))
7521 return true;
7523 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
7524 reg:TI. */
7525 if (GET_CODE (op) == SUBREG
7526 && (mode == DFmode || mode == TFmode || mode == IFmode
7527 || mode == KFmode)
7528 && REG_P (SUBREG_REG (op))
7529 && (GET_MODE (SUBREG_REG (op)) == DImode
7530 || GET_MODE (SUBREG_REG (op)) == TImode
7531 || GET_MODE (SUBREG_REG (op)) == PTImode
7532 || GET_MODE (SUBREG_REG (op)) == DDmode
7533 || GET_MODE (SUBREG_REG (op)) == TDmode))
7534 return true;
7537 if (TARGET_SPE
7538 && GET_CODE (op) == SUBREG
7539 && mode == SImode
7540 && REG_P (SUBREG_REG (op))
7541 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
7542 return true;
7544 return false;
7547 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7548 selects whether the alignment is ABI-mandated, optional, or
7549 both ABI-mandated and optional alignment. */
7551 unsigned int
7552 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7554 if (how != align_opt)
7556 if (TREE_CODE (type) == VECTOR_TYPE)
7558 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
7559 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
7561 if (align < 64)
7562 align = 64;
7564 else if (align < 128)
7565 align = 128;
7567 else if (TARGET_E500_DOUBLE
7568 && TREE_CODE (type) == REAL_TYPE
7569 && TYPE_MODE (type) == DFmode)
7571 if (align < 64)
7572 align = 64;
7576 if (how != align_abi)
7578 if (TREE_CODE (type) == ARRAY_TYPE
7579 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7581 if (align < BITS_PER_WORD)
7582 align = BITS_PER_WORD;
7586 return align;
7589 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7591 bool
7592 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
7594 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
7596 if (computed != 128)
7598 static bool warned;
7599 if (!warned && warn_psabi)
7601 warned = true;
7602 inform (input_location,
7603 "the layout of aggregates containing vectors with"
7604 " %d-byte alignment has changed in GCC 5",
7605 computed / BITS_PER_UNIT);
7608 /* In current GCC there is no special case. */
7609 return false;
7612 return false;
7615 /* AIX increases natural record alignment to doubleword if the first
7616 field is an FP double while the FP fields remain word aligned. */
7618 unsigned int
7619 rs6000_special_round_type_align (tree type, unsigned int computed,
7620 unsigned int specified)
7622 unsigned int align = MAX (computed, specified);
7623 tree field = TYPE_FIELDS (type);
7625 /* Skip all non-field decls. */
7626 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7627 field = DECL_CHAIN (field);
7629 if (field != NULL && field != type)
7631 type = TREE_TYPE (field);
7632 while (TREE_CODE (type) == ARRAY_TYPE)
7633 type = TREE_TYPE (type);
7635 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7636 align = MAX (align, 64);
7639 return align;
7642 /* Darwin increases record alignment to the natural alignment of
7643 the first field. */
7645 unsigned int
7646 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7647 unsigned int specified)
7649 unsigned int align = MAX (computed, specified);
7651 if (TYPE_PACKED (type))
7652 return align;
7654 /* Find the first field, looking down into aggregates. */
7655 do {
7656 tree field = TYPE_FIELDS (type);
7657 /* Skip all non-field decls. */
7658 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7659 field = DECL_CHAIN (field);
7660 if (! field)
7661 break;
7662 /* A packed field does not contribute any extra alignment. */
7663 if (DECL_PACKED (field))
7664 return align;
7665 type = TREE_TYPE (field);
7666 while (TREE_CODE (type) == ARRAY_TYPE)
7667 type = TREE_TYPE (type);
7668 } while (AGGREGATE_TYPE_P (type));
7670 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7671 align = MAX (align, TYPE_ALIGN (type));
7673 return align;
7676 /* Return 1 for an operand in small memory on V.4/eabi. */
7678 int
7679 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7680 machine_mode mode ATTRIBUTE_UNUSED)
7682 #if TARGET_ELF
7683 rtx sym_ref;
7685 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7686 return 0;
7688 if (DEFAULT_ABI != ABI_V4)
7689 return 0;
7691 /* Vector and float memory instructions have a limited offset on the
7692 SPE, so using a vector or float variable directly as an operand is
7693 not useful. */
7694 if (TARGET_SPE
7695 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
7696 return 0;
7698 if (GET_CODE (op) == SYMBOL_REF)
7699 sym_ref = op;
7701 else if (GET_CODE (op) != CONST
7702 || GET_CODE (XEXP (op, 0)) != PLUS
7703 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7704 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7705 return 0;
7707 else
7709 rtx sum = XEXP (op, 0);
7710 HOST_WIDE_INT summand;
7712 /* We have to be careful here, because it is the referenced address
7713 that must be within 32k of _SDA_BASE_, not just the symbol. */
7714 summand = INTVAL (XEXP (sum, 1));
7715 if (summand < 0 || summand > g_switch_value)
7716 return 0;
7718 sym_ref = XEXP (sum, 0);
7721 return SYMBOL_REF_SMALL_P (sym_ref);
7722 #else
7723 return 0;
7724 #endif
7727 /* Return true if either operand is a general purpose register. */
7729 bool
7730 gpr_or_gpr_p (rtx op0, rtx op1)
7732 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7733 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7736 /* Return true if this is a direct move operation between GPR registers and
7737 floating point/VSX registers. */
7739 bool
7740 direct_move_p (rtx op0, rtx op1)
7742 int regno0, regno1;
7744 if (!REG_P (op0) || !REG_P (op1))
7745 return false;
7747 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7748 return false;
7750 regno0 = REGNO (op0);
7751 regno1 = REGNO (op1);
7752 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7753 return false;
7755 if (INT_REGNO_P (regno0))
7756 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7758 else if (INT_REGNO_P (regno1))
7760 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7761 return true;
7763 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7764 return true;
7767 return false;
7770 /* Return true if the OFFSET is valid for the quad address instructions that
7771 use d-form (register + offset) addressing. */
7773 static inline bool
7774 quad_address_offset_p (HOST_WIDE_INT offset)
7776 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
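/* Worked example (illustrative): offsets 0, 16, -32768, and 32752
   are valid; 8 and 24 fail the 16-byte alignment check, and 32768
   is out of range.  */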
7779 /* Return true if ADDR is an acceptable address for a quad memory
7780 operation of mode MODE (either LQ/STQ for general purpose registers, or
7781 LXV/STXV for vector registers under ISA 3.0). STRICT selects whether
7782 strict register checking is applied when validating the base
7783 register. */
7785 bool
7786 quad_address_p (rtx addr, machine_mode mode, bool strict)
7788 rtx op0, op1;
7790 if (GET_MODE_SIZE (mode) != 16)
7791 return false;
7793 if (legitimate_indirect_address_p (addr, strict))
7794 return true;
7796 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
7797 return false;
7799 if (GET_CODE (addr) != PLUS)
7800 return false;
7802 op0 = XEXP (addr, 0);
7803 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7804 return false;
7806 op1 = XEXP (addr, 1);
7807 if (!CONST_INT_P (op1))
7808 return false;
7810 return quad_address_offset_p (INTVAL (op1));
7813 /* Return true if this is a load or store quad operation. This function does
7814 not handle the atomic quad memory instructions. */
7816 bool
7817 quad_load_store_p (rtx op0, rtx op1)
7819 bool ret;
7821 if (!TARGET_QUAD_MEMORY)
7822 ret = false;
7824 else if (REG_P (op0) && MEM_P (op1))
7825 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7826 && quad_memory_operand (op1, GET_MODE (op1))
7827 && !reg_overlap_mentioned_p (op0, op1));
7829 else if (MEM_P (op0) && REG_P (op1))
7830 ret = (quad_memory_operand (op0, GET_MODE (op0))
7831 && quad_int_reg_operand (op1, GET_MODE (op1)));
7833 else
7834 ret = false;
7836 if (TARGET_DEBUG_ADDR)
7838 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7839 ret ? "true" : "false");
7840 debug_rtx (gen_rtx_SET (op0, op1));
7843 return ret;
7846 /* Given an address, return a constant offset term if one exists. */
7848 static rtx
7849 address_offset (rtx op)
7851 if (GET_CODE (op) == PRE_INC
7852 || GET_CODE (op) == PRE_DEC)
7853 op = XEXP (op, 0);
7854 else if (GET_CODE (op) == PRE_MODIFY
7855 || GET_CODE (op) == LO_SUM)
7856 op = XEXP (op, 1);
7858 if (GET_CODE (op) == CONST)
7859 op = XEXP (op, 0);
7861 if (GET_CODE (op) == PLUS)
7862 op = XEXP (op, 1);
7864 if (CONST_INT_P (op))
7865 return op;
7867 return NULL_RTX;
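/* Worked example (illustrative): (plus (reg) (const_int 8)) yields
   (const_int 8); (lo_sum (reg) (const (plus (symbol_ref)
   (const_int 4)))) yields (const_int 4); a plain (reg) address
   yields NULL_RTX.  */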
7870 /* Return true if the MEM operand is a memory operand suitable for use
7871 with a (full width, possibly multiple) gpr load/store. On
7872 powerpc64 this means the offset must be divisible by 4.
7873 Implements 'Y' constraint.
7875 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7876 a constraint function we know the operand has satisfied a suitable
7877 memory predicate. Also accept some odd rtl generated by reload
7878 (see rs6000_legitimize_reload_address for various forms). It is
7879 important that reload rtl be accepted by appropriate constraints
7880 but not by the operand predicate.
7882 Offsetting a lo_sum should not be allowed, except where we know by
7883 alignment that a 32k boundary is not crossed, but see the ???
7884 comment in rs6000_legitimize_reload_address. Note that by
7885 "offsetting" here we mean a further offset to access parts of the
7886 MEM. It's fine to have a lo_sum where the inner address is offset
7887 from a sym, since the same sym+offset will appear in the high part
7888 of the address calculation. */
7890 bool
7891 mem_operand_gpr (rtx op, machine_mode mode)
7893 unsigned HOST_WIDE_INT offset;
7894 int extra;
7895 rtx addr = XEXP (op, 0);
7897 op = address_offset (addr);
7898 if (op == NULL_RTX)
7899 return true;
7901 offset = INTVAL (op);
7902 if (TARGET_POWERPC64 && (offset & 3) != 0)
7903 return false;
7905 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7906 if (extra < 0)
7907 extra = 0;
7909 if (GET_CODE (addr) == LO_SUM)
7910 /* For lo_sum addresses, we must allow any offset except one that
7911 causes a wrap, so test only the low 16 bits. */
7912 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7914 return offset + 0x8000 < 0x10000u - extra;
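/* Worked example (illustrative): a DImode access on powerpc64 has
   extra = 0, so offset 32760 passes (32760 + 0x8000 = 0xfff8 <
   0x10000); a 16-byte access has extra = 8 and the same offset
   fails, since its second doubleword at 32768 would be
   unreachable.  */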
7917 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7918 enforce an offset divisible by 4 even for 32-bit. */
7920 bool
7921 mem_operand_ds_form (rtx op, machine_mode mode)
7923 unsigned HOST_WIDE_INT offset;
7924 int extra;
7925 rtx addr = XEXP (op, 0);
7927 if (!offsettable_address_p (false, mode, addr))
7928 return false;
7930 op = address_offset (addr);
7931 if (op == NULL_RTX)
7932 return true;
7934 offset = INTVAL (op);
7935 if ((offset & 3) != 0)
7936 return false;
7938 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7939 if (extra < 0)
7940 extra = 0;
7942 if (GET_CODE (addr) == LO_SUM)
7943 /* For lo_sum addresses, we must allow any offset except one that
7944 causes a wrap, so test only the low 16 bits. */
7945 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7947 return offset + 0x8000 < 0x10000u - extra;
7950 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7952 static bool
7953 reg_offset_addressing_ok_p (machine_mode mode)
7955 switch (mode)
7957 case V16QImode:
7958 case V8HImode:
7959 case V4SFmode:
7960 case V4SImode:
7961 case V2DFmode:
7962 case V2DImode:
7963 case V1TImode:
7964 case TImode:
7965 case TFmode:
7966 case KFmode:
7967 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7968 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7969 a vector mode, if we want to use the VSX registers to move it around,
7970 we need to restrict ourselves to reg+reg addressing. Similarly for
7971 IEEE 128-bit floating point that is passed in a single vector
7972 register. */
7973 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7974 return mode_supports_vsx_dform_quad (mode);
7975 break;
7977 case V4HImode:
7978 case V2SImode:
7979 case V1DImode:
7980 case V2SFmode:
7981 /* Paired vector modes. Only reg+reg addressing is valid. */
7982 if (TARGET_PAIRED_FLOAT)
7983 return false;
7984 break;
7986 case SDmode:
7987 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7988 addressing for the LFIWZX and STFIWX instructions. */
7989 if (TARGET_NO_SDMODE_STACK)
7990 return false;
7991 break;
7993 default:
7994 break;
7997 return true;
8000 static bool
8001 virtual_stack_registers_memory_p (rtx op)
8003 int regnum;
8005 if (GET_CODE (op) == REG)
8006 regnum = REGNO (op);
8008 else if (GET_CODE (op) == PLUS
8009 && GET_CODE (XEXP (op, 0)) == REG
8010 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8011 regnum = REGNO (XEXP (op, 0));
8013 else
8014 return false;
8016 return (regnum >= FIRST_VIRTUAL_REGISTER
8017 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8020 /* Return true if a MODE-sized memory access to OP plus OFFSET
8021 is known not to straddle a 32k boundary. This function is used
8022 to determine whether -mcmodel=medium code can use TOC pointer
8023 relative addressing for OP. This means the alignment of the TOC
8024 pointer must also be taken into account, and unfortunately that is
8025 only 8 bytes. */
8027 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8028 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8029 #endif
8031 static bool
8032 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8033 machine_mode mode)
8035 tree decl;
8036 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8038 if (GET_CODE (op) != SYMBOL_REF)
8039 return false;
8041 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8042 SYMBOL_REF. */
8043 if (mode_supports_vsx_dform_quad (mode))
8044 return false;
8046 dsize = GET_MODE_SIZE (mode);
8047 decl = SYMBOL_REF_DECL (op);
8048 if (!decl)
8050 if (dsize == 0)
8051 return false;
8053 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8054 replacing memory addresses with an anchor plus offset. We
8055 could find the decl by rummaging around in the block->objects
8056 VEC for the given offset but that seems like too much work. */
8057 dalign = BITS_PER_UNIT;
8058 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8059 && SYMBOL_REF_ANCHOR_P (op)
8060 && SYMBOL_REF_BLOCK (op) != NULL)
8062 struct object_block *block = SYMBOL_REF_BLOCK (op);
8064 dalign = block->alignment;
8065 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8067 else if (CONSTANT_POOL_ADDRESS_P (op))
8069 /* It would be nice to have get_pool_align()... */
8070 machine_mode cmode = get_pool_mode (op);
8072 dalign = GET_MODE_ALIGNMENT (cmode);
8075 else if (DECL_P (decl))
8077 dalign = DECL_ALIGN (decl);
8079 if (dsize == 0)
8081 /* Allow BLKmode when the entire object is known to not
8082 cross a 32k boundary. */
8083 if (!DECL_SIZE_UNIT (decl))
8084 return false;
8086 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8087 return false;
8089 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8090 if (dsize > 32768)
8091 return false;
8093 dalign /= BITS_PER_UNIT;
8094 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8095 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8096 return dalign >= dsize;
8099 else
8100 gcc_unreachable ();
8102 /* Find how many bits of the alignment we know for this access. */
8103 dalign /= BITS_PER_UNIT;
8104 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8105 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8106 mask = dalign - 1;
8107 lsb = offset & -offset;
8108 mask &= lsb - 1;
8109 dalign = mask + 1;
8111 return dalign >= dsize;
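/* Worked example (illustrative): a doubleword-aligned symbol
   accessed at offset 4 has lsb = 4, so the combined address is
   only known to be 4-byte aligned; a 4-byte access passes
   (dalign >= dsize) but an 8-byte access does not.  */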
8114 static bool
8115 constant_pool_expr_p (rtx op)
8117 rtx base, offset;
8119 split_const (op, &base, &offset);
8120 return (GET_CODE (base) == SYMBOL_REF
8121 && CONSTANT_POOL_ADDRESS_P (base)
8122 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8125 static const_rtx tocrel_base, tocrel_offset;
8127 /* Return true if OP is a toc pointer relative address (the output
8128 of create_TOC_reference). If STRICT, do not match non-split
8129 -mcmodel=large/medium toc pointer relative addresses. */
8131 bool
8132 toc_relative_expr_p (const_rtx op, bool strict)
8134 if (!TARGET_TOC)
8135 return false;
8137 if (TARGET_CMODEL != CMODEL_SMALL)
8139 /* When strict ensure we have everything tidy. */
8140 if (strict
8141 && !(GET_CODE (op) == LO_SUM
8142 && REG_P (XEXP (op, 0))
8143 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8144 return false;
8146 /* When not strict, allow non-split TOC addresses and also allow
8147 (lo_sum (high ..)) TOC addresses created during reload. */
8148 if (GET_CODE (op) == LO_SUM)
8149 op = XEXP (op, 1);
8152 tocrel_base = op;
8153 tocrel_offset = const0_rtx;
8154 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8156 tocrel_base = XEXP (op, 0);
8157 tocrel_offset = XEXP (op, 1);
8160 return (GET_CODE (tocrel_base) == UNSPEC
8161 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8164 /* Return true if X is a constant pool address, and also for cmodel=medium
8165 if X is a toc-relative address known to be offsettable within MODE. */
8167 bool
8168 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8169 bool strict)
8171 return (toc_relative_expr_p (x, strict)
8172 && (TARGET_CMODEL != CMODEL_MEDIUM
8173 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8174 || mode == QImode
8175 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8176 INTVAL (tocrel_offset), mode)));
8179 static bool
8180 legitimate_small_data_p (machine_mode mode, rtx x)
8182 return (DEFAULT_ABI == ABI_V4
8183 && !flag_pic && !TARGET_TOC
8184 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8185 && small_data_operand (x, mode));
8188 /* SPE offset addressing is limited to 5 bits' worth of double words. */
8189 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
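/* The mask ~0xf8 clears everything except bits 3..7, so the valid
   offsets are the multiples of 8 from 0 to 248: for example, 0, 8,
   and 248 pass, while 4, 256, and any negative offset fail.  */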
8191 bool
8192 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8193 bool strict, bool worst_case)
8195 unsigned HOST_WIDE_INT offset;
8196 unsigned int extra;
8198 if (GET_CODE (x) != PLUS)
8199 return false;
8200 if (!REG_P (XEXP (x, 0)))
8201 return false;
8202 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8203 return false;
8204 if (mode_supports_vsx_dform_quad (mode))
8205 return quad_address_p (x, mode, strict);
8206 if (!reg_offset_addressing_ok_p (mode))
8207 return virtual_stack_registers_memory_p (x);
8208 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8209 return true;
8210 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8211 return false;
8213 offset = INTVAL (XEXP (x, 1));
8214 extra = 0;
8215 switch (mode)
8217 case V4HImode:
8218 case V2SImode:
8219 case V1DImode:
8220 case V2SFmode:
8221 /* SPE vector modes. */
8222 return SPE_CONST_OFFSET_OK (offset);
8224 case DFmode:
8225 case DDmode:
8226 case DImode:
8227 /* On e500v2, we may have
8229 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
8231 which gets addressed with evldd instructions. */
8232 if (TARGET_E500_DOUBLE)
8233 return SPE_CONST_OFFSET_OK (offset);
8235 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8236 addressing. */
8237 if (VECTOR_MEM_VSX_P (mode))
8238 return false;
8240 if (!worst_case)
8241 break;
8242 if (!TARGET_POWERPC64)
8243 extra = 4;
8244 else if (offset & 3)
8245 return false;
8246 break;
8248 case TFmode:
8249 case IFmode:
8250 case KFmode:
8251 if (TARGET_E500_DOUBLE)
8252 return (SPE_CONST_OFFSET_OK (offset)
8253 && SPE_CONST_OFFSET_OK (offset + 8));
8254 /* fall through */
8256 case TDmode:
8257 case TImode:
8258 case PTImode:
8259 extra = 8;
8260 if (!worst_case)
8261 break;
8262 if (!TARGET_POWERPC64)
8263 extra = 12;
8264 else if (offset & 3)
8265 return false;
8266 break;
8268 default:
8269 break;
8272 offset += 0x8000;
8273 return offset < 0x10000 - extra;
8276 bool
8277 legitimate_indexed_address_p (rtx x, int strict)
8279 rtx op0, op1;
8281 if (GET_CODE (x) != PLUS)
8282 return false;
8284 op0 = XEXP (x, 0);
8285 op1 = XEXP (x, 1);
8287 /* Recognize the rtl generated by reload which we know will later be
8288 replaced with proper base and index regs. */
8289 if (!strict
8290 && reload_in_progress
8291 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8292 && REG_P (op1))
8293 return true;
8295 return (REG_P (op0) && REG_P (op1)
8296 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8297 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8298 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8299 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8302 bool
8303 avoiding_indexed_address_p (machine_mode mode)
8305 /* Avoid indexed addressing for modes that have non-indexed
8306 load/store instruction forms. */
8307 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8310 bool
8311 legitimate_indirect_address_p (rtx x, int strict)
8313 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8316 bool
8317 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8319 if (!TARGET_MACHO || !flag_pic
8320 || mode != SImode || GET_CODE (x) != MEM)
8321 return false;
8322 x = XEXP (x, 0);
8324 if (GET_CODE (x) != LO_SUM)
8325 return false;
8326 if (GET_CODE (XEXP (x, 0)) != REG)
8327 return false;
8328 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8329 return false;
8330 x = XEXP (x, 1);
8332 return CONSTANT_P (x);
8335 static bool
8336 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8338 if (GET_CODE (x) != LO_SUM)
8339 return false;
8340 if (GET_CODE (XEXP (x, 0)) != REG)
8341 return false;
8342 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8343 return false;
8344 /* Quad word addresses are restricted; we can't use LO_SUM. */
8345 if (mode_supports_vsx_dform_quad (mode))
8346 return false;
8347 /* Restrict addressing for DI because of our SUBREG hackery. */
8348 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8349 return false;
8350 x = XEXP (x, 1);
8352 if (TARGET_ELF || TARGET_MACHO)
8354 bool large_toc_ok;
8356 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8357 return false;
8358 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, which usually calls
8359 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8360 recognizes some LO_SUM addresses as valid even though this
8361 function says the opposite. In most cases, LRA can generate
8362 correct code for address reloads through different
8363 transformations; it cannot manage only some LO_SUM cases. So we
8364 need code analogous to that in rs6000_legitimize_reload_address
8365 for LO_SUM here, saying that some addresses are still valid. */
8366 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8367 && small_toc_ref (x, VOIDmode));
8368 if (TARGET_TOC && ! large_toc_ok)
8369 return false;
8370 if (GET_MODE_NUNITS (mode) != 1)
8371 return false;
8372 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8373 && !(/* ??? Assume floating point reg based on mode? */
8374 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
8375 && (mode == DFmode || mode == DDmode)))
8376 return false;
8378 return CONSTANT_P (x) || large_toc_ok;
8381 return false;
8385 /* Try machine-dependent ways of modifying an illegitimate address
8386 to be legitimate. If we find one, return the new, valid address.
8387 This is used from only one place: `memory_address' in explow.c.
8389 OLDX is the address as it was before break_out_memory_refs was
8390 called. In some cases it is useful to look at this to decide what
8391 needs to be done.
8393 It is always safe for this function to do nothing. It exists to
8394 recognize opportunities to optimize the output.
8396 On RS/6000, first check for the sum of a register with a constant
8397 integer that is out of range. If so, generate code to add the
8398 constant with the low-order 16 bits masked to the register and force
8399 this result into another register (this can be done with `cau').
8400 Then generate an address of REG+(CONST&0xffff), allowing for the
8401 possibility of bit 16 being a one.
8403 Then check for the sum of a register and something that is not constant;
8404 try to load the non-constant part into a register and return the sum. */
8406 static rtx
8407 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8408 machine_mode mode)
8410 unsigned int extra;
8412 if (!reg_offset_addressing_ok_p (mode)
8413 || mode_supports_vsx_dform_quad (mode))
8415 if (virtual_stack_registers_memory_p (x))
8416 return x;
8418 /* In theory we should not be seeing addresses of the form reg+0,
8419 but just in case it is generated, optimize it away. */
8420 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8421 return force_reg (Pmode, XEXP (x, 0));
8423 /* For TImode with load/store quad, restrict addresses to just a single
8424 pointer, so it works with both GPRs and VSX registers. */
8425 /* Make sure both operands are registers. */
8426 else if (GET_CODE (x) == PLUS
8427 && (mode != TImode || !TARGET_VSX_TIMODE))
8428 return gen_rtx_PLUS (Pmode,
8429 force_reg (Pmode, XEXP (x, 0)),
8430 force_reg (Pmode, XEXP (x, 1)));
8431 else
8432 return force_reg (Pmode, x);
8434 if (GET_CODE (x) == SYMBOL_REF)
8436 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8437 if (model != 0)
8438 return rs6000_legitimize_tls_address (x, model);
8441 extra = 0;
8442 switch (mode)
8444 case TFmode:
8445 case TDmode:
8446 case TImode:
8447 case PTImode:
8448 case IFmode:
8449 case KFmode:
8450 /* As in legitimate_offset_address_p we do not assume
8451 worst-case. The mode here is just a hint as to the registers
8452 used. A TImode is usually in gprs, but may actually be in
8453 fprs. Leave worst-case scenario for reload to handle via
8454 insn constraints. PTImode is only GPRs. */
8455 extra = 8;
8456 break;
8457 default:
8458 break;
8461 if (GET_CODE (x) == PLUS
8462 && GET_CODE (XEXP (x, 0)) == REG
8463 && GET_CODE (XEXP (x, 1)) == CONST_INT
8464 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8465 >= 0x10000 - extra)
8466 && !(SPE_VECTOR_MODE (mode)
8467 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
8469 HOST_WIDE_INT high_int, low_int;
8470 rtx sum;
8471 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8472 if (low_int >= 0x8000 - extra)
8473 low_int = 0;
8474 high_int = INTVAL (XEXP (x, 1)) - low_int;
8475 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8476 GEN_INT (high_int)), 0);
8477 return plus_constant (Pmode, sum, low_int);
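/* Worked example (illustrative): with extra = 0, reg + 0x12345
   splits into high_int = 0x10000 (forced into a register) and
   low_int = 0x2345 (kept as the displacement); reg + 0x18000
   splits into high_int = 0x20000 and low_int = -0x8000.  */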
8479 else if (GET_CODE (x) == PLUS
8480 && GET_CODE (XEXP (x, 0)) == REG
8481 && GET_CODE (XEXP (x, 1)) != CONST_INT
8482 && GET_MODE_NUNITS (mode) == 1
8483 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8484 || (/* ??? Assume floating point reg based on mode? */
8485 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8486 && (mode == DFmode || mode == DDmode)))
8487 && !avoiding_indexed_address_p (mode))
8489 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8490 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8492 else if (SPE_VECTOR_MODE (mode)
8493 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
8495 if (mode == DImode)
8496 return x;
8497 /* We accept [reg + reg] and [reg + OFFSET]. */
8499 if (GET_CODE (x) == PLUS)
8501 rtx op1 = XEXP (x, 0);
8502 rtx op2 = XEXP (x, 1);
8503 rtx y;
8505 op1 = force_reg (Pmode, op1);
8507 if (GET_CODE (op2) != REG
8508 && (GET_CODE (op2) != CONST_INT
8509 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
8510 || (GET_MODE_SIZE (mode) > 8
8511 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
8512 op2 = force_reg (Pmode, op2);
8514 /* We can't always do [reg + reg] for these, because [reg +
8515 reg + offset] is not a legitimate addressing mode. */
8516 y = gen_rtx_PLUS (Pmode, op1, op2);
8518 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8519 return force_reg (Pmode, y);
8520 else
8521 return y;
8524 return force_reg (Pmode, x);
8526 else if ((TARGET_ELF
8527 #if TARGET_MACHO
8528 || !MACHO_DYNAMIC_NO_PIC_P
8529 #endif
8531 && TARGET_32BIT
8532 && TARGET_NO_TOC
8533 && ! flag_pic
8534 && GET_CODE (x) != CONST_INT
8535 && GET_CODE (x) != CONST_WIDE_INT
8536 && GET_CODE (x) != CONST_DOUBLE
8537 && CONSTANT_P (x)
8538 && GET_MODE_NUNITS (mode) == 1
8539 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8540 || (/* ??? Assume floating point reg based on mode? */
8541 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8542 && (mode == DFmode || mode == DDmode))))
8544 rtx reg = gen_reg_rtx (Pmode);
8545 if (TARGET_ELF)
8546 emit_insn (gen_elf_high (reg, x));
8547 else
8548 emit_insn (gen_macho_high (reg, x));
8549 return gen_rtx_LO_SUM (Pmode, reg, x);
8551 else if (TARGET_TOC
8552 && GET_CODE (x) == SYMBOL_REF
8553 && constant_pool_expr_p (x)
8554 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8555 return create_TOC_reference (x, NULL_RTX);
8556 else
8557 return x;
8560 /* Debug version of rs6000_legitimize_address. */
8561 static rtx
8562 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8564 rtx ret;
8565 rtx_insn *insns;
8567 start_sequence ();
8568 ret = rs6000_legitimize_address (x, oldx, mode);
8569 insns = get_insns ();
8570 end_sequence ();
8572 if (ret != x)
8574 fprintf (stderr,
8575 "\nrs6000_legitimize_address: mode %s, old code %s, "
8576 "new code %s, modified\n",
8577 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8578 GET_RTX_NAME (GET_CODE (ret)));
8580 fprintf (stderr, "Original address:\n");
8581 debug_rtx (x);
8583 fprintf (stderr, "oldx:\n");
8584 debug_rtx (oldx);
8586 fprintf (stderr, "New address:\n");
8587 debug_rtx (ret);
8589 if (insns)
8591 fprintf (stderr, "Insns added:\n");
8592 debug_rtx_list (insns, 20);
8595 else
8597 fprintf (stderr,
8598 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8599 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8601 debug_rtx (x);
8604 if (insns)
8605 emit_insn (insns);
8607 return ret;
8610 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8611 We need to emit DTP-relative relocations. */
8613 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8614 static void
8615 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8617 switch (size)
8619 case 4:
8620 fputs ("\t.long\t", file);
8621 break;
8622 case 8:
8623 fputs (DOUBLE_INT_ASM_OP, file);
8624 break;
8625 default:
8626 gcc_unreachable ();
8628 output_addr_const (file, x);
8629 if (TARGET_ELF)
8630 fputs ("@dtprel+0x8000", file);
8631 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8633 switch (SYMBOL_REF_TLS_MODEL (x))
8635 case 0:
8636 break;
8637 case TLS_MODEL_LOCAL_EXEC:
8638 fputs ("@le", file);
8639 break;
8640 case TLS_MODEL_INITIAL_EXEC:
8641 fputs ("@ie", file);
8642 break;
8643 case TLS_MODEL_GLOBAL_DYNAMIC:
8644 case TLS_MODEL_LOCAL_DYNAMIC:
8645 fputs ("@m", file);
8646 break;
8647 default:
8648 gcc_unreachable ();
8653 /* Return true if X is a symbol that refers to real (rather than emulated)
8654 TLS. */
8656 static bool
8657 rs6000_real_tls_symbol_ref_p (rtx x)
8659 return (GET_CODE (x) == SYMBOL_REF
8660 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8663 /* In the name of slightly smaller debug output, and to cater to
8664 general assembler lossage, recognize various UNSPEC sequences
8665 and turn them back into a direct symbol reference. */
8667 static rtx
8668 rs6000_delegitimize_address (rtx orig_x)
8670 rtx x, y, offset;
8672 orig_x = delegitimize_mem_from_attrs (orig_x);
8673 x = orig_x;
8674 if (MEM_P (x))
8675 x = XEXP (x, 0);
8677 y = x;
8678 if (TARGET_CMODEL != CMODEL_SMALL
8679 && GET_CODE (y) == LO_SUM)
8680 y = XEXP (y, 1);
8682 offset = NULL_RTX;
8683 if (GET_CODE (y) == PLUS
8684 && GET_MODE (y) == Pmode
8685 && CONST_INT_P (XEXP (y, 1)))
8687 offset = XEXP (y, 1);
8688 y = XEXP (y, 0);
8691 if (GET_CODE (y) == UNSPEC
8692 && XINT (y, 1) == UNSPEC_TOCREL)
8694 y = XVECEXP (y, 0, 0);
8696 #ifdef HAVE_AS_TLS
8697 /* Do not associate thread-local symbols with the original
8698 constant pool symbol. */
8699 if (TARGET_XCOFF
8700 && GET_CODE (y) == SYMBOL_REF
8701 && CONSTANT_POOL_ADDRESS_P (y)
8702 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8703 return orig_x;
8704 #endif
8706 if (offset != NULL_RTX)
8707 y = gen_rtx_PLUS (Pmode, y, offset);
8708 if (!MEM_P (orig_x))
8709 return y;
8710 else
8711 return replace_equiv_address_nv (orig_x, y);
8714 if (TARGET_MACHO
8715 && GET_CODE (orig_x) == LO_SUM
8716 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8718 y = XEXP (XEXP (orig_x, 1), 0);
8719 if (GET_CODE (y) == UNSPEC
8720 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8721 return XVECEXP (y, 0, 0);
8724 return orig_x;
8727 /* Return true if X shouldn't be emitted into the debug info.
8728 The linker doesn't like .toc section references from
8729 .debug_* sections, so reject .toc section symbols. */
8731 static bool
8732 rs6000_const_not_ok_for_debug_p (rtx x)
8734 if (GET_CODE (x) == SYMBOL_REF
8735 && CONSTANT_POOL_ADDRESS_P (x))
8737 rtx c = get_pool_constant (x);
8738 machine_mode cmode = get_pool_mode (x);
8739 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8740 return true;
8743 return false;
8746 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8748 static GTY(()) rtx rs6000_tls_symbol;
8749 static rtx
8750 rs6000_tls_get_addr (void)
8752 if (!rs6000_tls_symbol)
8753 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8755 return rs6000_tls_symbol;
8758 /* Construct the SYMBOL_REF for TLS GOT references. */
8760 static GTY(()) rtx rs6000_got_symbol;
8761 static rtx
8762 rs6000_got_sym (void)
8764 if (!rs6000_got_symbol)
8766 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8767 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8768 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8771 return rs6000_got_symbol;
8774 /* AIX Thread-Local Address support. */
8776 static rtx
8777 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8779 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8780 const char *name;
8781 char *tlsname;
8783 name = XSTR (addr, 0);
8784 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8785 or the symbol will be placed in the TLS private data section. */
8786 if (name[strlen (name) - 1] != ']'
8787 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8788 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8790 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8791 strcpy (tlsname, name);
8792 strcat (tlsname,
8793 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8794 tlsaddr = copy_rtx (addr);
8795 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8797 else
8798 tlsaddr = addr;
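/* Worked example (illustrative): a public initialized TLS symbol
   "foo" becomes "foo[TL]", a BSS-initialized one becomes
   "foo[UL]", and a name already ending in ']' is left as-is.  */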
8800 /* Place addr into TOC constant pool. */
8801 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8803 /* Output the TOC entry and create the MEM referencing the value. */
8804 if (constant_pool_expr_p (XEXP (sym, 0))
8805 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8807 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8808 mem = gen_const_mem (Pmode, tocref);
8809 set_mem_alias_set (mem, get_TOC_alias_set ());
8811 else
8812 return sym;
8814 /* Use global-dynamic for local-dynamic. */
8815 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8816 || model == TLS_MODEL_LOCAL_DYNAMIC)
8818 /* Create new TOC reference for @m symbol. */
8819 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8820 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8821 strcpy (tlsname, "*LCM");
8822 strcat (tlsname, name + 3);
8823 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8824 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8825 tocref = create_TOC_reference (modaddr, NULL_RTX);
8826 rtx modmem = gen_const_mem (Pmode, tocref);
8827 set_mem_alias_set (modmem, get_TOC_alias_set ());
8829 rtx modreg = gen_reg_rtx (Pmode);
8830 emit_insn (gen_rtx_SET (modreg, modmem));
8832 tmpreg = gen_reg_rtx (Pmode);
8833 emit_insn (gen_rtx_SET (tmpreg, mem));
8835 dest = gen_reg_rtx (Pmode);
8836 if (TARGET_32BIT)
8837 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8838 else
8839 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8840 return dest;
8842 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8843 else if (TARGET_32BIT)
8845 tlsreg = gen_reg_rtx (SImode);
8846 emit_insn (gen_tls_get_tpointer (tlsreg));
8848 else
8849 tlsreg = gen_rtx_REG (DImode, 13);
8851 /* Load the TOC value into a temporary register. */
8852 tmpreg = gen_reg_rtx (Pmode);
8853 emit_insn (gen_rtx_SET (tmpreg, mem));
8854 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8855 gen_rtx_MINUS (Pmode, addr, tlsreg));
8857 /* Add TOC symbol value to TLS pointer. */
8858 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8860 return dest;
8863 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8864 this (thread-local) address. */
8866 static rtx
8867 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8869 rtx dest, insn;
8871 if (TARGET_XCOFF)
8872 return rs6000_legitimize_tls_address_aix (addr, model);
8874 dest = gen_reg_rtx (Pmode);
8875 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8877 rtx tlsreg;
8879 if (TARGET_64BIT)
8881 tlsreg = gen_rtx_REG (Pmode, 13);
8882 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8884 else
8886 tlsreg = gen_rtx_REG (Pmode, 2);
8887 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8889 emit_insn (insn);
8891 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8893 rtx tlsreg, tmp;
8895 tmp = gen_reg_rtx (Pmode);
8896 if (TARGET_64BIT)
8898 tlsreg = gen_rtx_REG (Pmode, 13);
8899 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8901 else
8903 tlsreg = gen_rtx_REG (Pmode, 2);
8904 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8906 emit_insn (insn);
8907 if (TARGET_64BIT)
8908 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8909 else
8910 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8911 emit_insn (insn);
8913 else
8915 rtx r3, got, tga, tmp1, tmp2, call_insn;
8917 /* We currently use relocations like @got@tlsgd for tls, which
8918 means the linker will handle allocation of tls entries, placing
8919 them in the .got section. So use a pointer to the .got section,
8920 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8921 or to secondary GOT sections used by 32-bit -fPIC. */
8922 if (TARGET_64BIT)
8923 got = gen_rtx_REG (Pmode, 2);
8924 else
8926 if (flag_pic == 1)
8927 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8928 else
8930 rtx gsym = rs6000_got_sym ();
8931 got = gen_reg_rtx (Pmode);
8932 if (flag_pic == 0)
8933 rs6000_emit_move (got, gsym, Pmode);
8934 else
8936 rtx mem, lab, last;
8938 tmp1 = gen_reg_rtx (Pmode);
8939 tmp2 = gen_reg_rtx (Pmode);
8940 mem = gen_const_mem (Pmode, tmp1);
8941 lab = gen_label_rtx ();
8942 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8943 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8944 if (TARGET_LINK_STACK)
8945 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8946 emit_move_insn (tmp2, mem);
8947 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8948 set_unique_reg_note (last, REG_EQUAL, gsym);
8953 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8955 tga = rs6000_tls_get_addr ();
8956 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8957 1, const0_rtx, Pmode);
8959 r3 = gen_rtx_REG (Pmode, 3);
8960 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8962 if (TARGET_64BIT)
8963 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
8964 else
8965 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
8967 else if (DEFAULT_ABI == ABI_V4)
8968 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
8969 else
8970 gcc_unreachable ();
8971 call_insn = last_call_insn ();
8972 PATTERN (call_insn) = insn;
8973 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8974 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
8975 pic_offset_table_rtx);
8977 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8979 tga = rs6000_tls_get_addr ();
8980 tmp1 = gen_reg_rtx (Pmode);
8981 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8982 1, const0_rtx, Pmode);
8984 r3 = gen_rtx_REG (Pmode, 3);
8985 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8987 if (TARGET_64BIT)
8988 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
8989 else
8990 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
8992 else if (DEFAULT_ABI == ABI_V4)
8993 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
8994 else
8995 gcc_unreachable ();
8996 call_insn = last_call_insn ();
8997 PATTERN (call_insn) = insn;
8998 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
8999 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9000 pic_offset_table_rtx);
9002 if (rs6000_tls_size == 16)
9004 if (TARGET_64BIT)
9005 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9006 else
9007 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9009 else if (rs6000_tls_size == 32)
9011 tmp2 = gen_reg_rtx (Pmode);
9012 if (TARGET_64BIT)
9013 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9014 else
9015 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9016 emit_insn (insn);
9017 if (TARGET_64BIT)
9018 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9019 else
9020 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9022 else
9024 tmp2 = gen_reg_rtx (Pmode);
9025 if (TARGET_64BIT)
9026 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9027 else
9028 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9029 emit_insn (insn);
9030 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9032 emit_insn (insn);
9034 else
9036 /* IE, or 64-bit offset LE. */
9037 tmp2 = gen_reg_rtx (Pmode);
9038 if (TARGET_64BIT)
9039 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9040 else
9041 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9042 emit_insn (insn);
9043 if (TARGET_64BIT)
9044 insn = gen_tls_tls_64 (dest, tmp2, addr);
9045 else
9046 insn = gen_tls_tls_32 (dest, tmp2, addr);
9047 emit_insn (insn);
9051 return dest;
9054 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9056 static bool
9057 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9059 if (GET_CODE (x) == HIGH
9060 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9061 return true;
9063 /* A TLS symbol in the TOC cannot contain a sum. */
9064 if (GET_CODE (x) == CONST
9065 && GET_CODE (XEXP (x, 0)) == PLUS
9066 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9067 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9068 return true;
9070 /* Do not place an ELF TLS symbol in the constant pool. */
9071 return TARGET_ELF && tls_referenced_p (x);
9074 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9075 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9076 can be addressed relative to the toc pointer. */
9078 static bool
9079 use_toc_relative_ref (rtx sym, machine_mode mode)
9081 return ((constant_pool_expr_p (sym)
9082 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9083 get_pool_mode (sym)))
9084 || (TARGET_CMODEL == CMODEL_MEDIUM
9085 && SYMBOL_REF_LOCAL_P (sym)
9086 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9089 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9090 replace the input X, or the original X if no replacement is called for.
9091 The output parameter *WIN is 1 if the calling macro should goto WIN,
9092 0 if it should not.
9094 For RS/6000, we wish to handle large displacements off a base
9095 register by splitting the addend across an addi/addis pair and the mem insn.
9096 This cuts the number of extra insns needed from 3 to 1.
9098 On Darwin, we use this to generate code for floating point constants.
9099 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9100 The Darwin code is inside #if TARGET_MACHO because only then are the
9101 machopic_* functions defined. */
9102 static rtx
9103 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9104 int opnum, int type,
9105 int ind_levels ATTRIBUTE_UNUSED, int *win)
9107 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9108 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9110 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9111 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9112 if (reg_offset_p
9113 && opnum == 1
9114 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9115 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9116 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9117 && TARGET_P9_VECTOR)
9118 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9119 && TARGET_P9_VECTOR)))
9120 reg_offset_p = false;
9122 /* We must recognize output that we have already generated ourselves. */
9123 if (GET_CODE (x) == PLUS
9124 && GET_CODE (XEXP (x, 0)) == PLUS
9125 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9126 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9127 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9129 if (TARGET_DEBUG_ADDR)
9131 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9132 debug_rtx (x);
9134 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9135 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9136 opnum, (enum reload_type) type);
9137 *win = 1;
9138 return x;
9141 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9142 if (GET_CODE (x) == LO_SUM
9143 && GET_CODE (XEXP (x, 0)) == HIGH)
9145 if (TARGET_DEBUG_ADDR)
9147 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9148 debug_rtx (x);
9150 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9151 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9152 opnum, (enum reload_type) type);
9153 *win = 1;
9154 return x;
9157 #if TARGET_MACHO
9158 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9159 && GET_CODE (x) == LO_SUM
9160 && GET_CODE (XEXP (x, 0)) == PLUS
9161 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9162 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9163 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9164 && machopic_operand_p (XEXP (x, 1)))
9166 /* Result of previous invocation of this function on Darwin
9167 floating point constant. */
9168 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9169 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9170 opnum, (enum reload_type) type);
9171 *win = 1;
9172 return x;
9174 #endif
9176 if (TARGET_CMODEL != CMODEL_SMALL
9177 && reg_offset_p
9178 && !quad_offset_p
9179 && small_toc_ref (x, VOIDmode))
9181 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9182 x = gen_rtx_LO_SUM (Pmode, hi, x);
9183 if (TARGET_DEBUG_ADDR)
9185 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9186 debug_rtx (x);
9188 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9189 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9190 opnum, (enum reload_type) type);
9191 *win = 1;
9192 return x;
9195 if (GET_CODE (x) == PLUS
9196 && REG_P (XEXP (x, 0))
9197 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9198 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9199 && CONST_INT_P (XEXP (x, 1))
9200 && reg_offset_p
9201 && !SPE_VECTOR_MODE (mode)
9202 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9203 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9205 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9206 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9207 HOST_WIDE_INT high
9208 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
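/* Illustrative values (chosen here, not from the original source):
   for val = 0x12345 this yields low = 0x2345 and high = 0x10000;
   for val = 0x9000, low sign-extends to -0x7000 and high becomes
   0x10000, so the offset is rebuilt as the sum of a 32-bit-safe
   high part and a signed 16-bit low part.  */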
9210 /* Check for 32-bit overflow or quad addresses with one of the
9211 four least significant bits set. */
9212 if (high + low != val
9213 || (quad_offset_p && (low & 0xf)))
9215 *win = 0;
9216 return x;
9219 /* Reload the high part into a base reg; leave the low part
9220 in the mem directly. */
9222 x = gen_rtx_PLUS (GET_MODE (x),
9223 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9224 GEN_INT (high)),
9225 GEN_INT (low));
9227 if (TARGET_DEBUG_ADDR)
9229 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9230 debug_rtx (x);
9232 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9233 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9234 opnum, (enum reload_type) type);
9235 *win = 1;
9236 return x;
9239 if (GET_CODE (x) == SYMBOL_REF
9240 && reg_offset_p
9241 && !quad_offset_p
9242 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9243 && !SPE_VECTOR_MODE (mode)
9244 #if TARGET_MACHO
9245 && DEFAULT_ABI == ABI_DARWIN
9246 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9247 && machopic_symbol_defined_p (x)
9248 #else
9249 && DEFAULT_ABI == ABI_V4
9250 && !flag_pic
9251 #endif
9252 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9253 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9254 without fprs.
9255 ??? Assume floating point reg based on mode? This assumption is
9256 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9257 where reload ends up doing a DFmode load of a constant from
9258 mem using two gprs. Unfortunately, at this point reload
9259 hasn't yet selected regs so poking around in reload data
9260 won't help and even if we could figure out the regs reliably,
9261 we'd still want to allow this transformation when the mem is
9262 naturally aligned. Since we say the address is good here, we
9263 can't disable offsets from LO_SUMs in mem_operand_gpr.
9264 FIXME: Allow offset from lo_sum for other modes too, when
9265 mem is sufficiently aligned.
9267 Also disallow this if the type can go in VMX/Altivec registers, since
9268 those registers do not have d-form (reg+offset) address modes. */
9269 && !reg_addr[mode].scalar_in_vmx_p
9270 && mode != TFmode
9271 && mode != TDmode
9272 && mode != IFmode
9273 && mode != KFmode
9274 && (mode != TImode || !TARGET_VSX_TIMODE)
9275 && mode != PTImode
9276 && (mode != DImode || TARGET_POWERPC64)
9277 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9278 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
9280 #if TARGET_MACHO
9281 if (flag_pic)
9283 rtx offset = machopic_gen_offset (x);
9284 x = gen_rtx_LO_SUM (GET_MODE (x),
9285 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9286 gen_rtx_HIGH (Pmode, offset)), offset);
9288 else
9289 #endif
9290 x = gen_rtx_LO_SUM (GET_MODE (x),
9291 gen_rtx_HIGH (Pmode, x), x);
9293 if (TARGET_DEBUG_ADDR)
9295 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9296 debug_rtx (x);
9298 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9299 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9300 opnum, (enum reload_type) type);
9301 *win = 1;
9302 return x;
9305 /* Reload an offset address wrapped by an AND that represents the
9306 masking of the lower bits. Strip the outer AND and let reload
9307 convert the offset address into an indirect address. For VSX,
9308 force reload to create the address with an AND in a separate
9309 register, because we can't guarantee an altivec register will
9310 be used. */
9311 if (VECTOR_MEM_ALTIVEC_P (mode)
9312 && GET_CODE (x) == AND
9313 && GET_CODE (XEXP (x, 0)) == PLUS
9314 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9315 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9316 && GET_CODE (XEXP (x, 1)) == CONST_INT
9317 && INTVAL (XEXP (x, 1)) == -16)
9319 x = XEXP (x, 0);
9320 *win = 1;
9321 return x;
9324 if (TARGET_TOC
9325 && reg_offset_p
9326 && !quad_offset_p
9327 && GET_CODE (x) == SYMBOL_REF
9328 && use_toc_relative_ref (x, mode))
9330 x = create_TOC_reference (x, NULL_RTX);
9331 if (TARGET_CMODEL != CMODEL_SMALL)
9333 if (TARGET_DEBUG_ADDR)
9335 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9336 debug_rtx (x);
9338 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9339 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9340 opnum, (enum reload_type) type);
9342 *win = 1;
9343 return x;
9345 *win = 0;
9346 return x;
9349 /* Debug version of rs6000_legitimize_reload_address. */
9350 static rtx
9351 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9352 int opnum, int type,
9353 int ind_levels, int *win)
9355 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9356 ind_levels, win);
9357 fprintf (stderr,
9358 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9359 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9360 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9361 debug_rtx (x);
9363 if (x == ret)
9364 fprintf (stderr, "Same address returned\n");
9365 else if (!ret)
9366 fprintf (stderr, "NULL returned\n");
9367 else
9369 fprintf (stderr, "New address:\n");
9370 debug_rtx (ret);
9373 return ret;
9376 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9377 that is a valid memory address for an instruction.
9378 The MODE argument is the machine mode for the MEM expression
9379 that wants to use this address.
9381 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9382 refers to a constant pool entry of an address (or the sum of it
9383 plus a constant), a short (16-bit signed) constant plus a register,
9384 the sum of two registers, or a register indirect, possibly with an
9385 auto-increment. For DFmode, DDmode and DImode with a constant plus
9386 register, we must ensure that both words are addressable or PowerPC64
9387 with offset word aligned.
9389 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9390 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9391 because adjacent memory cells are accessed by adding word-sized offsets
9392 during assembly output. */
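/* As RTL, the four forms described above look roughly like this
   (register numbers and the symbol are assumed for illustration):
     (reg 9)                                  register indirect
     (plus (reg 9) (const_int 8))             reg + 16-bit offset
     (plus (reg 9) (reg 10))                  reg + reg (indexed)
     (lo_sum (reg 2) (symbol_ref "x"))        constant-pool reference
   possibly wrapped in PRE_INC/PRE_DEC/PRE_MODIFY for the
   auto-increment variants.  */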
9393 static bool
9394 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9396 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9397 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9399 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9400 if (VECTOR_MEM_ALTIVEC_P (mode)
9401 && GET_CODE (x) == AND
9402 && GET_CODE (XEXP (x, 1)) == CONST_INT
9403 && INTVAL (XEXP (x, 1)) == -16)
9404 x = XEXP (x, 0);
9406 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9407 return 0;
9408 if (legitimate_indirect_address_p (x, reg_ok_strict))
9409 return 1;
9410 if (TARGET_UPDATE
9411 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9412 && mode_supports_pre_incdec_p (mode)
9413 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9414 return 1;
9415 /* Handle restricted vector d-form offsets in ISA 3.0. */
9416 if (quad_offset_p)
9418 if (quad_address_p (x, mode, reg_ok_strict))
9419 return 1;
9421 else if (virtual_stack_registers_memory_p (x))
9422 return 1;
9424 else if (reg_offset_p)
9426 if (legitimate_small_data_p (mode, x))
9427 return 1;
9428 if (legitimate_constant_pool_address_p (x, mode,
9429 reg_ok_strict || lra_in_progress))
9430 return 1;
9431 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9432 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9433 return 1;
9436 /* For TImode, if we have TImode in VSX registers, only allow register
9437 indirect addresses. This will allow the values to go in either GPRs
9438 or VSX registers without reloading. The vector types would tend to
9439 go into VSX registers, so we allow REG+REG, while TImode seems
9440 somewhat split, in that some uses are GPR based, and some VSX based. */
9441 /* FIXME: We could loosen this by changing the following to
9442 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9443 but currently we cannot allow REG+REG addressing for TImode. See
9444 PR72827 for complete details on how this ends up hoodwinking DSE. */
9445 if (mode == TImode && TARGET_VSX_TIMODE)
9446 return 0;
9447 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9448 if (! reg_ok_strict
9449 && reg_offset_p
9450 && GET_CODE (x) == PLUS
9451 && GET_CODE (XEXP (x, 0)) == REG
9452 && (XEXP (x, 0) == virtual_stack_vars_rtx
9453 || XEXP (x, 0) == arg_pointer_rtx)
9454 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9455 return 1;
9456 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9457 return 1;
9458 if (!FLOAT128_2REG_P (mode)
9459 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9460 || TARGET_POWERPC64
9461 || (mode != DFmode && mode != DDmode)
9462 || (TARGET_E500_DOUBLE && mode != DDmode))
9463 && (TARGET_POWERPC64 || mode != DImode)
9464 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9465 && mode != PTImode
9466 && !avoiding_indexed_address_p (mode)
9467 && legitimate_indexed_address_p (x, reg_ok_strict))
9468 return 1;
9469 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9470 && mode_supports_pre_modify_p (mode)
9471 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9472 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9473 reg_ok_strict, false)
9474 || (!avoiding_indexed_address_p (mode)
9475 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9476 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9477 return 1;
9478 if (reg_offset_p && !quad_offset_p
9479 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9480 return 1;
9481 return 0;
9484 /* Debug version of rs6000_legitimate_address_p. */
9485 static bool
9486 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9487 bool reg_ok_strict)
9489 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9490 fprintf (stderr,
9491 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9492 "strict = %d, reload = %s, code = %s\n",
9493 ret ? "true" : "false",
9494 GET_MODE_NAME (mode),
9495 reg_ok_strict,
9496 (reload_completed
9497 ? "after"
9498 : (reload_in_progress ? "progress" : "before")),
9499 GET_RTX_NAME (GET_CODE (x)));
9500 debug_rtx (x);
9502 return ret;
9505 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9507 static bool
9508 rs6000_mode_dependent_address_p (const_rtx addr,
9509 addr_space_t as ATTRIBUTE_UNUSED)
9511 return rs6000_mode_dependent_address_ptr (addr);
9514 /* Go to LABEL if ADDR (a legitimate address expression)
9515 has an effect that depends on the machine mode it is used for.
9517 On the RS/6000 this is true of all integral offsets (since AltiVec
9518 and VSX modes don't allow them) and of pre-increment and pre-decrement addresses.
9520 ??? Except that due to conceptual problems in offsettable_address_p
9521 we can't really report the problems of integral offsets. So leave
9522 this assuming that the adjustable offset must be valid for the
9523 sub-words of a TFmode operand, which is what we had before. */
9525 static bool
9526 rs6000_mode_dependent_address (const_rtx addr)
9528 switch (GET_CODE (addr))
9530 case PLUS:
9531 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9532 is considered a legitimate address before reload, so there
9533 are no offset restrictions in that case. Note that this
9534 condition is safe in strict mode because any address involving
9535 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9536 been rejected as illegitimate. */
9537 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9538 && XEXP (addr, 0) != arg_pointer_rtx
9539 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9541 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
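/* The largest multi-word access touches sub-words at offsets up to
   val + 12 (val + 8 with 64-bit GPRs), so the address is
   mode-dependent once that last offset would overflow the signed
   16-bit displacement field; adding 0x8000 folds the signed range
   test into the single unsigned comparison below.  */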
9542 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9544 break;
9546 case LO_SUM:
9547 /* Anything in the constant pool is sufficiently aligned that
9548 all bytes have the same high part address. */
9549 return !legitimate_constant_pool_address_p (addr, QImode, false);
9551 /* Auto-increment cases are now treated generically in recog.c. */
9552 case PRE_MODIFY:
9553 return TARGET_UPDATE;
9555 /* AND is only allowed in Altivec loads. */
9556 case AND:
9557 return true;
9559 default:
9560 break;
9563 return false;
9566 /* Debug version of rs6000_mode_dependent_address. */
9567 static bool
9568 rs6000_debug_mode_dependent_address (const_rtx addr)
9570 bool ret = rs6000_mode_dependent_address (addr);
9572 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9573 ret ? "true" : "false");
9574 debug_rtx (addr);
9576 return ret;
9579 /* Implement FIND_BASE_TERM. */
9581 rtx
9582 rs6000_find_base_term (rtx op)
9584 rtx base;
9586 base = op;
9587 if (GET_CODE (base) == CONST)
9588 base = XEXP (base, 0);
9589 if (GET_CODE (base) == PLUS)
9590 base = XEXP (base, 0);
9591 if (GET_CODE (base) == UNSPEC)
9592 switch (XINT (base, 1))
9594 case UNSPEC_TOCREL:
9595 case UNSPEC_MACHOPIC_OFFSET:
9596 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9597 for aliasing purposes. */
9598 return XVECEXP (base, 0, 0);
9601 return op;
9604 /* More elaborate version of recog's offsettable_memref_p predicate
9605 that works around the ??? note of rs6000_mode_dependent_address.
9606 In particular it accepts
9608 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9610 in 32-bit mode, which the recog predicate rejects. */
9612 static bool
9613 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9615 bool worst_case;
9617 if (!MEM_P (op))
9618 return false;
9620 /* First mimic offsettable_memref_p. */
9621 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9622 return true;
9624 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9625 the latter predicate knows nothing about the mode of the memory
9626 reference and, therefore, assumes that it is the largest supported
9627 mode (TFmode). As a consequence, legitimate offsettable memory
9628 references are rejected. rs6000_legitimate_offset_address_p contains
9629 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9630 at least with a little bit of help here given that we know the
9631 actual registers used. */
9632 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9633 || GET_MODE_SIZE (reg_mode) == 4);
9634 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9635 true, worst_case);
9638 /* Determine the reassociation width to be used in reassociate_bb.
9639 This takes into account how many parallel operations we
9640 can actually do of a given type, and also the latency.
9642 int add/sub 6/cycle
9643 mul 2/cycle
9644 vect add/sub/mul 2/cycle
9645 fp add/sub/mul 2/cycle
9646 dfp 1/cycle
9649 static int
9650 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9651 enum machine_mode mode)
9653 switch (rs6000_cpu)
9655 case PROCESSOR_POWER8:
9656 case PROCESSOR_POWER9:
9657 if (DECIMAL_FLOAT_MODE_P (mode))
9658 return 1;
9659 if (VECTOR_MODE_P (mode))
9660 return 4;
9661 if (INTEGRAL_MODE_P (mode))
9662 return opc == MULT_EXPR ? 4 : 6;
9663 if (FLOAT_MODE_P (mode))
9664 return 4;
9665 break;
9666 default:
9667 break;
9669 return 1;
9672 /* Change register usage conditional on target flags. */
9673 static void
9674 rs6000_conditional_register_usage (void)
9676 int i;
9678 if (TARGET_DEBUG_TARGET)
9679 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9681 /* Set MQ register fixed (already call_used) so that it will not be
9682 allocated. */
9683 fixed_regs[64] = 1;
9685 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9686 if (TARGET_64BIT)
9687 fixed_regs[13] = call_used_regs[13]
9688 = call_really_used_regs[13] = 1;
9690 /* Conditionally disable FPRs. */
9691 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
9692 for (i = 32; i < 64; i++)
9693 fixed_regs[i] = call_used_regs[i]
9694 = call_really_used_regs[i] = 1;
9696 /* The TOC register is not killed across calls in a way that is
9697 visible to the compiler. */
9698 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9699 call_really_used_regs[2] = 0;
9701 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9702 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9704 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9705 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9706 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9707 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9709 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9710 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9711 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9712 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9714 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9715 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9716 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9718 if (TARGET_SPE)
9720 global_regs[SPEFSCR_REGNO] = 1;
9721 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
9722 registers in prologues and epilogues. We no longer use r14
9723 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
9724 pool for link-compatibility with older versions of GCC. Once
9725 "old" code has died out, we can return r14 to the allocation
9726 pool. */
9727 fixed_regs[14]
9728 = call_used_regs[14]
9729 = call_really_used_regs[14] = 1;
9732 if (!TARGET_ALTIVEC && !TARGET_VSX)
9734 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9735 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9736 call_really_used_regs[VRSAVE_REGNO] = 1;
9739 if (TARGET_ALTIVEC || TARGET_VSX)
9740 global_regs[VSCR_REGNO] = 1;
9742 if (TARGET_ALTIVEC_ABI)
9744 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9745 call_used_regs[i] = call_really_used_regs[i] = 1;
9747 /* AIX reserves VR20:31 in non-extended ABI mode. */
9748 if (TARGET_XCOFF)
9749 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9750 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9755 /* Output insns to set DEST equal to the constant SOURCE as a series of
9756 lis, ori and shl instructions and return TRUE. */
9758 bool
9759 rs6000_emit_set_const (rtx dest, rtx source)
9761 machine_mode mode = GET_MODE (dest);
9762 rtx temp, set;
9763 rtx_insn *insn;
9764 HOST_WIDE_INT c;
9766 gcc_checking_assert (CONST_INT_P (source));
9767 c = INTVAL (source);
9768 switch (mode)
9770 case QImode:
9771 case HImode:
9772 emit_insn (gen_rtx_SET (dest, source));
9773 return true;
9775 case SImode:
9776 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9778 emit_insn (gen_rtx_SET (copy_rtx (temp),
9779 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9780 emit_insn (gen_rtx_SET (dest,
9781 gen_rtx_IOR (SImode, copy_rtx (temp),
9782 GEN_INT (c & 0xffff))));
9783 break;
9785 case DImode:
9786 if (!TARGET_POWERPC64)
9788 rtx hi, lo;
9790 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9791 DImode);
9792 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9793 DImode);
9794 emit_move_insn (hi, GEN_INT (c >> 32));
9795 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9796 emit_move_insn (lo, GEN_INT (c));
9798 else
9799 rs6000_emit_set_long_const (dest, c);
9800 break;
9802 default:
9803 gcc_unreachable ();
9806 insn = get_last_insn ();
9807 set = single_set (insn);
9808 if (! CONSTANT_P (SET_SRC (set)))
9809 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9811 return true;
9814 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9815 Output insns to set DEST equal to the constant C as a series of
9816 lis, ori and shl instructions. */
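/* A worked example (constant chosen for illustration): for
   c = 0x123456789abcdef0 the general case below emits the
   equivalent of
       lis   rT, 0x1234
       ori   rT, rT, 0x5678
       sldi  rT, rT, 32
       oris  rT, rT, 0x9abc
       ori   rT, rT, 0xdef0
   while constants whose upper halves are all zeros or all ones
   take the shorter early paths.  */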
9818 static void
9819 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9821 rtx temp;
9822 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9824 ud1 = c & 0xffff;
9825 c = c >> 16;
9826 ud2 = c & 0xffff;
9827 c = c >> 16;
9828 ud3 = c & 0xffff;
9829 c = c >> 16;
9830 ud4 = c & 0xffff;
9832 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9833 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9834 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9836 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9837 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9839 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9841 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9842 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9843 if (ud1 != 0)
9844 emit_move_insn (dest,
9845 gen_rtx_IOR (DImode, copy_rtx (temp),
9846 GEN_INT (ud1)));
9848 else if (ud3 == 0 && ud4 == 0)
9850 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9852 gcc_assert (ud2 & 0x8000);
9853 emit_move_insn (copy_rtx (temp),
9854 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9855 if (ud1 != 0)
9856 emit_move_insn (copy_rtx (temp),
9857 gen_rtx_IOR (DImode, copy_rtx (temp),
9858 GEN_INT (ud1)));
9859 emit_move_insn (dest,
9860 gen_rtx_ZERO_EXTEND (DImode,
9861 gen_lowpart (SImode,
9862 copy_rtx (temp))));
9864 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9865 || (ud4 == 0 && ! (ud3 & 0x8000)))
9867 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9869 emit_move_insn (copy_rtx (temp),
9870 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9871 if (ud2 != 0)
9872 emit_move_insn (copy_rtx (temp),
9873 gen_rtx_IOR (DImode, copy_rtx (temp),
9874 GEN_INT (ud2)));
9875 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9876 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9877 GEN_INT (16)));
9878 if (ud1 != 0)
9879 emit_move_insn (dest,
9880 gen_rtx_IOR (DImode, copy_rtx (temp),
9881 GEN_INT (ud1)));
9883 else
9885 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9887 emit_move_insn (copy_rtx (temp),
9888 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9889 if (ud3 != 0)
9890 emit_move_insn (copy_rtx (temp),
9891 gen_rtx_IOR (DImode, copy_rtx (temp),
9892 GEN_INT (ud3)));
9894 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9895 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9896 GEN_INT (32)));
9897 if (ud2 != 0)
9898 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9899 gen_rtx_IOR (DImode, copy_rtx (temp),
9900 GEN_INT (ud2 << 16)));
9901 if (ud1 != 0)
9902 emit_move_insn (dest,
9903 gen_rtx_IOR (DImode, copy_rtx (temp),
9904 GEN_INT (ud1)));
9908 /* Helper for the move expanders below. Get rid of [r+r] memory refs
9909 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9911 static void
9912 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9914 if (reload_in_progress)
9915 return;
9917 if (GET_CODE (operands[0]) == MEM
9918 && GET_CODE (XEXP (operands[0], 0)) != REG
9919 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9920 GET_MODE (operands[0]), false))
9921 operands[0]
9922 = replace_equiv_address (operands[0],
9923 copy_addr_to_reg (XEXP (operands[0], 0)));
9925 if (GET_CODE (operands[1]) == MEM
9926 && GET_CODE (XEXP (operands[1], 0)) != REG
9927 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9928 GET_MODE (operands[1]), false))
9929 operands[1]
9930 = replace_equiv_address (operands[1],
9931 copy_addr_to_reg (XEXP (operands[1], 0)));
9934 /* Generate a vector of constants to permute MODE for a little-endian
9935 storage operation by swapping the two halves of a vector. */
9936 static rtvec
9937 rs6000_const_vec (machine_mode mode)
9939 int i, subparts;
9940 rtvec v;
9942 switch (mode)
9944 case V1TImode:
9945 subparts = 1;
9946 break;
9947 case V2DFmode:
9948 case V2DImode:
9949 subparts = 2;
9950 break;
9951 case V4SFmode:
9952 case V4SImode:
9953 subparts = 4;
9954 break;
9955 case V8HImode:
9956 subparts = 8;
9957 break;
9958 case V16QImode:
9959 subparts = 16;
9960 break;
9961 default:
9962 gcc_unreachable ();
9965 v = rtvec_alloc (subparts);
9967 for (i = 0; i < subparts / 2; ++i)
9968 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9969 for (i = subparts / 2; i < subparts; ++i)
9970 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9972 return v;
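/* E.g. for V4SImode the vector built above is {2, 3, 0, 1}:
   element i of the result selects element (i +/- subparts/2) of
   the source, i.e. the two 64-bit halves of the vector swap
   places.  */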
9975 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
9976 for a VSX load or store operation. */
9977 rtx
9978 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
9980 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
9981 128-bit integers if they are allowed in VSX registers. */
9982 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
9983 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
9984 else
9986 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9987 return gen_rtx_VEC_SELECT (mode, source, par);
9991 /* Emit a little-endian load from vector memory location SOURCE to VSX
9992 register DEST in mode MODE. The load is done with two permuting
9993 insns that represent an lxvd2x and xxpermdi. */
9994 void
9995 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9997 rtx tmp, permute_mem, permute_reg;
9999 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10000 V1TImode). */
10001 if (mode == TImode || mode == V1TImode)
10003 mode = V2DImode;
10004 dest = gen_lowpart (V2DImode, dest);
10005 source = adjust_address (source, V2DImode, 0);
10008 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10009 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10010 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10011 emit_insn (gen_rtx_SET (tmp, permute_mem));
10012 emit_insn (gen_rtx_SET (dest, permute_reg));
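/* The two permuting SETs just emitted compose to the identity on
   element order: the lxvd2x-style load brings the doublewords in
   swapped, and the xxpermdi (or 64-bit rotate) swaps them back,
   leaving DEST in true little-endian element order.  */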
10015 /* Emit a little-endian store to vector memory location DEST from VSX
10016 register SOURCE in mode MODE. The store is done with two permuting
10017 insns that represent an xxpermdi and an stxvd2x. */
10018 void
10019 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10021 rtx tmp, permute_src, permute_tmp;
10023 /* This should never be called during or after reload, because it does
10024 not re-permute the source register. It is intended only for use
10025 during expand. */
10026 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10028 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10029 V1TImode). */
10030 if (mode == TImode || mode == V1TImode)
10032 mode = V2DImode;
10033 dest = adjust_address (dest, V2DImode, 0);
10034 source = gen_lowpart (V2DImode, source);
10037 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10038 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10039 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10040 emit_insn (gen_rtx_SET (tmp, permute_src));
10041 emit_insn (gen_rtx_SET (dest, permute_tmp));
10044 /* Emit a sequence representing a little-endian VSX load or store,
10045 moving data from SOURCE to DEST in mode MODE. This is done
10046 separately from rs6000_emit_move to ensure it is called only
10047 during expand. LE VSX loads and stores introduced later are
10048 handled with a split. The expand-time RTL generation allows
10049 us to optimize away redundant pairs of register-permutes. */
10050 void
10051 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10053 gcc_assert (!BYTES_BIG_ENDIAN
10054 && VECTOR_MEM_VSX_P (mode)
10055 && !TARGET_P9_VECTOR
10056 && !gpr_or_gpr_p (dest, source)
10057 && (MEM_P (source) ^ MEM_P (dest)));
10059 if (MEM_P (source))
10061 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10062 rs6000_emit_le_vsx_load (dest, source, mode);
10064 else
10066 if (!REG_P (source))
10067 source = force_reg (mode, source);
10068 rs6000_emit_le_vsx_store (dest, source, mode);
10072 /* Emit a move from SOURCE to DEST in mode MODE. */
10073 void
10074 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10076 rtx operands[2];
10077 operands[0] = dest;
10078 operands[1] = source;
10080 if (TARGET_DEBUG_ADDR)
10082 fprintf (stderr,
10083 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10084 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10085 GET_MODE_NAME (mode),
10086 reload_in_progress,
10087 reload_completed,
10088 can_create_pseudo_p ());
10089 debug_rtx (dest);
10090 fprintf (stderr, "source:\n");
10091 debug_rtx (source);
10094 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10095 if (CONST_WIDE_INT_P (operands[1])
10096 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10098 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10099 gcc_unreachable ();
10102 /* Check if GCC is setting up a block move that will end up using FP
10103 registers as temporaries. We must make sure this is acceptable. */
10104 if (GET_CODE (operands[0]) == MEM
10105 && GET_CODE (operands[1]) == MEM
10106 && mode == DImode
10107 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10108 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10109 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10110 ? 32 : MEM_ALIGN (operands[0])))
10111 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10112 ? 32
10113 : MEM_ALIGN (operands[1]))))
10114 && ! MEM_VOLATILE_P (operands [0])
10115 && ! MEM_VOLATILE_P (operands [1]))
10117 emit_move_insn (adjust_address (operands[0], SImode, 0),
10118 adjust_address (operands[1], SImode, 0));
10119 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10120 adjust_address (copy_rtx (operands[1]), SImode, 4));
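/* The two SImode moves above copy the 8-byte block as two
   naturally aligned words, which is cheaper here than one slow
   unaligned DImode access or a trip through FP registers.  */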
10121 return;
10124 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10125 && !gpc_reg_operand (operands[1], mode))
10126 operands[1] = force_reg (mode, operands[1]);
10128 /* Recognize the case where operand[1] is a reference to thread-local
10129 data and load its address to a register. */
10130 if (tls_referenced_p (operands[1]))
10132 enum tls_model model;
10133 rtx tmp = operands[1];
10134 rtx addend = NULL;
10136 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10138 addend = XEXP (XEXP (tmp, 0), 1);
10139 tmp = XEXP (XEXP (tmp, 0), 0);
10142 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10143 model = SYMBOL_REF_TLS_MODEL (tmp);
10144 gcc_assert (model != 0);
10146 tmp = rs6000_legitimize_tls_address (tmp, model);
10147 if (addend)
10149 tmp = gen_rtx_PLUS (mode, tmp, addend);
10150 tmp = force_operand (tmp, operands[0]);
10152 operands[1] = tmp;
10155 /* Handle the case where reload calls us with an invalid address. */
10156 if (reload_in_progress && mode == Pmode
10157 && (! general_operand (operands[1], mode)
10158 || ! nonimmediate_operand (operands[0], mode)))
10159 goto emit_set;
10161 /* 128-bit constant floating-point values on Darwin should really be loaded
10162 as two parts. However, this premature splitting is a problem when DFmode
10163 values can go into Altivec registers. */
10164 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10165 && GET_CODE (operands[1]) == CONST_DOUBLE)
10167 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10168 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10169 DFmode);
10170 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10171 GET_MODE_SIZE (DFmode)),
10172 simplify_gen_subreg (DFmode, operands[1], mode,
10173 GET_MODE_SIZE (DFmode)),
10174 DFmode);
10175 return;
10178 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10179 cfun->machine->sdmode_stack_slot =
10180 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10183 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10184 p1:SD) if p1 is not of floating point class and p0 is spilled as
10185 we can have no analogous movsd_store for this. */
10186 if (lra_in_progress && mode == DDmode
10187 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10188 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10189 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10190 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10192 enum reg_class cl;
10193 int regno = REGNO (SUBREG_REG (operands[1]));
10195 if (regno >= FIRST_PSEUDO_REGISTER)
10197 cl = reg_preferred_class (regno);
10198 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10200 if (regno >= 0 && ! FP_REGNO_P (regno))
10202 mode = SDmode;
10203 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10204 operands[1] = SUBREG_REG (operands[1]);
10207 if (lra_in_progress
10208 && mode == SDmode
10209 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10210 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10211 && (REG_P (operands[1])
10212 || (GET_CODE (operands[1]) == SUBREG
10213 && REG_P (SUBREG_REG (operands[1])))))
10215 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10216 ? SUBREG_REG (operands[1]) : operands[1]);
10217 enum reg_class cl;
10219 if (regno >= FIRST_PSEUDO_REGISTER)
10221 cl = reg_preferred_class (regno);
10222 gcc_assert (cl != NO_REGS);
10223 regno = ira_class_hard_regs[cl][0];
10225 if (FP_REGNO_P (regno))
10227 if (GET_MODE (operands[0]) != DDmode)
10228 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10229 emit_insn (gen_movsd_store (operands[0], operands[1]));
10231 else if (INT_REGNO_P (regno))
10232 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10233 else
10234 gcc_unreachable ();
10235 return;
10237 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10238 p:DD)) if p0 is not of floating point class and p1 is spilled as
10239 we can have no analogous movsd_load for this. */
10240 if (lra_in_progress && mode == DDmode
10241 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10242 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10243 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10244 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10246 enum reg_class cl;
10247 int regno = REGNO (SUBREG_REG (operands[0]));
10249 if (regno >= FIRST_PSEUDO_REGISTER)
10251 cl = reg_preferred_class (regno);
10252 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10254 if (regno >= 0 && ! FP_REGNO_P (regno))
10256 mode = SDmode;
10257 operands[0] = SUBREG_REG (operands[0]);
10258 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10261 if (lra_in_progress
10262 && mode == SDmode
10263 && (REG_P (operands[0])
10264 || (GET_CODE (operands[0]) == SUBREG
10265 && REG_P (SUBREG_REG (operands[0]))))
10266 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10267 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10269 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10270 ? SUBREG_REG (operands[0]) : operands[0]);
10271 enum reg_class cl;
10273 if (regno >= FIRST_PSEUDO_REGISTER)
10275 cl = reg_preferred_class (regno);
10276 gcc_assert (cl != NO_REGS);
10277 regno = ira_class_hard_regs[cl][0];
10279 if (FP_REGNO_P (regno))
10281 if (GET_MODE (operands[1]) != DDmode)
10282 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10283 emit_insn (gen_movsd_load (operands[0], operands[1]));
10285 else if (INT_REGNO_P (regno))
10286 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10287 else
10288 gcc_unreachable ();
10289 return;
10292 if (reload_in_progress
10293 && mode == SDmode
10294 && cfun->machine->sdmode_stack_slot != NULL_RTX
10295 && MEM_P (operands[0])
10296 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10297 && REG_P (operands[1]))
10299 if (FP_REGNO_P (REGNO (operands[1])))
10301 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10302 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10303 emit_insn (gen_movsd_store (mem, operands[1]));
10305 else if (INT_REGNO_P (REGNO (operands[1])))
10307 rtx mem = operands[0];
10308 if (BYTES_BIG_ENDIAN)
10309 mem = adjust_address_nv (mem, mode, 4);
10310 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10311 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10313 else
10314 gcc_unreachable ();
10315 return;
10317 if (reload_in_progress
10318 && mode == SDmode
10319 && REG_P (operands[0])
10320 && MEM_P (operands[1])
10321 && cfun->machine->sdmode_stack_slot != NULL_RTX
10322 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10324 if (FP_REGNO_P (REGNO (operands[0])))
10326 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10327 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10328 emit_insn (gen_movsd_load (operands[0], mem));
10330 else if (INT_REGNO_P (REGNO (operands[0])))
10332 rtx mem = operands[1];
10333 if (BYTES_BIG_ENDIAN)
10334 mem = adjust_address_nv (mem, mode, 4);
10335 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10336 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10338 else
10339 gcc_unreachable ();
10340 return;
10343 /* FIXME: In the long term, this switch statement should go away
10344 and be replaced by a sequence of tests based on things like
10345 mode == Pmode. */
10346 switch (mode)
10348 case HImode:
10349 case QImode:
10350 if (CONSTANT_P (operands[1])
10351 && GET_CODE (operands[1]) != CONST_INT)
10352 operands[1] = force_const_mem (mode, operands[1]);
10353 break;
10355 case TFmode:
10356 case TDmode:
10357 case IFmode:
10358 case KFmode:
10359 if (FLOAT128_2REG_P (mode))
10360 rs6000_eliminate_indexed_memrefs (operands);
10361 /* fall through */
10363 case DFmode:
10364 case DDmode:
10365 case SFmode:
10366 case SDmode:
10367 if (CONSTANT_P (operands[1])
10368 && ! easy_fp_constant (operands[1], mode))
10369 operands[1] = force_const_mem (mode, operands[1]);
10370 break;
10372 case V16QImode:
10373 case V8HImode:
10374 case V4SFmode:
10375 case V4SImode:
10376 case V4HImode:
10377 case V2SFmode:
10378 case V2SImode:
10379 case V1DImode:
10380 case V2DFmode:
10381 case V2DImode:
10382 case V1TImode:
10383 if (CONSTANT_P (operands[1])
10384 && !easy_vector_constant (operands[1], mode))
10385 operands[1] = force_const_mem (mode, operands[1]);
10386 break;
10388 case SImode:
10389 case DImode:
10390 /* Use default pattern for address of ELF small data. */
10391 if (TARGET_ELF
10392 && mode == Pmode
10393 && DEFAULT_ABI == ABI_V4
10394 && (GET_CODE (operands[1]) == SYMBOL_REF
10395 || GET_CODE (operands[1]) == CONST)
10396 && small_data_operand (operands[1], mode))
10398 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10399 return;
10402 if (DEFAULT_ABI == ABI_V4
10403 && mode == Pmode && mode == SImode
10404 && flag_pic == 1 && got_operand (operands[1], mode))
10406 emit_insn (gen_movsi_got (operands[0], operands[1]));
10407 return;
10410 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10411 && TARGET_NO_TOC
10412 && ! flag_pic
10413 && mode == Pmode
10414 && CONSTANT_P (operands[1])
10415 && GET_CODE (operands[1]) != HIGH
10416 && GET_CODE (operands[1]) != CONST_INT)
10418 rtx target = (!can_create_pseudo_p ()
10419 ? operands[0]
10420 : gen_reg_rtx (mode));
10422 /* If this is a function address on -mcall-aixdesc,
10423 convert it to the address of the descriptor. */
10424 if (DEFAULT_ABI == ABI_AIX
10425 && GET_CODE (operands[1]) == SYMBOL_REF
10426 && XSTR (operands[1], 0)[0] == '.')
10428 const char *name = XSTR (operands[1], 0);
10429 rtx new_ref;
10430 while (*name == '.')
10431 name++;
10432 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10433 CONSTANT_POOL_ADDRESS_P (new_ref)
10434 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10435 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10436 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10437 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10438 operands[1] = new_ref;
10441 if (DEFAULT_ABI == ABI_DARWIN)
10443 #if TARGET_MACHO
10444 if (MACHO_DYNAMIC_NO_PIC_P)
10446 /* Take care of any required data indirection. */
10447 operands[1] = rs6000_machopic_legitimize_pic_address (
10448 operands[1], mode, operands[0]);
10449 if (operands[0] != operands[1])
10450 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10451 return;
10453 #endif
10454 emit_insn (gen_macho_high (target, operands[1]));
10455 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10456 return;
10459 emit_insn (gen_elf_high (target, operands[1]));
10460 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10461 return;
10464 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10465 and we have put it in the TOC, we just need to make a TOC-relative
10466 reference to it. */
10467 if (TARGET_TOC
10468 && GET_CODE (operands[1]) == SYMBOL_REF
10469 && use_toc_relative_ref (operands[1], mode))
10470 operands[1] = create_TOC_reference (operands[1], operands[0]);
10471 else if (mode == Pmode
10472 && CONSTANT_P (operands[1])
10473 && GET_CODE (operands[1]) != HIGH
10474 && ((GET_CODE (operands[1]) != CONST_INT
10475 && ! easy_fp_constant (operands[1], mode))
10476 || (GET_CODE (operands[1]) == CONST_INT
10477 && (num_insns_constant (operands[1], mode)
10478 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10479 || (GET_CODE (operands[0]) == REG
10480 && FP_REGNO_P (REGNO (operands[0]))))
10481 && !toc_relative_expr_p (operands[1], false)
10482 && (TARGET_CMODEL == CMODEL_SMALL
10483 || can_create_pseudo_p ()
10484 || (REG_P (operands[0])
10485 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10488 #if TARGET_MACHO
10489 /* Darwin uses a special PIC legitimizer. */
10490 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10492 operands[1] =
10493 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10494 operands[0]);
10495 if (operands[0] != operands[1])
10496 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10497 return;
10499 #endif
10501 /* If we are to limit the number of things we put in the TOC and
10502 this is a symbol plus a constant we can add in one insn,
10503 just put the symbol in the TOC and add the constant. Don't do
10504 this if reload is in progress. */
10505 if (GET_CODE (operands[1]) == CONST
10506 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
10507 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10508 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10509 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10510 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10511 && ! side_effects_p (operands[0]))
10513 rtx sym =
10514 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10515 rtx other = XEXP (XEXP (operands[1], 0), 1);
10517 sym = force_reg (mode, sym);
10518 emit_insn (gen_add3_insn (operands[0], sym, other));
10519 return;
10522 operands[1] = force_const_mem (mode, operands[1]);
10524 if (TARGET_TOC
10525 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10526 && constant_pool_expr_p (XEXP (operands[1], 0))
10527 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
10528 get_pool_constant (XEXP (operands[1], 0)),
10529 get_pool_mode (XEXP (operands[1], 0))))
10531 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10532 operands[0]);
10533 operands[1] = gen_const_mem (mode, tocref);
10534 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10537 break;
10539 case TImode:
10540 if (!VECTOR_MEM_VSX_P (TImode))
10541 rs6000_eliminate_indexed_memrefs (operands);
10542 break;
10544 case PTImode:
10545 rs6000_eliminate_indexed_memrefs (operands);
10546 break;
10548 default:
10549 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10552 /* Above, we may have called force_const_mem which may have returned
10553 an invalid address. If we can, fix this up; otherwise, reload will
10554 have to deal with it. */
10555 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
10556 operands[1] = validize_mem (operands[1]);
10558 emit_set:
10559 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10562 /* Return true if a structure, union or array containing FIELD should be
10563 accessed using `BLKMODE'.
10565 For the SPE, simd types are V2SI, and gcc can be tempted to put the
10566 entire thing in a DI and use subregs to access the internals.
10567 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
10568 back-end. Because a single GPR can hold a V2SI, but not a DI, the
10569 best thing to do is set structs to BLKmode and avoid Severe Tire
10570 Damage.
10572 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
10573 fit into one GPR, whereas DI still needs two. */
10575 static bool
10576 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
10578 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
10579 || (TARGET_E500_DOUBLE && mode == DFmode));
10582 /* Nonzero if we can use a floating-point register to pass this arg. */
10583 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10584 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10585 && (CUM)->fregno <= FP_ARG_MAX_REG \
10586 && TARGET_HARD_FLOAT && TARGET_FPRS)
10588 /* Nonzero if we can use an AltiVec register to pass this arg. */
10589 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10590 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10591 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10592 && TARGET_ALTIVEC_ABI \
10593 && (NAMED))
10595 /* Walk down the type tree of TYPE counting consecutive base elements.
10596 If *MODEP is VOIDmode, then set it to the first valid floating point
10597 or vector type. If a non-floating point or vector type is found, or
10598 if a floating point or vector type that doesn't match a non-VOIDmode
10599 *MODEP is found, then return -1, otherwise return the count in the
10600 sub-tree. */
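/* Illustrative outcomes (types assumed here, not from the source):
     struct { double re, im; }     -> 2, *modep = DFmode
     struct { float v[3]; }        -> 3, *modep = SFmode
     struct { double d; float f; } -> -1 (element modes differ)  */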
10602 static int
10603 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10605 machine_mode mode;
10606 HOST_WIDE_INT size;
10608 switch (TREE_CODE (type))
10610 case REAL_TYPE:
10611 mode = TYPE_MODE (type);
10612 if (!SCALAR_FLOAT_MODE_P (mode))
10613 return -1;
10615 if (*modep == VOIDmode)
10616 *modep = mode;
10618 if (*modep == mode)
10619 return 1;
10621 break;
10623 case COMPLEX_TYPE:
10624 mode = TYPE_MODE (TREE_TYPE (type));
10625 if (!SCALAR_FLOAT_MODE_P (mode))
10626 return -1;
10628 if (*modep == VOIDmode)
10629 *modep = mode;
10631 if (*modep == mode)
10632 return 2;
10634 break;
10636 case VECTOR_TYPE:
10637 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10638 return -1;
10640 /* Use V4SImode as representative of all 128-bit vector types. */
10641 size = int_size_in_bytes (type);
10642 switch (size)
10644 case 16:
10645 mode = V4SImode;
10646 break;
10647 default:
10648 return -1;
10651 if (*modep == VOIDmode)
10652 *modep = mode;
10654 /* Vector modes are considered to be opaque: two vectors are
10655 equivalent for the purposes of being homogeneous aggregates
10656 if they are the same size. */
10657 if (*modep == mode)
10658 return 1;
10660 break;
10662 case ARRAY_TYPE:
10664 int count;
10665 tree index = TYPE_DOMAIN (type);
10667 /* Can't handle incomplete types nor sizes that are not
10668 fixed. */
10669 if (!COMPLETE_TYPE_P (type)
10670 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10671 return -1;
10673 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10674 if (count == -1
10675 || !index
10676 || !TYPE_MAX_VALUE (index)
10677 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10678 || !TYPE_MIN_VALUE (index)
10679 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10680 || count < 0)
10681 return -1;
10683 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10684 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10686 /* There must be no padding. */
10687 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10688 return -1;
10690 return count;
10693 case RECORD_TYPE:
10695 int count = 0;
10696 int sub_count;
10697 tree field;
10699 /* Can't handle incomplete types nor sizes that are not
10700 fixed. */
10701 if (!COMPLETE_TYPE_P (type)
10702 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10703 return -1;
10705 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10707 if (TREE_CODE (field) != FIELD_DECL)
10708 continue;
10710 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10711 if (sub_count < 0)
10712 return -1;
10713 count += sub_count;
10716 /* There must be no padding. */
10717 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10718 return -1;
10720 return count;
10723 case UNION_TYPE:
10724 case QUAL_UNION_TYPE:
10726 /* These aren't very interesting except in a degenerate case. */
10727 int count = 0;
10728 int sub_count;
10729 tree field;
10731 /* Can't handle incomplete types nor sizes that are not
10732 fixed. */
10733 if (!COMPLETE_TYPE_P (type)
10734 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10735 return -1;
10737 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10739 if (TREE_CODE (field) != FIELD_DECL)
10740 continue;
10742 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10743 if (sub_count < 0)
10744 return -1;
10745 count = count > sub_count ? count : sub_count;
10748 /* There must be no padding. */
10749 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
10750 return -1;
10752 return count;
10755 default:
10756 break;
10759 return -1;
10762 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10763 float or vector aggregate that shall be passed in FP/vector registers
10764 according to the ELFv2 ABI, return the homogeneous element mode in
10765 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10767 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
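/* E.g. (layout assumed) under ELFv2 a struct { double a, b, c, d; }
   argument yields *ELT_MODE = DFmode and *N_ELTS = 4: four fields
   times one FPR each is within the AGGR_ARG_NUM_REG limit, so the
   aggregate travels in four FPRs instead of GPRs or memory.  */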
10769 static bool
10770 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10771 machine_mode *elt_mode,
10772 int *n_elts)
10774 /* Note that we do not accept complex types at the top level as
10775 homogeneous aggregates; these types are handled via the
10776 targetm.calls.split_complex_arg mechanism. Complex types
10777 can be elements of homogeneous aggregates, however. */
10778 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
10780 machine_mode field_mode = VOIDmode;
10781 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10783 if (field_count > 0)
10785 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
10786 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
10788 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10789 up to AGGR_ARG_NUM_REG registers. */
10790 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
10792 if (elt_mode)
10793 *elt_mode = field_mode;
10794 if (n_elts)
10795 *n_elts = field_count;
10796 return true;
10801 if (elt_mode)
10802 *elt_mode = mode;
10803 if (n_elts)
10804 *n_elts = 1;
10805 return false;
10808 /* Return a nonzero value to say to return the function value in
10809 memory, just as large structures are always returned. TYPE will be
10810 the data type of the value, and FNTYPE will be the type of the
10811 function doing the returning, or @code{NULL} for libcalls.
10813 The AIX ABI for the RS/6000 specifies that all structures are
10814 returned in memory. The Darwin ABI does the same.
10816 For the Darwin 64 Bit ABI, a function result can be returned in
10817 registers or in memory, depending on the size of the return data
10818 type. If it is returned in registers, the value occupies the same
10819 registers as it would if it were the first and only function
10820 argument. Otherwise, the function places its result in memory at
10821 the location pointed to by GPR3.
10823 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10824 but a draft put them in memory, and GCC used to implement the draft
10825 instead of the final standard. Therefore, aix_struct_return
10826 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10827 compatibility can change DRAFT_V4_STRUCT_RET to override the
10828 default, and -m switches get the final word. See
10829 rs6000_option_override_internal for more details.
10831 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10832 long double support is enabled. These values are returned in memory.
10834 int_size_in_bytes returns -1 for variable size objects, which go in
10835 memory always. The cast to unsigned makes -1 > 8. */
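/* E.g. (layouts assumed): under ELFv2, struct { long a, b; }
   (16 bytes) is returned in registers even though it is not a
   homogeneous FP aggregate, struct { long a, b, c; } (24 bytes)
   is returned in memory, and with aix_struct_return set every
   aggregate is returned in memory.  */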
10837 static bool
10838 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10840 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10841 if (TARGET_MACHO
10842 && rs6000_darwin64_abi
10843 && TREE_CODE (type) == RECORD_TYPE
10844 && int_size_in_bytes (type) > 0)
10846 CUMULATIVE_ARGS valcum;
10847 rtx valret;
10849 valcum.words = 0;
10850 valcum.fregno = FP_ARG_MIN_REG;
10851 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10852 /* Do a trial code generation as if this were going to be passed
10853 as an argument; if any part goes in memory, we return NULL. */
10854 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10855 if (valret)
10856 return false;
10857 /* Otherwise fall through to more conventional ABI rules. */
10860 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
10861 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10862 NULL, NULL))
10863 return false;
10865 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10866 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10867 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10868 return false;
10870 if (AGGREGATE_TYPE_P (type)
10871 && (aix_struct_return
10872 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10873 return true;
10875 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10876 modes only exist for GCC vector types if -maltivec. */
10877 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10878 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10879 return false;
10881 /* Return synthetic vectors in memory. */
10882 if (TREE_CODE (type) == VECTOR_TYPE
10883 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10885 static bool warned_for_return_big_vectors = false;
10886 if (!warned_for_return_big_vectors)
10888 warning (0, "GCC vector returned by reference: "
10889 "non-standard ABI extension with no compatibility guarantee");
10890 warned_for_return_big_vectors = true;
10892 return true;
10895 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10896 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10897 return true;
10899 return false;
10902 /* Specify whether values returned in registers should be at the most
10903 significant end of a register. We want aggregates returned by
10904 value to match the way aggregates are passed to functions. */
10906 static bool
10907 rs6000_return_in_msb (const_tree valtype)
10909 return (DEFAULT_ABI == ABI_ELFv2
10910 && BYTES_BIG_ENDIAN
10911 && AGGREGATE_TYPE_P (valtype)
10912 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
10915 #ifdef HAVE_AS_GNU_ATTRIBUTE
10916 /* Return TRUE if a call to function FNDECL may be one that
10917 potentially affects the function calling ABI of the object file. */
10919 static bool
10920 call_ABI_of_interest (tree fndecl)
10922 if (symtab->state == EXPANSION)
10924 struct cgraph_node *c_node;
10926 /* Libcalls are always interesting. */
10927 if (fndecl == NULL_TREE)
10928 return true;
10930 /* Any call to an external function is interesting. */
10931 if (DECL_EXTERNAL (fndecl))
10932 return true;
10934 /* Interesting functions that we are emitting in this object file. */
10935 c_node = cgraph_node::get (fndecl);
10936 c_node = c_node->ultimate_alias_target ();
10937 return !c_node->only_called_directly_p ();
10939 return false;
10941 #endif
10943 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10944 for a call to a function whose data type is FNTYPE.
10945 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
10947 For incoming args we set the number of arguments in the prototype large
10948 so we never return a PARALLEL. */
10950 void
10951 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10952 rtx libname ATTRIBUTE_UNUSED, int incoming,
10953 int libcall, int n_named_args,
10954 tree fndecl ATTRIBUTE_UNUSED,
10955 machine_mode return_mode ATTRIBUTE_UNUSED)
10957 static CUMULATIVE_ARGS zero_cumulative;
10959 *cum = zero_cumulative;
10960 cum->words = 0;
10961 cum->fregno = FP_ARG_MIN_REG;
10962 cum->vregno = ALTIVEC_ARG_MIN_REG;
10963 cum->prototype = (fntype && prototype_p (fntype));
10964 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10965 ? CALL_LIBCALL : CALL_NORMAL);
10966 cum->sysv_gregno = GP_ARG_MIN_REG;
10967 cum->stdarg = stdarg_p (fntype);
10968 cum->libcall = libcall;
10970 cum->nargs_prototype = 0;
10971 if (incoming || cum->prototype)
10972 cum->nargs_prototype = n_named_args;
10974 /* Check for a longcall attribute. */
10975 if ((!fntype && rs6000_default_long_calls)
10976 || (fntype
10977 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10978 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10979 cum->call_cookie |= CALL_LONG;
10981 if (TARGET_DEBUG_ARG)
10983 fprintf (stderr, "\ninit_cumulative_args:");
10984 if (fntype)
10986 tree ret_type = TREE_TYPE (fntype);
10987 fprintf (stderr, " ret code = %s,",
10988 get_tree_code_name (TREE_CODE (ret_type)));
10991 if (cum->call_cookie & CALL_LONG)
10992 fprintf (stderr, " longcall,");
10994 fprintf (stderr, " proto = %d, nargs = %d\n",
10995 cum->prototype, cum->nargs_prototype);
10998 #ifdef HAVE_AS_GNU_ATTRIBUTE
10999 if (DEFAULT_ABI == ABI_V4)
11001 cum->escapes = call_ABI_of_interest (fndecl);
11002 if (cum->escapes)
11004 tree return_type;
11006 if (fntype)
11008 return_type = TREE_TYPE (fntype);
11009 return_mode = TYPE_MODE (return_type);
11011 else
11012 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11014 if (return_type != NULL)
11016 if (TREE_CODE (return_type) == RECORD_TYPE
11017 && TYPE_TRANSPARENT_AGGR (return_type))
11019 return_type = TREE_TYPE (first_field (return_type));
11020 return_mode = TYPE_MODE (return_type);
11022 if (AGGREGATE_TYPE_P (return_type)
11023 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11024 <= 8))
11025 rs6000_returns_struct = true;
11027 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
11028 rs6000_passes_float = true;
11029 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11030 || SPE_VECTOR_MODE (return_mode))
11031 rs6000_passes_vector = true;
11034 #endif
11036 if (fntype
11037 && !TARGET_ALTIVEC
11038 && TARGET_ALTIVEC_ABI
11039 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11041 error ("cannot return value in vector register because"
11042 " altivec instructions are disabled, use -maltivec"
11043 " to enable them");
11047 /* The mode the ABI uses for a word. This is not the same as word_mode
11048 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11050 static machine_mode
11051 rs6000_abi_word_mode (void)
11053 return TARGET_32BIT ? SImode : DImode;
11056 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11057 static char *
11058 rs6000_offload_options (void)
11060 if (TARGET_64BIT)
11061 return xstrdup ("-foffload-abi=lp64");
11062 else
11063 return xstrdup ("-foffload-abi=ilp32");
11066 /* On rs6000, function arguments are promoted, as are function return
11067 values. */
11069 static machine_mode
11070 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11071 machine_mode mode,
11072 int *punsignedp ATTRIBUTE_UNUSED,
11073 const_tree, int)
11075 PROMOTE_MODE (mode, *punsignedp, type);
11077 return mode;
11080 /* Return true if TYPE must be passed on the stack and not in registers. */
11082 static bool
11083 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11085 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11086 return must_pass_in_stack_var_size (mode, type);
11087 else
11088 return must_pass_in_stack_var_size_or_pad (mode, type);
11091 static inline bool
11092 is_complex_IBM_long_double (machine_mode mode)
11094 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11097 /* Whether ABI_V4 passes MODE args to a function in floating point
11098 registers. */
11100 static bool
11101 abi_v4_pass_in_fpr (machine_mode mode)
11103 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11104 return false;
11105 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11106 return true;
11107 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11108 return true;
11109 /* ABI_V4 passes complex IBM long double in 8 gprs.
11110 Stupid, but we can't change the ABI now. */
11111 if (is_complex_IBM_long_double (mode))
11112 return false;
11113 if (FLOAT128_2REG_P (mode))
11114 return true;
11115 if (DECIMAL_FLOAT_MODE_P (mode))
11116 return true;
11117 return false;
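/* Illustration (editorial, assuming -mhard-float with both single
   and double float enabled): abi_v4_pass_in_fpr answers

     SFmode (float)                 -> true   FPR
     DFmode (double)                -> true   FPR
     TFmode (IBM 128-bit format)    -> true   FLOAT128_2REG_P
     ICmode/TCmode (complex IBM)    -> false  8 GPRs, as noted above
     SDmode/DDmode/TDmode (DFP)     -> true   FPR

   so only the complex IBM long double case deliberately falls back
   to the GPR path.  */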
11120 /* If defined, a C expression which determines whether, and in which
11121 direction, to pad out an argument with extra space. The value
11122 should be of type `enum direction': either `upward' to pad above
11123 the argument, `downward' to pad below, or `none' to inhibit
11124 padding.
11126 For the AIX ABI structs are always stored left shifted in their
11127 argument slot. */
11129 enum direction
11130 function_arg_padding (machine_mode mode, const_tree type)
11132 #ifndef AGGREGATE_PADDING_FIXED
11133 #define AGGREGATE_PADDING_FIXED 0
11134 #endif
11135 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11136 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11137 #endif
11139 if (!AGGREGATE_PADDING_FIXED)
11141 /* GCC used to pass structures of the same size as integer types as
11142 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11143 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11144 passed padded downward, except that -mstrict-align further
11145 muddied the water in that multi-component structures of 2 and 4
11146 bytes in size were passed padded upward.
11148 The following arranges for best compatibility with previous
11149 versions of gcc, but removes the -mstrict-align dependency. */
11150 if (BYTES_BIG_ENDIAN)
11152 HOST_WIDE_INT size = 0;
11154 if (mode == BLKmode)
11156 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11157 size = int_size_in_bytes (type);
11159 else
11160 size = GET_MODE_SIZE (mode);
11162 if (size == 1 || size == 2 || size == 4)
11163 return downward;
11165 return upward;
11168 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11170 if (type != 0 && AGGREGATE_TYPE_P (type))
11171 return upward;
11174 /* Fall back to the default. */
11175 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
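/* Illustration (editorial): on big-endian targets with neither
   AGGREGATE_PADDING_FIXED nor AGGREGATES_PAD_UPWARD_ALWAYS, the
   size-based rule above gives

     struct { char a, b; }       size 2          -> downward
     struct { char a, b, c; }    size 3          -> upward
     struct { int a; }           size 4          -> downward
     struct { int a; char b; }   size 8 (padded) -> upward

   which is the old integer-like treatment for 1-, 2- and 4-byte
   structs.  */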
11178 /* If defined, a C expression that gives the alignment boundary, in bits,
11179 of an argument with the specified mode and type. If it is not defined,
11180 PARM_BOUNDARY is used for all arguments.
11182 V.4 wants long longs and doubles to be double word aligned. Just
11183 testing the mode size is a boneheaded way to do this as it means
11184 that other types such as complex int are also double word aligned.
11185 However, we're stuck with this because changing the ABI might break
11186 existing library interfaces.
11188 Doubleword align SPE vectors.
11189 Quadword align Altivec/VSX vectors.
11190 Quadword align large synthetic vector types. */
11192 static unsigned int
11193 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11195 machine_mode elt_mode;
11196 int n_elts;
11198 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11200 if (DEFAULT_ABI == ABI_V4
11201 && (GET_MODE_SIZE (mode) == 8
11202 || (TARGET_HARD_FLOAT
11203 && TARGET_FPRS
11204 && !is_complex_IBM_long_double (mode)
11205 && FLOAT128_2REG_P (mode))))
11206 return 64;
11207 else if (FLOAT128_VECTOR_P (mode))
11208 return 128;
11209 else if (SPE_VECTOR_MODE (mode)
11210 || (type && TREE_CODE (type) == VECTOR_TYPE
11211 && int_size_in_bytes (type) >= 8
11212 && int_size_in_bytes (type) < 16))
11213 return 64;
11214 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11215 || (type && TREE_CODE (type) == VECTOR_TYPE
11216 && int_size_in_bytes (type) >= 16))
11217 return 128;
11219 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11220 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11221 -mcompat-align-parm is used. */
11222 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11223 || DEFAULT_ABI == ABI_ELFv2)
11224 && type && TYPE_ALIGN (type) > 64)
11226 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11227 or homogeneous float/vector aggregates here. We already handled
11228 vector aggregates above, but still need to check for float here. */
11229 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11230 && !SCALAR_FLOAT_MODE_P (elt_mode));
11232 /* We used to check for BLKmode instead of the above aggregate type
11233 check. Warn when this results in any difference to the ABI. */
11234 if (aggregate_p != (mode == BLKmode))
11236 static bool warned;
11237 if (!warned && warn_psabi)
11239 warned = true;
11240 inform (input_location,
11241 "the ABI of passing aggregates with %d-byte alignment"
11242 " has changed in GCC 5",
11243 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11247 if (aggregate_p)
11248 return 128;
11251 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11252 implement the "aggregate type" check as a BLKmode check here; this
11253 means certain aggregate types are in fact not aligned. */
11254 if (TARGET_MACHO && rs6000_darwin64_abi
11255 && mode == BLKmode
11256 && type && TYPE_ALIGN (type) > 64)
11257 return 128;
11259 return PARM_BOUNDARY;
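/* Illustration (editorial sketch): typical boundaries returned
   above for 64-bit ELFv2 code:

     double                       ->  64 (PARM_BOUNDARY)
     vector int (16 bytes)        -> 128
     __float128                   -> 128 (FLOAT128_VECTOR_P)
     struct with TYPE_ALIGN > 64
       (non-homogeneous)          -> 128, possibly with the -Wpsabi
                                     note when the old BLKmode rule
                                     would have disagreed.  */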
11262 /* The offset in words to the start of the parameter save area. */
11264 static unsigned int
11265 rs6000_parm_offset (void)
11267 return (DEFAULT_ABI == ABI_V4 ? 2
11268 : DEFAULT_ABI == ABI_ELFv2 ? 4
11269 : 6);
11272 /* For a function parm of MODE and TYPE, return the starting word in
11273 the parameter area. NWORDS of the parameter area are already used. */
11275 static unsigned int
11276 rs6000_parm_start (machine_mode mode, const_tree type,
11277 unsigned int nwords)
11279 unsigned int align;
11281 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11282 return nwords + (-(rs6000_parm_offset () + nwords) & align);
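/* Worked example (editorial): under the 32-bit V.4 ABI,
   rs6000_parm_offset () == 2 and PARM_BOUNDARY == 32, so a
   doubleword-aligned argument (boundary 64, ALIGN == 64/32 - 1 == 1)
   arriving when NWORDS == 3 starts at

     3 + (-(2 + 3) & 1) == 3 + 1 == 4

   i.e. one padding word is skipped so that word 2 + 4 == 6 of the
   frame is doubleword aligned.  */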
11285 /* Compute the size (in words) of a function argument. */
11287 static unsigned long
11288 rs6000_arg_size (machine_mode mode, const_tree type)
11290 unsigned long size;
11292 if (mode != BLKmode)
11293 size = GET_MODE_SIZE (mode);
11294 else
11295 size = int_size_in_bytes (type);
11297 if (TARGET_32BIT)
11298 return (size + 3) >> 2;
11299 else
11300 return (size + 7) >> 3;
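/* Worked example (editorial): a 10-byte BLKmode struct occupies
   (10 + 3) >> 2 == 3 words when TARGET_32BIT and (10 + 7) >> 3 == 2
   doublewords when TARGET_64BIT; non-BLKmode arguments use
   GET_MODE_SIZE instead of int_size_in_bytes.  */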
11303 /* Use this to flush pending int fields. */
11305 static void
11306 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11307 HOST_WIDE_INT bitpos, int final)
11309 unsigned int startbit, endbit;
11310 int intregs, intoffset;
11311 machine_mode mode;
11313 /* Handle the situations where a float is taking up the first half
11314 of the GPR, and the other half is empty (typically due to
11315 alignment restrictions). We can detect this by an 8-byte-aligned
11316 int field, or by seeing that this is the final flush for this
11317 argument. Count the word and continue on. */
11318 if (cum->floats_in_gpr == 1
11319 && (cum->intoffset % 64 == 0
11320 || (cum->intoffset == -1 && final)))
11322 cum->words++;
11323 cum->floats_in_gpr = 0;
11326 if (cum->intoffset == -1)
11327 return;
11329 intoffset = cum->intoffset;
11330 cum->intoffset = -1;
11331 cum->floats_in_gpr = 0;
11333 if (intoffset % BITS_PER_WORD != 0)
11335 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11336 MODE_INT, 0);
11337 if (mode == BLKmode)
11339 /* We couldn't find an appropriate mode, which happens,
11340 e.g., in packed structs when there are 3 bytes to load.
11341 Move intoffset back to the beginning of the word in this
11342 case. */
11343 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11347 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11348 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11349 intregs = (endbit - startbit) / BITS_PER_WORD;
11350 cum->words += intregs;
11351 /* words should be unsigned. */
11352 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11354 int pad = (endbit/BITS_PER_WORD) - cum->words;
11355 cum->words += pad;
11359 /* The darwin64 ABI calls for us to recurse down through structs,
11360 looking for elements passed in registers. Unfortunately, we have
11361 to track int register count here also because of misalignments
11362 in powerpc alignment mode. */
11364 static void
11365 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11366 const_tree type,
11367 HOST_WIDE_INT startbitpos)
11369 tree f;
11371 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11372 if (TREE_CODE (f) == FIELD_DECL)
11374 HOST_WIDE_INT bitpos = startbitpos;
11375 tree ftype = TREE_TYPE (f);
11376 machine_mode mode;
11377 if (ftype == error_mark_node)
11378 continue;
11379 mode = TYPE_MODE (ftype);
11381 if (DECL_SIZE (f) != 0
11382 && tree_fits_uhwi_p (bit_position (f)))
11383 bitpos += int_bit_position (f);
11385 /* ??? FIXME: else assume zero offset. */
11387 if (TREE_CODE (ftype) == RECORD_TYPE)
11388 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11389 else if (USE_FP_FOR_ARG_P (cum, mode))
11391 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11392 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11393 cum->fregno += n_fpregs;
11394 /* Single-precision floats present a special problem for
11395 us, because they are smaller than an 8-byte GPR, and so
11396 the structure-packing rules combined with the standard
11397 varargs behavior mean that we want to pack float/float
11398 and float/int combinations into a single register's
11399 space. This is complicated by the arg advance flushing,
11400 which works on arbitrarily large groups of int-type
11401 fields. */
11402 if (mode == SFmode)
11404 if (cum->floats_in_gpr == 1)
11406 /* Two floats in a word; count the word and reset
11407 the float count. */
11408 cum->words++;
11409 cum->floats_in_gpr = 0;
11411 else if (bitpos % 64 == 0)
11413 /* A float at the beginning of an 8-byte word;
11414 count it and put off adjusting cum->words until
11415 we see if an arg advance flush is going to do it
11416 for us. */
11417 cum->floats_in_gpr++;
11419 else
11421 /* The float is at the end of a word, preceded
11422 by integer fields, so the arg advance flush
11423 just above has already set cum->words and
11424 everything is taken care of. */
11427 else
11428 cum->words += n_fpregs;
11430 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11432 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11433 cum->vregno++;
11434 cum->words += 2;
11436 else if (cum->intoffset == -1)
11437 cum->intoffset = bitpos;
11441 /* Check for an item that needs to be considered specially under the Darwin
11442 64-bit ABI: record types where the mode is BLKmode or the structure is
11443 8 bytes in size. */
11444 static int
11445 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11447 return rs6000_darwin64_abi
11448 && ((mode == BLKmode
11449 && TREE_CODE (type) == RECORD_TYPE
11450 && int_size_in_bytes (type) > 0)
11451 || (type && TREE_CODE (type) == RECORD_TYPE
11452 && int_size_in_bytes (type) == 8)) ? 1 : 0;
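/* Illustration (editorial): the check above fires for, e.g.,

     struct { char c; double d; }   BLKmode record, 16 bytes
     struct { int a, b; }           8-byte record with a scalar mode

   but not for scalars, vector types, or records that get a small
   integer mode and are not exactly 8 bytes wide.  */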
11455 /* Update the data in CUM to advance over an argument
11456 of mode MODE and data type TYPE.
11457 (TYPE is null for libcalls where that information may not be available.)
11459 Note that for args passed by reference, function_arg will be called
11460 with MODE and TYPE set to that of the pointer to the arg, not the arg
11461 itself. */
11463 static void
11464 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11465 const_tree type, bool named, int depth)
11467 machine_mode elt_mode;
11468 int n_elts;
11470 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11472 /* Only tick off an argument if we're not recursing. */
11473 if (depth == 0)
11474 cum->nargs_prototype--;
11476 #ifdef HAVE_AS_GNU_ATTRIBUTE
11477 if (DEFAULT_ABI == ABI_V4
11478 && cum->escapes)
11480 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
11481 rs6000_passes_float = true;
11482 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11483 rs6000_passes_vector = true;
11484 else if (SPE_VECTOR_MODE (mode)
11485 && !cum->stdarg
11486 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11487 rs6000_passes_vector = true;
11489 #endif
11491 if (TARGET_ALTIVEC_ABI
11492 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11493 || (type && TREE_CODE (type) == VECTOR_TYPE
11494 && int_size_in_bytes (type) == 16)))
11496 bool stack = false;
11498 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11500 cum->vregno += n_elts;
11502 if (!TARGET_ALTIVEC)
11503 error ("cannot pass argument in vector register because"
11504 " altivec instructions are disabled, use -maltivec"
11505 " to enable them");
11507 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11508 even if it is going to be passed in a vector register.
11509 Darwin does the same for variable-argument functions. */
11510 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11511 && TARGET_64BIT)
11512 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11513 stack = true;
11515 else
11516 stack = true;
11518 if (stack)
11520 int align;
11522 /* Vector parameters must be 16-byte aligned. In 32-bit
11523 mode this means we need to take into account the offset
11524 to the parameter save area. In 64-bit mode, they just
11525 have to start on an even word, since the parameter save
11526 area is 16-byte aligned. */
11527 if (TARGET_32BIT)
11528 align = -(rs6000_parm_offset () + cum->words) & 3;
11529 else
11530 align = cum->words & 1;
11531 cum->words += align + rs6000_arg_size (mode, type);
11533 if (TARGET_DEBUG_ARG)
11535 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11536 cum->words, align);
11537 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11538 cum->nargs_prototype, cum->prototype,
11539 GET_MODE_NAME (mode));
11543 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
11544 && !cum->stdarg
11545 && cum->sysv_gregno <= GP_ARG_MAX_REG)
11546 cum->sysv_gregno++;
11548 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11550 int size = int_size_in_bytes (type);
11551 /* Variable sized types have size == -1 and are
11552 treated as if consisting entirely of ints.
11553 Pad to 16 byte boundary if needed. */
11554 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11555 && (cum->words % 2) != 0)
11556 cum->words++;
11557 /* For varargs, we can just go up by the size of the struct. */
11558 if (!named)
11559 cum->words += (size + 7) / 8;
11560 else
11562 /* It is tempting to say int register count just goes up by
11563 sizeof(type)/8, but this is wrong in a case such as
11564 { int; double; int; } [powerpc alignment]. We have to
11565 grovel through the fields for these too. */
11566 cum->intoffset = 0;
11567 cum->floats_in_gpr = 0;
11568 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11569 rs6000_darwin64_record_arg_advance_flush (cum,
11570 size * BITS_PER_UNIT, 1);
11572 if (TARGET_DEBUG_ARG)
11574 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11575 cum->words, TYPE_ALIGN (type), size);
11576 fprintf (stderr,
11577 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11578 cum->nargs_prototype, cum->prototype,
11579 GET_MODE_NAME (mode));
11582 else if (DEFAULT_ABI == ABI_V4)
11584 if (abi_v4_pass_in_fpr (mode))
11586 /* _Decimal128 must use an even/odd register pair. This assumes
11587 that the register number is odd when fregno is odd. */
11588 if (mode == TDmode && (cum->fregno % 2) == 1)
11589 cum->fregno++;
11591 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11592 <= FP_ARG_V4_MAX_REG)
11593 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11594 else
11596 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11597 if (mode == DFmode || FLOAT128_IBM_P (mode)
11598 || mode == DDmode || mode == TDmode)
11599 cum->words += cum->words & 1;
11600 cum->words += rs6000_arg_size (mode, type);
11603 else
11605 int n_words = rs6000_arg_size (mode, type);
11606 int gregno = cum->sysv_gregno;
11608 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11609 (r7,r8) or (r9,r10). As does any other 2 word item such
11610 as complex int due to a historical mistake. */
11611 if (n_words == 2)
11612 gregno += (1 - gregno) & 1;
11614 /* Multi-reg args are not split between registers and stack. */
11615 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11617 /* Long long and SPE vectors are aligned on the stack.
11618 So are other 2 word items such as complex int due to
11619 a historical mistake. */
11620 if (n_words == 2)
11621 cum->words += cum->words & 1;
11622 cum->words += n_words;
11625 /* Note: we continue to accumulate gregno even after we have started
11626 spilling to the stack; this tells expand_builtin_saveregs
11627 that spilling has begun. */
11628 cum->sysv_gregno = gregno + n_words;
11631 if (TARGET_DEBUG_ARG)
11633 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11634 cum->words, cum->fregno);
11635 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11636 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11637 fprintf (stderr, "mode = %4s, named = %d\n",
11638 GET_MODE_NAME (mode), named);
11641 else
11643 int n_words = rs6000_arg_size (mode, type);
11644 int start_words = cum->words;
11645 int align_words = rs6000_parm_start (mode, type, start_words);
11647 cum->words = align_words + n_words;
11649 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
11651 /* _Decimal128 must be passed in an even/odd float register pair.
11652 This assumes that the register number is odd when fregno is
11653 odd. */
11654 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11655 cum->fregno++;
11656 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11659 if (TARGET_DEBUG_ARG)
11661 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11662 cum->words, cum->fregno);
11663 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11664 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11665 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11666 named, align_words - start_words, depth);
11671 static void
11672 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11673 const_tree type, bool named)
11675 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11679 static rtx
11680 spe_build_register_parallel (machine_mode mode, int gregno)
11682 rtx r1, r3, r5, r7;
11684 switch (mode)
11686 case DFmode:
11687 r1 = gen_rtx_REG (DImode, gregno);
11688 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11689 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
11691 case DCmode:
11692 case TFmode:
11693 r1 = gen_rtx_REG (DImode, gregno);
11694 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11695 r3 = gen_rtx_REG (DImode, gregno + 2);
11696 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11697 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
11699 case TCmode:
11700 r1 = gen_rtx_REG (DImode, gregno);
11701 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
11702 r3 = gen_rtx_REG (DImode, gregno + 2);
11703 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
11704 r5 = gen_rtx_REG (DImode, gregno + 4);
11705 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
11706 r7 = gen_rtx_REG (DImode, gregno + 6);
11707 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
11708 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
11710 default:
11711 gcc_unreachable ();
11715 /* Determine where to put a SIMD argument on the SPE. */
11716 static rtx
11717 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
11718 const_tree type)
11720 int gregno = cum->sysv_gregno;
11722 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
11723 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
11724 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
11725 || mode == DCmode || mode == TCmode))
11727 int n_words = rs6000_arg_size (mode, type);
11729 /* Doubles go in an odd/even register pair (r5/r6, etc). */
11730 if (mode == DFmode)
11731 gregno += (1 - gregno) & 1;
11733 /* Multi-reg args are not split between registers and stack. */
11734 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11735 return NULL_RTX;
11737 return spe_build_register_parallel (mode, gregno);
11739 if (cum->stdarg)
11741 int n_words = rs6000_arg_size (mode, type);
11743 /* SPE vectors are put in odd registers. */
11744 if (n_words == 2 && (gregno & 1) == 0)
11745 gregno += 1;
11747 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
11749 rtx r1, r2;
11750 machine_mode m = SImode;
11752 r1 = gen_rtx_REG (m, gregno);
11753 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
11754 r2 = gen_rtx_REG (m, gregno + 1);
11755 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
11756 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
11758 else
11759 return NULL_RTX;
11761 else
11763 if (gregno <= GP_ARG_MAX_REG)
11764 return gen_rtx_REG (mode, gregno);
11765 else
11766 return NULL_RTX;
11770 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11771 structure between cum->intoffset and bitpos to integer registers. */
11773 static void
11774 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11775 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11777 machine_mode mode;
11778 unsigned int regno;
11779 unsigned int startbit, endbit;
11780 int this_regno, intregs, intoffset;
11781 rtx reg;
11783 if (cum->intoffset == -1)
11784 return;
11786 intoffset = cum->intoffset;
11787 cum->intoffset = -1;
11789 /* If this is the trailing part of a word, try to only load that
11790 much into the register. Otherwise load the whole register. Note
11791 that in the latter case we may pick up unwanted bits. It's not a
11792 problem at the moment, but we may wish to revisit this. */
11794 if (intoffset % BITS_PER_WORD != 0)
11796 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11797 MODE_INT, 0);
11798 if (mode == BLKmode)
11800 /* We couldn't find an appropriate mode, which happens,
11801 e.g., in packed structs when there are 3 bytes to load.
11802 Move intoffset back to the beginning of the word in this
11803 case. */
11804 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11805 mode = word_mode;
11808 else
11809 mode = word_mode;
11811 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11812 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11813 intregs = (endbit - startbit) / BITS_PER_WORD;
11814 this_regno = cum->words + intoffset / BITS_PER_WORD;
11816 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11817 cum->use_stack = 1;
11819 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11820 if (intregs <= 0)
11821 return;
11823 intoffset /= BITS_PER_UNIT;
11826 regno = GP_ARG_MIN_REG + this_regno;
11827 reg = gen_rtx_REG (mode, regno);
11828 rvec[(*k)++] =
11829 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11831 this_regno += 1;
11832 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11833 mode = word_mode;
11834 intregs -= 1;
11836 while (intregs > 0);
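/* Worked example (editorial): when flushing

     struct { int a; int b; double d; }

   at the point D is reached, intoffset == 0 and BITPOS == 64, so
   startbit == 0, endbit == 64 and exactly one word_mode GPR chunk
   covering A and B is appended to RVEC ahead of the FPR entry for
   D.  */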
11839 /* Recursive workhorse for the following. */
11841 static void
11842 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11843 HOST_WIDE_INT startbitpos, rtx rvec[],
11844 int *k)
11846 tree f;
11848 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11849 if (TREE_CODE (f) == FIELD_DECL)
11851 HOST_WIDE_INT bitpos = startbitpos;
11852 tree ftype = TREE_TYPE (f);
11853 machine_mode mode;
11854 if (ftype == error_mark_node)
11855 continue;
11856 mode = TYPE_MODE (ftype);
11858 if (DECL_SIZE (f) != 0
11859 && tree_fits_uhwi_p (bit_position (f)))
11860 bitpos += int_bit_position (f);
11862 /* ??? FIXME: else assume zero offset. */
11864 if (TREE_CODE (ftype) == RECORD_TYPE)
11865 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11866 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11868 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11869 #if 0
11870 switch (mode)
11872 case SCmode: mode = SFmode; break;
11873 case DCmode: mode = DFmode; break;
11874 case TCmode: mode = TFmode; break;
11875 default: break;
11877 #endif
11878 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11879 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11881 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11882 && (mode == TFmode || mode == TDmode));
11883 /* Long double or _Decimal128 split over regs and memory. */
11884 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11885 cum->use_stack = 1;
11887 rvec[(*k)++]
11888 = gen_rtx_EXPR_LIST (VOIDmode,
11889 gen_rtx_REG (mode, cum->fregno++),
11890 GEN_INT (bitpos / BITS_PER_UNIT));
11891 if (FLOAT128_2REG_P (mode))
11892 cum->fregno++;
11894 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11896 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11897 rvec[(*k)++]
11898 = gen_rtx_EXPR_LIST (VOIDmode,
11899 gen_rtx_REG (mode, cum->vregno++),
11900 GEN_INT (bitpos / BITS_PER_UNIT));
11902 else if (cum->intoffset == -1)
11903 cum->intoffset = bitpos;
11907 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11908 the register(s) to be used for each field and subfield of a struct
11909 being passed by value, along with the offset of where the
11910 register's value may be found in the block. FP fields go in FP
11911 registers, vector fields go in vector registers, and everything
11912 else goes in int registers, packed as in memory.
11914 This code is also used for function return values. RETVAL indicates
11915 whether this is the case.
11917 Much of this is taken from the SPARC V9 port, which has a similar
11918 calling convention. */
11920 static rtx
11921 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11922 bool named, bool retval)
11924 rtx rvec[FIRST_PSEUDO_REGISTER];
11925 int k = 1, kbase = 1;
11926 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11927 /* This is a copy; modifications are not visible to our caller. */
11928 CUMULATIVE_ARGS copy_cum = *orig_cum;
11929 CUMULATIVE_ARGS *cum = &copy_cum;
11931 /* Pad to 16 byte boundary if needed. */
11932 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11933 && (cum->words % 2) != 0)
11934 cum->words++;
11936 cum->intoffset = 0;
11937 cum->use_stack = 0;
11938 cum->named = named;
11940 /* Put entries into rvec[] for individual FP and vector fields, and
11941 for the chunks of memory that go in int regs. Note we start at
11942 element 1; 0 is reserved for an indication of using memory, and
11943 may or may not be filled in below. */
11944 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11945 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11947 /* If any part of the struct went on the stack put all of it there.
11948 This hack is because the generic code for
11949 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11950 parts of the struct are not at the beginning. */
11951 if (cum->use_stack)
11953 if (retval)
11954 return NULL_RTX; /* doesn't go in registers at all */
11955 kbase = 0;
11956 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11958 if (k > 1 || cum->use_stack)
11959 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11960 else
11961 return NULL_RTX;
11964 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11966 static rtx
11967 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11968 int align_words)
11970 int n_units;
11971 int i, k;
11972 rtx rvec[GP_ARG_NUM_REG + 1];
11974 if (align_words >= GP_ARG_NUM_REG)
11975 return NULL_RTX;
11977 n_units = rs6000_arg_size (mode, type);
11979 /* Optimize the simple case where the arg fits in one gpr, except in
11980 the case of BLKmode due to assign_parms assuming that registers are
11981 BITS_PER_WORD wide. */
11982 if (n_units == 0
11983 || (n_units == 1 && mode != BLKmode))
11984 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11986 k = 0;
11987 if (align_words + n_units > GP_ARG_NUM_REG)
11988 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11989 using a magic NULL_RTX component.
11990 This is not strictly correct. Only some of the arg belongs in
11991 memory, not all of it. However, the normal scheme using
11992 function_arg_partial_nregs can result in unusual subregs, eg.
11993 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11994 store the whole arg to memory is often more efficient than code
11995 to store pieces, and we know that space is available in the right
11996 place for the whole arg. */
11997 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11999 i = 0;
12002 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12003 rtx off = GEN_INT (i++ * 4);
12004 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12006 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12008 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
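/* Illustration (editorial sketch): for a DImode "long long" at
   ALIGN_WORDS == 7 with -m32 -mpowerpc64, only r10 remains, so the
   result is roughly

     (parallel:DI [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   where the leading (nil) element is the magic marker saying the
   argument also lives in memory, and the SImode piece avoids the
   problematic subregs described above.  */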
12011 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12012 but must also be copied into the parameter save area starting at
12013 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12014 to the GPRs and/or memory. Return the number of elements used. */
12016 static int
12017 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12018 int align_words, rtx *rvec)
12020 int k = 0;
12022 if (align_words < GP_ARG_NUM_REG)
12024 int n_words = rs6000_arg_size (mode, type);
12026 if (align_words + n_words > GP_ARG_NUM_REG
12027 || mode == BLKmode
12028 || (TARGET_32BIT && TARGET_POWERPC64))
12030 /* If this is partially on the stack, then we only
12031 include the portion actually in registers here. */
12032 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12033 int i = 0;
12035 if (align_words + n_words > GP_ARG_NUM_REG)
12037 /* Not all of the arg fits in gprs. Say that it goes in memory
12038 too, using a magic NULL_RTX component. Also see comment in
12039 rs6000_mixed_function_arg for why the normal
12040 function_arg_partial_nregs scheme doesn't work in this case. */
12041 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12046 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12047 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12048 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12050 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12052 else
12054 /* The whole arg fits in gprs. */
12055 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12056 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12059 else
12061 /* It's entirely in memory. */
12062 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12065 return k;
12068 /* RVEC is a vector of K components of an argument of mode MODE.
12069 Construct the final function_arg return value from it. */
12071 static rtx
12072 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12074 gcc_assert (k >= 1);
12076 /* Avoid returning a PARALLEL in the trivial cases. */
12077 if (k == 1)
12079 if (XEXP (rvec[0], 0) == NULL_RTX)
12080 return NULL_RTX;
12082 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12083 return XEXP (rvec[0], 0);
12086 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
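/* Illustration (editorial): the three possible shapes of the value
   built above:

     k == 1 and reg is NULL_RTX        -> NULL_RTX, all in memory
     k == 1 and GET_MODE (reg) == mode -> the bare hard REG
     anything else                     -> a PARALLEL of reg/offset
                                          pairs.  */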
12089 /* Determine where to put an argument to a function.
12090 Value is zero to push the argument on the stack,
12091 or a hard register in which to store the argument.
12093 MODE is the argument's machine mode.
12094 TYPE is the data type of the argument (as a tree).
12095 This is null for libcalls where that information may
12096 not be available.
12097 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12098 the preceding args and about the function being called. It is
12099 not modified in this routine.
12100 NAMED is nonzero if this argument is a named parameter
12101 (otherwise it is an extra parameter matching an ellipsis).
12103 On RS/6000 the first eight words of non-FP are normally in registers
12104 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12105 Under V.4, the first 8 FP args are in registers.
12107 If this is floating-point and no prototype is specified, we use
12108 both an FP and integer register (or possibly FP reg and stack). Library
12109 functions (when CALL_LIBCALL is set) always have the proper types for args,
12110 so we can pass the FP value just in one register. emit_library_function
12111 doesn't support PARALLEL anyway.
12113 Note that for args passed by reference, function_arg will be called
12114 with MODE and TYPE set to that of the pointer to the arg, not the arg
12115 itself. */
12117 static rtx
12118 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12119 const_tree type, bool named)
12121 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12122 enum rs6000_abi abi = DEFAULT_ABI;
12123 machine_mode elt_mode;
12124 int n_elts;
12126 /* Return a marker to indicate whether CR1 needs to set or clear the
12127 bit that V.4 uses to say fp args were passed in registers.
12128 Assume that we don't need the marker for software floating point,
12129 or compiler generated library calls. */
12130 if (mode == VOIDmode)
12132 if (abi == ABI_V4
12133 && (cum->call_cookie & CALL_LIBCALL) == 0
12134 && (cum->stdarg
12135 || (cum->nargs_prototype < 0
12136 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12138 /* For the SPE, we need to crxor CR6 always. */
12139 if (TARGET_SPE_ABI)
12140 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12141 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12142 return GEN_INT (cum->call_cookie
12143 | ((cum->fregno == FP_ARG_MIN_REG)
12144 ? CALL_V4_SET_FP_ARGS
12145 : CALL_V4_CLEAR_FP_ARGS));
12148 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12151 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12153 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12155 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12156 if (rslt != NULL_RTX)
12157 return rslt;
12158 /* Else fall through to usual handling. */
12161 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12163 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12164 rtx r, off;
12165 int i, k = 0;
12167 /* Do we also need to pass this argument in the parameter save area?
12168 Library support functions for IEEE 128-bit are assumed to not need the
12169 value passed both in GPRs and in vector registers. */
12170 if (TARGET_64BIT && !cum->prototype
12171 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12173 int align_words = ROUND_UP (cum->words, 2);
12174 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12177 /* Describe where this argument goes in the vector registers. */
12178 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12180 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12181 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12182 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12185 return rs6000_finish_function_arg (mode, rvec, k);
12187 else if (TARGET_ALTIVEC_ABI
12188 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12189 || (type && TREE_CODE (type) == VECTOR_TYPE
12190 && int_size_in_bytes (type) == 16)))
12192 if (named || abi == ABI_V4)
12193 return NULL_RTX;
12194 else
12196 /* Vector parameters to varargs functions under AIX or Darwin
12197 get passed in memory and possibly also in GPRs. */
12198 int align, align_words, n_words;
12199 machine_mode part_mode;
12201 /* Vector parameters must be 16-byte aligned. In 32-bit
12202 mode this means we need to take into account the offset
12203 to the parameter save area. In 64-bit mode, they just
12204 have to start on an even word, since the parameter save
12205 area is 16-byte aligned. */
12206 if (TARGET_32BIT)
12207 align = -(rs6000_parm_offset () + cum->words) & 3;
12208 else
12209 align = cum->words & 1;
12210 align_words = cum->words + align;
12212 /* Out of registers? Memory, then. */
12213 if (align_words >= GP_ARG_NUM_REG)
12214 return NULL_RTX;
12216 if (TARGET_32BIT && TARGET_POWERPC64)
12217 return rs6000_mixed_function_arg (mode, type, align_words);
12219 /* The vector value goes in GPRs. Only the part of the
12220 value in GPRs is reported here. */
12221 part_mode = mode;
12222 n_words = rs6000_arg_size (mode, type);
12223 if (align_words + n_words > GP_ARG_NUM_REG)
12224 /* Fortunately, there are only two possibilities, the value
12225 is either wholly in GPRs or half in GPRs and half not. */
12226 part_mode = DImode;
12228 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12231 else if (TARGET_SPE_ABI && TARGET_SPE
12232 && (SPE_VECTOR_MODE (mode)
12233 || (TARGET_E500_DOUBLE && (mode == DFmode
12234 || mode == DCmode
12235 || mode == TFmode
12236 || mode == TCmode))))
12237 return rs6000_spe_function_arg (cum, mode, type);
12239 else if (abi == ABI_V4)
12241 if (abi_v4_pass_in_fpr (mode))
12243 /* _Decimal128 must use an even/odd register pair. This assumes
12244 that the register number is odd when fregno is odd. */
12245 if (mode == TDmode && (cum->fregno % 2) == 1)
12246 cum->fregno++;
12248 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12249 <= FP_ARG_V4_MAX_REG)
12250 return gen_rtx_REG (mode, cum->fregno);
12251 else
12252 return NULL_RTX;
12254 else
12256 int n_words = rs6000_arg_size (mode, type);
12257 int gregno = cum->sysv_gregno;
12259 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12260 (r7,r8) or (r9,r10). As does any other 2 word item such
12261 as complex int due to a historical mistake. */
12262 if (n_words == 2)
12263 gregno += (1 - gregno) & 1;
12265 /* Multi-reg args are not split between registers and stack. */
12266 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12267 return NULL_RTX;
12269 if (TARGET_32BIT && TARGET_POWERPC64)
12270 return rs6000_mixed_function_arg (mode, type,
12271 gregno - GP_ARG_MIN_REG);
12272 return gen_rtx_REG (mode, gregno);
12275 else
12277 int align_words = rs6000_parm_start (mode, type, cum->words);
12279 /* _Decimal128 must be passed in an even/odd float register pair.
12280 This assumes that the register number is odd when fregno is odd. */
12281 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12282 cum->fregno++;
12284 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12286 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12287 rtx r, off;
12288 int i, k = 0;
12289 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12290 int fpr_words;
12292 /* Do we also need to pass this argument in the parameter
12293 save area? */
12294 if (type && (cum->nargs_prototype <= 0
12295 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12296 && TARGET_XL_COMPAT
12297 && align_words >= GP_ARG_NUM_REG)))
12298 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12300 /* Describe where this argument goes in the fprs. */
12301 for (i = 0; i < n_elts
12302 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12304 /* Check if the argument is split over registers and memory.
12305 This can only ever happen for long double or _Decimal128;
12306 complex types are handled via split_complex_arg. */
12307 machine_mode fmode = elt_mode;
12308 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12310 gcc_assert (FLOAT128_2REG_P (fmode));
12311 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12314 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12315 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12316 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12319 /* If there were not enough FPRs to hold the argument, the rest
12320 usually goes into memory. However, if the current position
12321 is still within the register parameter area, a portion may
12322 actually have to go into GPRs.
12324 Note that it may happen that the portion of the argument
12325 passed in the first "half" of the first GPR was already
12326 passed in the last FPR as well.
12328 For unnamed arguments, we already set up GPRs to cover the
12329 whole argument in rs6000_psave_function_arg, so there is
12330 nothing further to do at this point. */
12331 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12332 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12333 && cum->nargs_prototype > 0)
12335 static bool warned;
12337 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12338 int n_words = rs6000_arg_size (mode, type);
12340 align_words += fpr_words;
12341 n_words -= fpr_words;
12345 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12346 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12347 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12349 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12351 if (!warned && warn_psabi)
12353 warned = true;
12354 inform (input_location,
12355 "the ABI of passing homogeneous float aggregates"
12356 " has changed in GCC 5");
12360 return rs6000_finish_function_arg (mode, rvec, k);
12362 else if (align_words < GP_ARG_NUM_REG)
12364 if (TARGET_32BIT && TARGET_POWERPC64)
12365 return rs6000_mixed_function_arg (mode, type, align_words);
12367 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12369 else
12370 return NULL_RTX;
12374 /* For an arg passed partly in registers and partly in memory, this is
12375 the number of bytes passed in registers. For args passed entirely in
12376 registers or entirely in memory, zero. When an arg is described by a
12377 PARALLEL, perhaps using more than one register type, this function
12378 returns the number of bytes used by the first element of the PARALLEL. */
12380 static int
12381 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12382 tree type, bool named)
12384 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12385 bool passed_in_gprs = true;
12386 int ret = 0;
12387 int align_words;
12388 machine_mode elt_mode;
12389 int n_elts;
12391 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12393 if (DEFAULT_ABI == ABI_V4)
12394 return 0;
12396 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12398 /* If we are passing this arg in the fixed parameter save area (gprs or
12399 memory) as well as VRs, we do not use the partial bytes mechanism;
12400 instead, rs6000_function_arg will return a PARALLEL including a memory
12401 element as necessary. Library support functions for IEEE 128-bit are
12402 assumed to not need the value passed both in GPRs and in vector
12403 registers. */
12404 if (TARGET_64BIT && !cum->prototype
12405 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12406 return 0;
12408 /* Otherwise, we pass in VRs only. Check for partial copies. */
12409 passed_in_gprs = false;
12410 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12411 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12414 /* In this complicated case we just disable the partial_nregs code. */
12415 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12416 return 0;
12418 align_words = rs6000_parm_start (mode, type, cum->words);
12420 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12422 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12424 /* If we are passing this arg in the fixed parameter save area
12425 (gprs or memory) as well as FPRs, we do not use the partial
12426 bytes mechanism; instead, rs6000_function_arg will return a
12427 PARALLEL including a memory element as necessary. */
12428 if (type
12429 && (cum->nargs_prototype <= 0
12430 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12431 && TARGET_XL_COMPAT
12432 && align_words >= GP_ARG_NUM_REG)))
12433 return 0;
12435 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12436 passed_in_gprs = false;
12437 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12439 /* Compute number of bytes / words passed in FPRs. If there
12440 is still space available in the register parameter area
12441 *after* that amount, a part of the argument will be passed
12442 in GPRs. In that case, the total amount passed in any
12443 registers is equal to the amount that would have been passed
12444 in GPRs if everything were passed there, so we fall back to
12445 the GPR code below to compute the appropriate value. */
12446 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12447 * MIN (8, GET_MODE_SIZE (elt_mode)));
12448 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12450 if (align_words + fpr_words < GP_ARG_NUM_REG)
12451 passed_in_gprs = true;
12452 else
12453 ret = fpr;
12457 if (passed_in_gprs
12458 && align_words < GP_ARG_NUM_REG
12459 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12460 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12462 if (ret != 0 && TARGET_DEBUG_ARG)
12463 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12465 return ret;
12468 /* A C expression that indicates when an argument must be passed by
12469 reference. If nonzero for an argument, a copy of that argument is
12470 made in memory and a pointer to the argument is passed instead of
12471 the argument itself. The pointer is passed in whatever way is
12472 appropriate for passing a pointer to that type.
12474 Under V.4, aggregates and long double are passed by reference.
12476 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12477 reference unless the AltiVec vector extension ABI is in force.
12479 As an extension to all ABIs, variable sized types are passed by
12480 reference. */
12482 static bool
12483 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12484 machine_mode mode, const_tree type,
12485 bool named ATTRIBUTE_UNUSED)
12487 if (!type)
12488 return 0;
12490 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12491 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12493 if (TARGET_DEBUG_ARG)
12494 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12495 return 1;
12498 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12500 if (TARGET_DEBUG_ARG)
12501 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12502 return 1;
12505 if (int_size_in_bytes (type) < 0)
12507 if (TARGET_DEBUG_ARG)
12508 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12509 return 1;
12512 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12513 modes only exist for GCC vector types if -maltivec. */
12514 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12516 if (TARGET_DEBUG_ARG)
12517 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12518 return 1;
12521 /* Pass synthetic vectors in memory. */
12522 if (TREE_CODE (type) == VECTOR_TYPE
12523 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12525 static bool warned_for_pass_big_vectors = false;
12526 if (TARGET_DEBUG_ARG)
12527 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12528 if (!warned_for_pass_big_vectors)
12530 warning (0, "GCC vector passed by reference: "
12531 "non-standard ABI extension with no compatibility guarantee");
12532 warned_for_pass_big_vectors = true;
12534 return 1;
12537 return 0;
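/* Illustration (editorial): under the 32-bit V.4 ABI

     struct s { int x; };                          -> by reference
     int v32 __attribute__ ((vector_size (32)));   -> by reference,
                                                      with a one-time
                                                      warning
     double                                        -> by value

   and with an IEEE 128-bit long double (TARGET_IEEEQUAD), long
   double arguments also go by reference.  */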
12540 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12541 already processed. Return true if the parameter must be passed
12542 (fully or partially) on the stack. */
12544 static bool
12545 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12547 machine_mode mode;
12548 int unsignedp;
12549 rtx entry_parm;
12551 /* Catch errors. */
12552 if (type == NULL || type == error_mark_node)
12553 return true;
12555 /* Handle types with no storage requirement. */
12556 if (TYPE_MODE (type) == VOIDmode)
12557 return false;
12559 /* Handle complex types. */
12560 if (TREE_CODE (type) == COMPLEX_TYPE)
12561 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12562 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12564 /* Handle transparent aggregates. */
12565 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12566 && TYPE_TRANSPARENT_AGGR (type))
12567 type = TREE_TYPE (first_field (type));
12569 /* See if this arg was passed by invisible reference. */
12570 if (pass_by_reference (get_cumulative_args (args_so_far),
12571 TYPE_MODE (type), type, true))
12572 type = build_pointer_type (type);
12574 /* Find mode as it is passed by the ABI. */
12575 unsignedp = TYPE_UNSIGNED (type);
12576 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12578 /* If we must pass in stack, we need a stack. */
12579 if (rs6000_must_pass_in_stack (mode, type))
12580 return true;
12582 /* If there is no incoming register, we need a stack. */
12583 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12584 if (entry_parm == NULL)
12585 return true;
12587 /* Likewise if we need to pass both in registers and on the stack. */
12588 if (GET_CODE (entry_parm) == PARALLEL
12589 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12590 return true;
12592 /* Also true if we're partially in registers and partially not. */
12593 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12594 return true;
12596 /* Update info on where next arg arrives in registers. */
12597 rs6000_function_arg_advance (args_so_far, mode, type, true);
12598 return false;
12601 /* Return true if FUN has no prototype, has a variable argument
12602 list, or passes any parameter in memory. */
12604 static bool
12605 rs6000_function_parms_need_stack (tree fun, bool incoming)
12607 tree fntype, result;
12608 CUMULATIVE_ARGS args_so_far_v;
12609 cumulative_args_t args_so_far;
12611 if (!fun)
12612 /* Must be a libcall, all of which only use reg parms. */
12613 return false;
12615 fntype = fun;
12616 if (!TYPE_P (fun))
12617 fntype = TREE_TYPE (fun);
12619 /* Varargs functions need the parameter save area. */
12620 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12621 return true;
12623 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12624 args_so_far = pack_cumulative_args (&args_so_far_v);
12626 /* When incoming, we will have been passed the function decl.
12627 It is necessary to use the decl to handle K&R style functions,
12628 where TYPE_ARG_TYPES may not be available. */
12629 if (incoming)
12631 gcc_assert (DECL_P (fun));
12632 result = DECL_RESULT (fun);
12634 else
12635 result = TREE_TYPE (fntype);
12637 if (result && aggregate_value_p (result, fntype))
12639 if (!TYPE_P (result))
12640 result = TREE_TYPE (result);
12641 result = build_pointer_type (result);
12642 rs6000_parm_needs_stack (args_so_far, result);
12645 if (incoming)
12647 tree parm;
12649 for (parm = DECL_ARGUMENTS (fun);
12650 parm && parm != void_list_node;
12651 parm = TREE_CHAIN (parm))
12652 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12653 return true;
12655 else
12657 function_args_iterator args_iter;
12658 tree arg_type;
12660 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12661 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12662 return true;
12665 return false;
12668 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12669 usually a constant depending on the ABI. However, in the ELFv2 ABI
12670 the register parameter area is optional when calling a function that
12671 has a prototype in scope, has no variable argument list, and passes
12672 all parameters in registers. */
12674 int
12675 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12677 int reg_parm_stack_space;
12679 switch (DEFAULT_ABI)
12681 default:
12682 reg_parm_stack_space = 0;
12683 break;
12685 case ABI_AIX:
12686 case ABI_DARWIN:
12687 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12688 break;
12690 case ABI_ELFv2:
12691 /* ??? Recomputing this every time is a bit expensive. Is there
12692 a place to cache this information? */
12693 if (rs6000_function_parms_need_stack (fun, incoming))
12694 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12695 else
12696 reg_parm_stack_space = 0;
12697 break;
12700 return reg_parm_stack_space;
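/* Illustration (editorial): for 64-bit ELFv2 code such as

     extern int f (int, int);
     ... f (1, 2) ...

   both arguments travel in r3/r4, rs6000_function_parms_need_stack
   returns false, and no 64-byte register parameter save area is
   reserved; calls to varargs or unprototyped functions still get
   the full 64 bytes.  */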
12703 static void
12704 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12706 int i;
12707 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12709 if (nregs == 0)
12710 return;
12712 for (i = 0; i < nregs; i++)
12714 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12715 if (reload_completed)
12717 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12718 tem = NULL_RTX;
12719 else
12720 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12721 i * GET_MODE_SIZE (reg_mode));
12723 else
12724 tem = replace_equiv_address (tem, XEXP (tem, 0));
12726 gcc_assert (tem);
12728 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12732 /* Perform any actions needed for a function that is receiving a
12733 variable number of arguments.
12735 CUM is as above.
12737 MODE and TYPE are the mode and type of the current parameter.
12739 PRETEND_SIZE is a variable that should be set to the amount of stack
12740 that must be pushed by the prolog to pretend that our caller pushed it.
12743 Normally, this macro will push all remaining incoming registers on the
12744 stack and set PRETEND_SIZE to the length of the registers pushed. */
12746 static void
12747 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12748 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12749 int no_rtl)
12751 CUMULATIVE_ARGS next_cum;
12752 int reg_size = TARGET_32BIT ? 4 : 8;
12753 rtx save_area = NULL_RTX, mem;
12754 int first_reg_offset;
12755 alias_set_type set;
12757 /* Skip the last named argument. */
12758 next_cum = *get_cumulative_args (cum);
12759 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12761 if (DEFAULT_ABI == ABI_V4)
12763 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12765 if (! no_rtl)
12767 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12768 HOST_WIDE_INT offset = 0;
12770 /* Try to optimize the size of the varargs save area.
12771 The ABI requires that ap.reg_save_area is doubleword
12772 aligned, but we don't need to allocate space for all
12773 the bytes, only those to which we actually will save
12774 anything. */
12775 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12776 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12777 if (TARGET_HARD_FLOAT && TARGET_FPRS
12778 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12779 && cfun->va_list_fpr_size)
12781 if (gpr_reg_num)
12782 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12783 * UNITS_PER_FP_WORD;
12784 if (cfun->va_list_fpr_size
12785 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12786 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12787 else
12788 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12789 * UNITS_PER_FP_WORD;
12791 if (gpr_reg_num)
12793 offset = -((first_reg_offset * reg_size) & ~7);
12794 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12796 gpr_reg_num = cfun->va_list_gpr_size;
12797 if (reg_size == 4 && (first_reg_offset & 1))
12798 gpr_reg_num++;
12800 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12802 else if (fpr_size)
12803 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12804 * UNITS_PER_FP_WORD
12805 - (int) (GP_ARG_NUM_REG * reg_size);
12807 if (gpr_size + fpr_size)
12809 rtx reg_save_area
12810 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12811 gcc_assert (GET_CODE (reg_save_area) == MEM);
12812 reg_save_area = XEXP (reg_save_area, 0);
12813 if (GET_CODE (reg_save_area) == PLUS)
12815 gcc_assert (XEXP (reg_save_area, 0)
12816 == virtual_stack_vars_rtx);
12817 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12818 offset += INTVAL (XEXP (reg_save_area, 1));
12820 else
12821 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12824 cfun->machine->varargs_save_offset = offset;
12825 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12828 else
12830 first_reg_offset = next_cum.words;
12831 save_area = crtl->args.internal_arg_pointer;
12833 if (targetm.calls.must_pass_in_stack (mode, type))
12834 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12837 set = get_varargs_alias_set ();
12838 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12839 && cfun->va_list_gpr_size)
12841 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12843 if (va_list_gpr_counter_field)
12844 /* V4 va_list_gpr_size counts number of registers needed. */
12845 n_gpr = cfun->va_list_gpr_size;
12846 else
12847 /* char * va_list instead counts number of bytes needed. */
12848 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12850 if (nregs > n_gpr)
12851 nregs = n_gpr;
12853 mem = gen_rtx_MEM (BLKmode,
12854 plus_constant (Pmode, save_area,
12855 first_reg_offset * reg_size));
12856 MEM_NOTRAP_P (mem) = 1;
12857 set_mem_alias_set (mem, set);
12858 set_mem_align (mem, BITS_PER_WORD);
12860 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12861 nregs);
12864 /* Save FP registers if needed. */
12865 if (DEFAULT_ABI == ABI_V4
12866 && TARGET_HARD_FLOAT && TARGET_FPRS
12867 && ! no_rtl
12868 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12869 && cfun->va_list_fpr_size)
12871 int fregno = next_cum.fregno, nregs;
12872 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12873 rtx lab = gen_label_rtx ();
12874 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12875 * UNITS_PER_FP_WORD);
12877 emit_jump_insn
12878 (gen_rtx_SET (pc_rtx,
12879 gen_rtx_IF_THEN_ELSE (VOIDmode,
12880 gen_rtx_NE (VOIDmode, cr1,
12881 const0_rtx),
12882 gen_rtx_LABEL_REF (VOIDmode, lab),
12883 pc_rtx)));
12885 for (nregs = 0;
12886 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12887 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12889 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12890 ? DFmode : SFmode,
12891 plus_constant (Pmode, save_area, off));
12892 MEM_NOTRAP_P (mem) = 1;
12893 set_mem_alias_set (mem, set);
12894 set_mem_align (mem, GET_MODE_ALIGNMENT (
12895 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12896 ? DFmode : SFmode));
12897 emit_move_insn (mem, gen_rtx_REG (
12898 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
12899 ? DFmode : SFmode, fregno));
12902 emit_label (lab);
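/* Editorial layout sketch of the V4 register save area built above
   (offsets assume 32-bit, hard float, double float):

     reg_save_area + 0  .. +31 : r3..r10  (8 GPRs * 4 bytes)
     reg_save_area + 32 .. +95 : f1..f8   (8 FPRs * 8 bytes)

   The conditional branch on CR1 skips the FPR stores when the caller
   signalled via CR bit 6 that it passed no FP arguments in registers,
   as the SVR4 ABI requires for calls to varargs functions.  */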
12906 /* Create the va_list data type. */
12908 static tree
12909 rs6000_build_builtin_va_list (void)
12911 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12913 /* For AIX, prefer 'char *' because that's what the system
12914 header files like. */
12915 if (DEFAULT_ABI != ABI_V4)
12916 return build_pointer_type (char_type_node);
12918 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12919 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12920 get_identifier ("__va_list_tag"), record);
12922 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12923 unsigned_char_type_node);
12924 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12925 unsigned_char_type_node);
12926 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12927 every user file. */
12928 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12929 get_identifier ("reserved"), short_unsigned_type_node);
12930 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12931 get_identifier ("overflow_arg_area"),
12932 ptr_type_node);
12933 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12934 get_identifier ("reg_save_area"),
12935 ptr_type_node);
12937 va_list_gpr_counter_field = f_gpr;
12938 va_list_fpr_counter_field = f_fpr;
12940 DECL_FIELD_CONTEXT (f_gpr) = record;
12941 DECL_FIELD_CONTEXT (f_fpr) = record;
12942 DECL_FIELD_CONTEXT (f_res) = record;
12943 DECL_FIELD_CONTEXT (f_ovf) = record;
12944 DECL_FIELD_CONTEXT (f_sav) = record;
12946 TYPE_STUB_DECL (record) = type_decl;
12947 TYPE_NAME (record) = type_decl;
12948 TYPE_FIELDS (record) = f_gpr;
12949 DECL_CHAIN (f_gpr) = f_fpr;
12950 DECL_CHAIN (f_fpr) = f_res;
12951 DECL_CHAIN (f_res) = f_ovf;
12952 DECL_CHAIN (f_ovf) = f_sav;
12954 layout_type (record);
12956 /* The correct type is an array type of one element. */
12957 return build_array_type (record, build_index_type (size_zero_node));
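/* Editorial sketch: the record built above corresponds to the familiar
   SysV PowerPC va_list declaration (illustrative C, not the tree code):

     typedef struct __va_list_tag
     {
       unsigned char gpr;          // index of next GPR to use, 0..8
       unsigned char fpr;          // index of next FPR to use, 0..8
       unsigned short reserved;    // named padding, see comment above
       void *overflow_arg_area;    // next argument slot on the stack
       void *reg_save_area;        // base of the saved-register block
     } __va_list[1];               // array of one element, as returned

   On AIX and the other non-V4 ABIs, va_list is simply char * as above.  */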
12960 /* Implement va_start. */
12962 static void
12963 rs6000_va_start (tree valist, rtx nextarg)
12965 HOST_WIDE_INT words, n_gpr, n_fpr;
12966 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12967 tree gpr, fpr, ovf, sav, t;
12969 /* Only SVR4 needs something special. */
12970 if (DEFAULT_ABI != ABI_V4)
12972 std_expand_builtin_va_start (valist, nextarg);
12973 return;
12976 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12977 f_fpr = DECL_CHAIN (f_gpr);
12978 f_res = DECL_CHAIN (f_fpr);
12979 f_ovf = DECL_CHAIN (f_res);
12980 f_sav = DECL_CHAIN (f_ovf);
12982 valist = build_simple_mem_ref (valist);
12983 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12984 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12985 f_fpr, NULL_TREE);
12986 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12987 f_ovf, NULL_TREE);
12988 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12989 f_sav, NULL_TREE);
12991 /* Count number of gp and fp argument registers used. */
12992 words = crtl->args.info.words;
12993 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12994 GP_ARG_NUM_REG);
12995 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12996 FP_ARG_NUM_REG);
12998 if (TARGET_DEBUG_ARG)
12999 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13000 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13001 words, n_gpr, n_fpr);
13003 if (cfun->va_list_gpr_size)
13005 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13006 build_int_cst (NULL_TREE, n_gpr));
13007 TREE_SIDE_EFFECTS (t) = 1;
13008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13011 if (cfun->va_list_fpr_size)
13013 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13014 build_int_cst (NULL_TREE, n_fpr));
13015 TREE_SIDE_EFFECTS (t) = 1;
13016 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13018 #ifdef HAVE_AS_GNU_ATTRIBUTE
13019 if (call_ABI_of_interest (cfun->decl))
13020 rs6000_passes_float = true;
13021 #endif
13024 /* Find the overflow area. */
13025 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13026 if (words != 0)
13027 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13028 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13029 TREE_SIDE_EFFECTS (t) = 1;
13030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13032 /* If there were no va_arg invocations, don't set up the register
13033 save area. */
13034 if (!cfun->va_list_gpr_size
13035 && !cfun->va_list_fpr_size
13036 && n_gpr < GP_ARG_NUM_REG
13037 && n_fpr < FP_ARG_V4_MAX_REG)
13038 return;
13040 /* Find the register save area. */
13041 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13042 if (cfun->machine->varargs_save_offset)
13043 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13044 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13045 TREE_SIDE_EFFECTS (t) = 1;
13046 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
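/* Editorial sketch of the net effect of the expansion above, in terms
   of the struct sketched after rs6000_build_builtin_va_list:

     ap->gpr = min (gp regs consumed by named args, 8);
     ap->fpr = min (fp regs consumed by named args, 8);
     ap->overflow_arg_area = incoming_arg_pointer + words * 4;
     ap->reg_save_area = frame_base + varargs_save_offset;

   (4 is MIN_UNITS_PER_WORD here.)  The gpr/fpr stores are elided when
   no va_arg ever reads them, and the reg_save_area store is skipped
   entirely when no register save area was allocated.  */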
13049 /* Implement va_arg. */
13051 static tree
13052 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13053 gimple_seq *post_p)
13055 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13056 tree gpr, fpr, ovf, sav, reg, t, u;
13057 int size, rsize, n_reg, sav_ofs, sav_scale;
13058 tree lab_false, lab_over, addr;
13059 int align;
13060 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13061 int regalign = 0;
13062 gimple *stmt;
13064 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13066 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13067 return build_va_arg_indirect_ref (t);
13070 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13071 earlier version of gcc, with the property that it always applied alignment
13072 adjustments to the va-args (even for zero-sized types). The cheapest way
13073 to deal with this is to replicate the effect of the part of
13074 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13075 of relevance.
13076 We don't need to check for pass-by-reference because of the test above.
13077 We can return a simplified answer, since we know there's no offset to add. */
13079 if (((TARGET_MACHO
13080 && rs6000_darwin64_abi)
13081 || DEFAULT_ABI == ABI_ELFv2
13082 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13083 && integer_zerop (TYPE_SIZE (type)))
13085 unsigned HOST_WIDE_INT align, boundary;
13086 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13087 align = PARM_BOUNDARY / BITS_PER_UNIT;
13088 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13089 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13090 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13091 boundary /= BITS_PER_UNIT;
13092 if (boundary > align)
13094 tree t;
13095 /* This updates arg ptr by the amount that would be necessary
13096 to align the zero-sized (but not zero-alignment) item. */
13097 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13098 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13099 gimplify_and_add (t, pre_p);
13101 t = fold_convert (sizetype, valist_tmp);
13102 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13103 fold_convert (TREE_TYPE (valist),
13104 fold_build2 (BIT_AND_EXPR, sizetype, t,
13105 size_int (-boundary))));
13106 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13107 gimplify_and_add (t, pre_p);
13109 /* Since it is zero-sized there's no increment for the item itself. */
13110 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13111 return build_va_arg_indirect_ref (valist_tmp);
13114 if (DEFAULT_ABI != ABI_V4)
13116 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13118 tree elem_type = TREE_TYPE (type);
13119 machine_mode elem_mode = TYPE_MODE (elem_type);
13120 int elem_size = GET_MODE_SIZE (elem_mode);
13122 if (elem_size < UNITS_PER_WORD)
13124 tree real_part, imag_part;
13125 gimple_seq post = NULL;
13127 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13128 &post);
13129 /* Copy the value into a temporary, lest the formal temporary
13130 be reused out from under us. */
13131 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13132 gimple_seq_add_seq (pre_p, post);
13134 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13135 post_p);
13137 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13141 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13144 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13145 f_fpr = DECL_CHAIN (f_gpr);
13146 f_res = DECL_CHAIN (f_fpr);
13147 f_ovf = DECL_CHAIN (f_res);
13148 f_sav = DECL_CHAIN (f_ovf);
13150 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13151 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13152 f_fpr, NULL_TREE);
13153 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13154 f_ovf, NULL_TREE);
13155 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13156 f_sav, NULL_TREE);
13158 size = int_size_in_bytes (type);
13159 rsize = (size + 3) / 4;
13160 align = 1;
13162 machine_mode mode = TYPE_MODE (type);
13163 if (abi_v4_pass_in_fpr (mode))
13165 /* FP args go in FP registers, if present. */
13166 reg = fpr;
13167 n_reg = (size + 7) / 8;
13168 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13169 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13170 if (mode != SFmode && mode != SDmode)
13171 align = 8;
13173 else
13175 /* Otherwise into GP registers. */
13176 reg = gpr;
13177 n_reg = rsize;
13178 sav_ofs = 0;
13179 sav_scale = 4;
13180 if (n_reg == 2)
13181 align = 8;
13184 /* Pull the value out of the saved registers.... */
13186 lab_over = NULL;
13187 addr = create_tmp_var (ptr_type_node, "addr");
13189 /* AltiVec vectors never go in registers when -mabi=altivec. */
13190 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13191 align = 16;
13192 else
13194 lab_false = create_artificial_label (input_location);
13195 lab_over = create_artificial_label (input_location);
13197 /* Long long and SPE vectors are aligned in the registers.
13198 So is any other 2-gpr item, such as complex int, due to a
13199 historical mistake. */
13200 u = reg;
13201 if (n_reg == 2 && reg == gpr)
13203 regalign = 1;
13204 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13205 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13206 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13207 unshare_expr (reg), u);
13209 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13210 reg number is 0 for f1, so we want to make it odd. */
13211 else if (reg == fpr && mode == TDmode)
13213 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13214 build_int_cst (TREE_TYPE (reg), 1));
13215 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13218 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13219 t = build2 (GE_EXPR, boolean_type_node, u, t);
13220 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13221 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13222 gimplify_and_add (t, pre_p);
13224 t = sav;
13225 if (sav_ofs)
13226 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13228 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13229 build_int_cst (TREE_TYPE (reg), n_reg));
13230 u = fold_convert (sizetype, u);
13231 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13232 t = fold_build_pointer_plus (t, u);
13234 /* _Decimal32 varargs are located in the second word of the 64-bit
13235 FP register for 32-bit binaries. */
13236 if (TARGET_32BIT
13237 && TARGET_HARD_FLOAT && TARGET_FPRS
13238 && mode == SDmode)
13239 t = fold_build_pointer_plus_hwi (t, size);
13241 gimplify_assign (addr, t, pre_p);
13243 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13245 stmt = gimple_build_label (lab_false);
13246 gimple_seq_add_stmt (pre_p, stmt);
13248 if ((n_reg == 2 && !regalign) || n_reg > 2)
13250 /* Ensure that we don't find any more args in regs.
13251 Alignment has taken care of the special cases. */
13252 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13256 /* ... otherwise out of the overflow area. */
13258 /* Care for on-stack alignment if needed. */
13259 t = ovf;
13260 if (align != 1)
13262 t = fold_build_pointer_plus_hwi (t, align - 1);
13263 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13264 build_int_cst (TREE_TYPE (t), -align));
13266 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13268 gimplify_assign (unshare_expr (addr), t, pre_p);
13270 t = fold_build_pointer_plus_hwi (t, size);
13271 gimplify_assign (unshare_expr (ovf), t, pre_p);
13273 if (lab_over)
13275 stmt = gimple_build_label (lab_over);
13276 gimple_seq_add_stmt (pre_p, stmt);
13279 if (STRICT_ALIGNMENT
13280 && (TYPE_ALIGN (type)
13281 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13283 /* The value (of type complex double, for example) may not be
13284 aligned in memory in the saved registers, so copy via a
13285 temporary. (This is the same code as used for SPARC.) */
13286 tree tmp = create_tmp_var (type, "va_arg_tmp");
13287 tree dest_addr = build_fold_addr_expr (tmp);
13289 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13290 3, dest_addr, addr, size_int (rsize * 4));
13292 gimplify_and_add (copy, pre_p);
13293 addr = dest_addr;
13296 addr = fold_convert (ptrtype, addr);
13297 return build_va_arg_indirect_ref (addr);
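/* Editorial sketch: for a double on V4 with hard float, the gimple
   emitted above behaves roughly like this C (names illustrative):

     if (ap->fpr < 8)                      // FP arg regs f1..f8
       {
         addr = ap->reg_save_area + 32     // sav_ofs: past 8 GPR slots
                + ap->fpr * 8;             // sav_scale
         ap->fpr += 1;                     // n_reg
       }
     else
       {
         addr = align8 (ap->overflow_arg_area);
         ap->overflow_arg_area = addr + 8; // size of the argument
       }
     result = *(double *) addr;
*/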
13300 /* Builtins. */
13302 static void
13303 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13305 tree t;
13306 unsigned classify = rs6000_builtin_info[(int)code].attr;
13307 const char *attr_string = "";
13309 gcc_assert (name != NULL);
13310 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13312 if (rs6000_builtin_decls[(int)code])
13313 fatal_error (input_location,
13314 "internal error: builtin function %s already processed", name);
13316 rs6000_builtin_decls[(int)code] = t =
13317 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13319 /* Set any special attributes. */
13320 if ((classify & RS6000_BTC_CONST) != 0)
13322 /* const function, function only depends on the inputs. */
13323 TREE_READONLY (t) = 1;
13324 TREE_NOTHROW (t) = 1;
13325 attr_string = ", const";
13327 else if ((classify & RS6000_BTC_PURE) != 0)
13329 /* pure function, function can read global memory, but does not set any
13330 external state. */
13331 DECL_PURE_P (t) = 1;
13332 TREE_NOTHROW (t) = 1;
13333 attr_string = ", pure";
13335 else if ((classify & RS6000_BTC_FP) != 0)
13337 /* Function is a math function. If rounding mode is on, then treat the
13338 function as not reading global memory, but it can have arbitrary side
13339 effects. If it is off, then assume the function is a const function.
13340 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13341 builtin-attribute.def that is used for the math functions. */
13342 TREE_NOTHROW (t) = 1;
13343 if (flag_rounding_math)
13345 DECL_PURE_P (t) = 1;
13346 DECL_IS_NOVOPS (t) = 1;
13347 attr_string = ", fp, pure";
13349 else
13351 TREE_READONLY (t) = 1;
13352 attr_string = ", fp, const";
13355 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13356 gcc_unreachable ();
13358 if (TARGET_DEBUG_BUILTIN)
13359 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13360 (int)code, name, attr_string);
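/* Editorial usage sketch: a typical registration through def_builtin
   looks like this (the function-type variable name is hypothetical):

     def_builtin ("__builtin_altivec_vaddubm", v16qi_ftype_v16qi_v16qi,
                  ALTIVEC_BUILTIN_VADDUBM);

   The RS6000_BTC_CONST/PURE/FP bits in rs6000_builtin_info then decide
   which of the tree attributes above are set on the new decl.  */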
13363 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13365 #undef RS6000_BUILTIN_0
13366 #undef RS6000_BUILTIN_1
13367 #undef RS6000_BUILTIN_2
13368 #undef RS6000_BUILTIN_3
13369 #undef RS6000_BUILTIN_A
13370 #undef RS6000_BUILTIN_D
13371 #undef RS6000_BUILTIN_E
13372 #undef RS6000_BUILTIN_H
13373 #undef RS6000_BUILTIN_P
13374 #undef RS6000_BUILTIN_Q
13375 #undef RS6000_BUILTIN_S
13376 #undef RS6000_BUILTIN_X
13378 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13379 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13380 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13381 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13382 { MASK, ICODE, NAME, ENUM },
13384 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13385 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13386 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13387 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13388 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13389 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13390 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13391 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13393 static const struct builtin_description bdesc_3arg[] =
13395 #include "rs6000-builtin.def"
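/* Editorial note: the #undef/#define/#include dance above is the classic
   "X macro" idiom.  A minimal standalone sketch of the same technique
   (hypothetical names, not GCC code):

     #define OP_LIST(X) X (ADD, "add") X (SUB, "sub")

     #define X(ENUM, NAME) ENUM,
     enum op_code { OP_LIST (X) N_OPS };
     #undef X

     #define X(ENUM, NAME) { NAME, ENUM },
     static const struct { const char *name; enum op_code code; } ops[] =
     { OP_LIST (X) };
     #undef X

   rs6000-builtin.def plays the role of OP_LIST: each bdesc_* table below
   re-includes it with exactly one RS6000_BUILTIN_* macro expanding to a
   table entry and all the others expanding to nothing.  */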
13398 /* DST operations: void foo (void *, const int, const char). */
13400 #undef RS6000_BUILTIN_0
13401 #undef RS6000_BUILTIN_1
13402 #undef RS6000_BUILTIN_2
13403 #undef RS6000_BUILTIN_3
13404 #undef RS6000_BUILTIN_A
13405 #undef RS6000_BUILTIN_D
13406 #undef RS6000_BUILTIN_E
13407 #undef RS6000_BUILTIN_H
13408 #undef RS6000_BUILTIN_P
13409 #undef RS6000_BUILTIN_Q
13410 #undef RS6000_BUILTIN_S
13411 #undef RS6000_BUILTIN_X
13413 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13414 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13415 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13416 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13417 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13418 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13419 { MASK, ICODE, NAME, ENUM },
13421 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13422 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13423 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13424 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13425 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13426 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13428 static const struct builtin_description bdesc_dst[] =
13430 #include "rs6000-builtin.def"
13433 /* Simple binary operations: VECc = foo (VECa, VECb). */
13435 #undef RS6000_BUILTIN_0
13436 #undef RS6000_BUILTIN_1
13437 #undef RS6000_BUILTIN_2
13438 #undef RS6000_BUILTIN_3
13439 #undef RS6000_BUILTIN_A
13440 #undef RS6000_BUILTIN_D
13441 #undef RS6000_BUILTIN_E
13442 #undef RS6000_BUILTIN_H
13443 #undef RS6000_BUILTIN_P
13444 #undef RS6000_BUILTIN_Q
13445 #undef RS6000_BUILTIN_S
13446 #undef RS6000_BUILTIN_X
13448 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13449 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13450 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13451 { MASK, ICODE, NAME, ENUM },
13453 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13454 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13455 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13456 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13457 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13458 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13459 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13460 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13461 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13463 static const struct builtin_description bdesc_2arg[] =
13465 #include "rs6000-builtin.def"
13468 #undef RS6000_BUILTIN_0
13469 #undef RS6000_BUILTIN_1
13470 #undef RS6000_BUILTIN_2
13471 #undef RS6000_BUILTIN_3
13472 #undef RS6000_BUILTIN_A
13473 #undef RS6000_BUILTIN_D
13474 #undef RS6000_BUILTIN_E
13475 #undef RS6000_BUILTIN_H
13476 #undef RS6000_BUILTIN_P
13477 #undef RS6000_BUILTIN_Q
13478 #undef RS6000_BUILTIN_S
13479 #undef RS6000_BUILTIN_X
13481 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13482 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13483 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13484 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13485 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13486 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13487 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13488 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13489 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13490 { MASK, ICODE, NAME, ENUM },
13492 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13493 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13494 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13496 /* AltiVec predicates. */
13498 static const struct builtin_description bdesc_altivec_preds[] =
13500 #include "rs6000-builtin.def"
13503 /* SPE predicates. */
13504 #undef RS6000_BUILTIN_0
13505 #undef RS6000_BUILTIN_1
13506 #undef RS6000_BUILTIN_2
13507 #undef RS6000_BUILTIN_3
13508 #undef RS6000_BUILTIN_A
13509 #undef RS6000_BUILTIN_D
13510 #undef RS6000_BUILTIN_E
13511 #undef RS6000_BUILTIN_H
13512 #undef RS6000_BUILTIN_P
13513 #undef RS6000_BUILTIN_Q
13514 #undef RS6000_BUILTIN_S
13515 #undef RS6000_BUILTIN_X
13517 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13518 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13519 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13520 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13521 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13522 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13523 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13524 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13525 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13526 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13527 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
13528 { MASK, ICODE, NAME, ENUM },
13530 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13532 static const struct builtin_description bdesc_spe_predicates[] =
13534 #include "rs6000-builtin.def"
13537 /* SPE evsel predicates. */
13538 #undef RS6000_BUILTIN_0
13539 #undef RS6000_BUILTIN_1
13540 #undef RS6000_BUILTIN_2
13541 #undef RS6000_BUILTIN_3
13542 #undef RS6000_BUILTIN_A
13543 #undef RS6000_BUILTIN_D
13544 #undef RS6000_BUILTIN_E
13545 #undef RS6000_BUILTIN_H
13546 #undef RS6000_BUILTIN_P
13547 #undef RS6000_BUILTIN_Q
13548 #undef RS6000_BUILTIN_S
13549 #undef RS6000_BUILTIN_X
13551 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13552 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13553 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13554 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13555 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13556 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13557 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
13558 { MASK, ICODE, NAME, ENUM },
13560 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13561 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13562 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13563 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13564 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13566 static const struct builtin_description bdesc_spe_evsel[] =
13568 #include "rs6000-builtin.def"
13571 /* PAIRED predicates. */
13572 #undef RS6000_BUILTIN_0
13573 #undef RS6000_BUILTIN_1
13574 #undef RS6000_BUILTIN_2
13575 #undef RS6000_BUILTIN_3
13576 #undef RS6000_BUILTIN_A
13577 #undef RS6000_BUILTIN_D
13578 #undef RS6000_BUILTIN_E
13579 #undef RS6000_BUILTIN_H
13580 #undef RS6000_BUILTIN_P
13581 #undef RS6000_BUILTIN_Q
13582 #undef RS6000_BUILTIN_S
13583 #undef RS6000_BUILTIN_X
13585 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13586 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13587 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13588 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13589 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13590 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13591 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13592 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13593 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13594 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13595 { MASK, ICODE, NAME, ENUM },
13597 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13598 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13600 static const struct builtin_description bdesc_paired_preds[] =
13602 #include "rs6000-builtin.def"
13605 /* ABS* operations. */
13607 #undef RS6000_BUILTIN_0
13608 #undef RS6000_BUILTIN_1
13609 #undef RS6000_BUILTIN_2
13610 #undef RS6000_BUILTIN_3
13611 #undef RS6000_BUILTIN_A
13612 #undef RS6000_BUILTIN_D
13613 #undef RS6000_BUILTIN_E
13614 #undef RS6000_BUILTIN_H
13615 #undef RS6000_BUILTIN_P
13616 #undef RS6000_BUILTIN_Q
13617 #undef RS6000_BUILTIN_S
13618 #undef RS6000_BUILTIN_X
13620 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13621 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13622 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13623 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13624 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13625 { MASK, ICODE, NAME, ENUM },
13627 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13628 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13629 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13630 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13631 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13632 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13633 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13635 static const struct builtin_description bdesc_abs[] =
13637 #include "rs6000-builtin.def"
13640 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13641 foo (VECa). */
13643 #undef RS6000_BUILTIN_0
13644 #undef RS6000_BUILTIN_1
13645 #undef RS6000_BUILTIN_2
13646 #undef RS6000_BUILTIN_3
13647 #undef RS6000_BUILTIN_A
13648 #undef RS6000_BUILTIN_D
13649 #undef RS6000_BUILTIN_E
13650 #undef RS6000_BUILTIN_H
13651 #undef RS6000_BUILTIN_P
13652 #undef RS6000_BUILTIN_Q
13653 #undef RS6000_BUILTIN_S
13654 #undef RS6000_BUILTIN_X
13656 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13657 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13658 { MASK, ICODE, NAME, ENUM },
13660 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13661 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13662 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13663 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13664 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13665 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13666 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13667 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13668 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13669 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13671 static const struct builtin_description bdesc_1arg[] =
13673 #include "rs6000-builtin.def"
13676 /* Simple no-argument operations: result = __builtin_darn_32 (). */
13678 #undef RS6000_BUILTIN_0
13679 #undef RS6000_BUILTIN_1
13680 #undef RS6000_BUILTIN_2
13681 #undef RS6000_BUILTIN_3
13682 #undef RS6000_BUILTIN_A
13683 #undef RS6000_BUILTIN_D
13684 #undef RS6000_BUILTIN_E
13685 #undef RS6000_BUILTIN_H
13686 #undef RS6000_BUILTIN_P
13687 #undef RS6000_BUILTIN_Q
13688 #undef RS6000_BUILTIN_S
13689 #undef RS6000_BUILTIN_X
13691 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13692 { MASK, ICODE, NAME, ENUM },
13694 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13695 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13696 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13697 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13698 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13699 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13700 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13701 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13702 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13703 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13704 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13706 static const struct builtin_description bdesc_0arg[] =
13708 #include "rs6000-builtin.def"
13711 /* HTM builtins. */
13712 #undef RS6000_BUILTIN_0
13713 #undef RS6000_BUILTIN_1
13714 #undef RS6000_BUILTIN_2
13715 #undef RS6000_BUILTIN_3
13716 #undef RS6000_BUILTIN_A
13717 #undef RS6000_BUILTIN_D
13718 #undef RS6000_BUILTIN_E
13719 #undef RS6000_BUILTIN_H
13720 #undef RS6000_BUILTIN_P
13721 #undef RS6000_BUILTIN_Q
13722 #undef RS6000_BUILTIN_S
13723 #undef RS6000_BUILTIN_X
13725 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13726 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13727 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13728 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13729 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13730 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13731 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
13732 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13733 { MASK, ICODE, NAME, ENUM },
13735 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13736 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13737 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
13738 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13740 static const struct builtin_description bdesc_htm[] =
13742 #include "rs6000-builtin.def"
13745 #undef RS6000_BUILTIN_0
13746 #undef RS6000_BUILTIN_1
13747 #undef RS6000_BUILTIN_2
13748 #undef RS6000_BUILTIN_3
13749 #undef RS6000_BUILTIN_A
13750 #undef RS6000_BUILTIN_D
13751 #undef RS6000_BUILTIN_E
13752 #undef RS6000_BUILTIN_H
13753 #undef RS6000_BUILTIN_P
13754 #undef RS6000_BUILTIN_Q
13755 #undef RS6000_BUILTIN_S
13757 /* Return true if a builtin function is overloaded. */
13758 bool
13759 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13761 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13764 const char *
13765 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13767 return rs6000_builtin_info[(int)fncode].name;
13770 /* Expand an expression EXP that calls a builtin without arguments. */
13771 static rtx
13772 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13774 rtx pat;
13775 machine_mode tmode = insn_data[icode].operand[0].mode;
13777 if (icode == CODE_FOR_nothing)
13778 /* Builtin not supported on this processor. */
13779 return 0;
13781 if (target == 0
13782 || GET_MODE (target) != tmode
13783 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13784 target = gen_reg_rtx (tmode);
13786 pat = GEN_FCN (icode) (target);
13787 if (! pat)
13788 return 0;
13789 emit_insn (pat);
13791 return target;
13795 static rtx
13796 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13798 rtx pat;
13799 tree arg0 = CALL_EXPR_ARG (exp, 0);
13800 tree arg1 = CALL_EXPR_ARG (exp, 1);
13801 rtx op0 = expand_normal (arg0);
13802 rtx op1 = expand_normal (arg1);
13803 machine_mode mode0 = insn_data[icode].operand[0].mode;
13804 machine_mode mode1 = insn_data[icode].operand[1].mode;
13806 if (icode == CODE_FOR_nothing)
13807 /* Builtin not supported on this processor. */
13808 return 0;
13810 /* If we got invalid arguments bail out before generating bad rtl. */
13811 if (arg0 == error_mark_node || arg1 == error_mark_node)
13812 return const0_rtx;
13814 if (GET_CODE (op0) != CONST_INT
13815 || INTVAL (op0) > 255
13816 || INTVAL (op0) < 0)
13818 error ("argument 1 must be an 8-bit field value");
13819 return const0_rtx;
13822 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13823 op0 = copy_to_mode_reg (mode0, op0);
13825 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13826 op1 = copy_to_mode_reg (mode1, op1);
13828 pat = GEN_FCN (icode) (op0, op1);
13829 if (! pat)
13830 return const0_rtx;
13831 emit_insn (pat);
13833 return NULL_RTX;
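/* Editorial usage sketch for the builtin expanded above (the helper
   name is hypothetical):

     double new_bits = compute_fpscr_image ();
     __builtin_mtfsf (0xff, new_bits);   // field mask must be a constant
                                         // in the range 0..255

   A non-constant or out-of-range first operand takes the error path
   above and the call expands to nothing.  */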
13836 static rtx
13837 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13839 rtx pat;
13840 tree arg0 = CALL_EXPR_ARG (exp, 0);
13841 rtx op0 = expand_normal (arg0);
13842 machine_mode tmode = insn_data[icode].operand[0].mode;
13843 machine_mode mode0 = insn_data[icode].operand[1].mode;
13845 if (icode == CODE_FOR_nothing)
13846 /* Builtin not supported on this processor. */
13847 return 0;
13849 /* If we got invalid arguments bail out before generating bad rtl. */
13850 if (arg0 == error_mark_node)
13851 return const0_rtx;
13853 if (icode == CODE_FOR_altivec_vspltisb
13854 || icode == CODE_FOR_altivec_vspltish
13855 || icode == CODE_FOR_altivec_vspltisw
13856 || icode == CODE_FOR_spe_evsplatfi
13857 || icode == CODE_FOR_spe_evsplati)
13859 /* Only allow 5-bit *signed* literals. */
13860 if (GET_CODE (op0) != CONST_INT
13861 || INTVAL (op0) > 15
13862 || INTVAL (op0) < -16)
13864 error ("argument 1 must be a 5-bit signed literal");
13865 return const0_rtx;
13869 if (target == 0
13870 || GET_MODE (target) != tmode
13871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13872 target = gen_reg_rtx (tmode);
13874 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13875 op0 = copy_to_mode_reg (mode0, op0);
13877 pat = GEN_FCN (icode) (target, op0);
13878 if (! pat)
13879 return 0;
13880 emit_insn (pat);
13882 return target;
13885 static rtx
13886 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13888 rtx pat, scratch1, scratch2;
13889 tree arg0 = CALL_EXPR_ARG (exp, 0);
13890 rtx op0 = expand_normal (arg0);
13891 machine_mode tmode = insn_data[icode].operand[0].mode;
13892 machine_mode mode0 = insn_data[icode].operand[1].mode;
13894 /* If we have invalid arguments, bail out before generating bad rtl. */
13895 if (arg0 == error_mark_node)
13896 return const0_rtx;
13898 if (target == 0
13899 || GET_MODE (target) != tmode
13900 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13901 target = gen_reg_rtx (tmode);
13903 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13904 op0 = copy_to_mode_reg (mode0, op0);
13906 scratch1 = gen_reg_rtx (mode0);
13907 scratch2 = gen_reg_rtx (mode0);
13909 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13910 if (! pat)
13911 return 0;
13912 emit_insn (pat);
13914 return target;
13917 static rtx
13918 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13920 rtx pat;
13921 tree arg0 = CALL_EXPR_ARG (exp, 0);
13922 tree arg1 = CALL_EXPR_ARG (exp, 1);
13923 rtx op0 = expand_normal (arg0);
13924 rtx op1 = expand_normal (arg1);
13925 machine_mode tmode = insn_data[icode].operand[0].mode;
13926 machine_mode mode0 = insn_data[icode].operand[1].mode;
13927 machine_mode mode1 = insn_data[icode].operand[2].mode;
13929 if (icode == CODE_FOR_nothing)
13930 /* Builtin not supported on this processor. */
13931 return 0;
13933 /* If we got invalid arguments bail out before generating bad rtl. */
13934 if (arg0 == error_mark_node || arg1 == error_mark_node)
13935 return const0_rtx;
13937 if (icode == CODE_FOR_altivec_vcfux
13938 || icode == CODE_FOR_altivec_vcfsx
13939 || icode == CODE_FOR_altivec_vctsxs
13940 || icode == CODE_FOR_altivec_vctuxs
13941 || icode == CODE_FOR_altivec_vspltb
13942 || icode == CODE_FOR_altivec_vsplth
13943 || icode == CODE_FOR_altivec_vspltw
13944 || icode == CODE_FOR_spe_evaddiw
13945 || icode == CODE_FOR_spe_evldd
13946 || icode == CODE_FOR_spe_evldh
13947 || icode == CODE_FOR_spe_evldw
13948 || icode == CODE_FOR_spe_evlhhesplat
13949 || icode == CODE_FOR_spe_evlhhossplat
13950 || icode == CODE_FOR_spe_evlhhousplat
13951 || icode == CODE_FOR_spe_evlwhe
13952 || icode == CODE_FOR_spe_evlwhos
13953 || icode == CODE_FOR_spe_evlwhou
13954 || icode == CODE_FOR_spe_evlwhsplat
13955 || icode == CODE_FOR_spe_evlwwsplat
13956 || icode == CODE_FOR_spe_evrlwi
13957 || icode == CODE_FOR_spe_evslwi
13958 || icode == CODE_FOR_spe_evsrwis
13959 || icode == CODE_FOR_spe_evsubifw
13960 || icode == CODE_FOR_spe_evsrwiu)
13962 /* Only allow 5-bit unsigned literals. */
13963 STRIP_NOPS (arg1);
13964 if (TREE_CODE (arg1) != INTEGER_CST
13965 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13967 error ("argument 2 must be a 5-bit unsigned literal");
13968 return const0_rtx;
13971 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13972 || icode == CODE_FOR_dfptstsfi_lt_dd
13973 || icode == CODE_FOR_dfptstsfi_gt_dd
13974 || icode == CODE_FOR_dfptstsfi_unordered_dd
13975 || icode == CODE_FOR_dfptstsfi_eq_td
13976 || icode == CODE_FOR_dfptstsfi_lt_td
13977 || icode == CODE_FOR_dfptstsfi_gt_td
13978 || icode == CODE_FOR_dfptstsfi_unordered_td)
13980 /* Only allow 6-bit unsigned literals. */
13981 STRIP_NOPS (arg0);
13982 if (TREE_CODE (arg0) != INTEGER_CST
13983 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13985 error ("argument 1 must be a 6-bit unsigned literal");
13986 return CONST0_RTX (tmode);
13989 else if (icode == CODE_FOR_xststdcdp
13990 || icode == CODE_FOR_xststdcsp
13991 || icode == CODE_FOR_xvtstdcdp
13992 || icode == CODE_FOR_xvtstdcsp)
13994 /* Only allow 7-bit unsigned literals. */
13995 STRIP_NOPS (arg1);
13996 if (TREE_CODE (arg1) != INTEGER_CST
13997 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13999 error ("argument 2 must be a 7-bit unsigned literal");
14000 return CONST0_RTX (tmode);
14004 if (target == 0
14005 || GET_MODE (target) != tmode
14006 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14007 target = gen_reg_rtx (tmode);
14009 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14010 op0 = copy_to_mode_reg (mode0, op0);
14011 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14012 op1 = copy_to_mode_reg (mode1, op1);
14014 pat = GEN_FCN (icode) (target, op0, op1);
14015 if (! pat)
14016 return 0;
14017 emit_insn (pat);
14019 return target;
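/* Editorial example of the literal checks above: vec_splat's element
   index maps to altivec_vspltw and friends, so

     vector int v = get_vec ();          // hypothetical source
     vector int a = vec_splat (v, 3);    // OK: 5-bit unsigned literal
     vector int b = vec_splat (v, n);    // error: argument 2 must be a
                                         // 5-bit unsigned literal

   where n is a variable.  */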
14022 static rtx
14023 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14025 rtx pat, scratch;
14026 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14027 tree arg0 = CALL_EXPR_ARG (exp, 1);
14028 tree arg1 = CALL_EXPR_ARG (exp, 2);
14029 rtx op0 = expand_normal (arg0);
14030 rtx op1 = expand_normal (arg1);
14031 machine_mode tmode = SImode;
14032 machine_mode mode0 = insn_data[icode].operand[1].mode;
14033 machine_mode mode1 = insn_data[icode].operand[2].mode;
14034 int cr6_form_int;
14036 if (TREE_CODE (cr6_form) != INTEGER_CST)
14038 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14039 return const0_rtx;
14041 else
14042 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14044 gcc_assert (mode0 == mode1);
14046 /* If we have invalid arguments, bail out before generating bad rtl. */
14047 if (arg0 == error_mark_node || arg1 == error_mark_node)
14048 return const0_rtx;
14050 if (target == 0
14051 || GET_MODE (target) != tmode
14052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14053 target = gen_reg_rtx (tmode);
14055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14056 op0 = copy_to_mode_reg (mode0, op0);
14057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14058 op1 = copy_to_mode_reg (mode1, op1);
14060 scratch = gen_reg_rtx (mode0);
14062 pat = GEN_FCN (icode) (scratch, op0, op1);
14063 if (! pat)
14064 return 0;
14065 emit_insn (pat);
14067 /* The vec_any* and vec_all* predicates use the same opcodes for two
14068 different operations, but the bits in CR6 will be different
14069 depending on what information we want. So we have to play tricks
14070 with CR6 to get the right bits out.
14072 If you think this is disgusting, look at the specs for the
14073 AltiVec predicates. */
14075 switch (cr6_form_int)
14077 case 0:
14078 emit_insn (gen_cr6_test_for_zero (target));
14079 break;
14080 case 1:
14081 emit_insn (gen_cr6_test_for_zero_reverse (target));
14082 break;
14083 case 2:
14084 emit_insn (gen_cr6_test_for_lt (target));
14085 break;
14086 case 3:
14087 emit_insn (gen_cr6_test_for_lt_reverse (target));
14088 break;
14089 default:
14090 error ("argument 1 of __builtin_altivec_predicate is out of range");
14091 break;
14094 return target;
14097 static rtx
14098 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14100 rtx pat, addr;
14101 tree arg0 = CALL_EXPR_ARG (exp, 0);
14102 tree arg1 = CALL_EXPR_ARG (exp, 1);
14103 machine_mode tmode = insn_data[icode].operand[0].mode;
14104 machine_mode mode0 = Pmode;
14105 machine_mode mode1 = Pmode;
14106 rtx op0 = expand_normal (arg0);
14107 rtx op1 = expand_normal (arg1);
14109 if (icode == CODE_FOR_nothing)
14110 /* Builtin not supported on this processor. */
14111 return 0;
14113 /* If we got invalid arguments bail out before generating bad rtl. */
14114 if (arg0 == error_mark_node || arg1 == error_mark_node)
14115 return const0_rtx;
14117 if (target == 0
14118 || GET_MODE (target) != tmode
14119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14120 target = gen_reg_rtx (tmode);
14122 op1 = copy_to_mode_reg (mode1, op1);
14124 if (op0 == const0_rtx)
14126 addr = gen_rtx_MEM (tmode, op1);
14128 else
14130 op0 = copy_to_mode_reg (mode0, op0);
14131 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14134 pat = GEN_FCN (icode) (target, addr);
14136 if (! pat)
14137 return 0;
14138 emit_insn (pat);
14140 return target;
14143 /* Return a constant vector for use as a little-endian permute control vector
14144 to reverse the order of elements of the given vector mode. */
14145 static rtx
14146 swap_selector_for_mode (machine_mode mode)
14148 /* These are little endian vectors, so their elements are reversed
14149 from what you would normally expect for a permute control vector. */
14150 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14151 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14152 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14153 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14154 unsigned int *swaparray, i;
14155 rtx perm[16];
14157 switch (mode)
14159 case V2DFmode:
14160 case V2DImode:
14161 swaparray = swap2;
14162 break;
14163 case V4SFmode:
14164 case V4SImode:
14165 swaparray = swap4;
14166 break;
14167 case V8HImode:
14168 swaparray = swap8;
14169 break;
14170 case V16QImode:
14171 swaparray = swap16;
14172 break;
14173 default:
14174 gcc_unreachable ();
14177 for (i = 0; i < 16; ++i)
14178 perm[i] = GEN_INT (swaparray[i]);
14180 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
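/* Editorial example: for V4SImode the selector above picks bytes
   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, and a vperm with it
   reverses the four 32-bit elements, e.g. {0,1,2,3} -> {3,2,1,0}.
   The indices look "within-word reversed" only because, as the comment
   above says, the control vector itself is interpreted with
   little-endian element numbering.  */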
14183 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14184 with -maltivec=be specified. Issue the load followed by an element-
14185 reversing permute. */
14186 void
14187 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14189 rtx tmp = gen_reg_rtx (mode);
14190 rtx load = gen_rtx_SET (tmp, op1);
14191 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14192 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14193 rtx sel = swap_selector_for_mode (mode);
14194 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14196 gcc_assert (REG_P (op0));
14197 emit_insn (par);
14198 emit_insn (gen_rtx_SET (op0, vperm));
14201 /* Generate code for a "stvxl" built-in for a little endian target with
14202 -maltivec=be specified. Issue the store preceded by an element-reversing
14203 permute. */
14204 void
14205 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14207 rtx tmp = gen_reg_rtx (mode);
14208 rtx store = gen_rtx_SET (op0, tmp);
14209 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14210 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14211 rtx sel = swap_selector_for_mode (mode);
14212 rtx vperm;
14214 gcc_assert (REG_P (op1));
14215 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14216 emit_insn (gen_rtx_SET (tmp, vperm));
14217 emit_insn (par);
14220 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14221 specified. Issue the store preceded by an element-reversing permute. */
14222 void
14223 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14225 machine_mode inner_mode = GET_MODE_INNER (mode);
14226 rtx tmp = gen_reg_rtx (mode);
14227 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14228 rtx sel = swap_selector_for_mode (mode);
14229 rtx vperm;
14231 gcc_assert (REG_P (op1));
14232 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14233 emit_insn (gen_rtx_SET (tmp, vperm));
14234 emit_insn (gen_rtx_SET (op0, stvx));
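/* Editorial summary of the three helpers above, as instruction-level
   pseudo-code for -maltivec=be on a little-endian target:

     load:   vtmp = lvxl  rA,rB        ; then  vD = vperm vtmp,vtmp,swap
     store:  vtmp = vperm vS,vS,swap   ; then  stvxl vtmp,rA,rB

   i.e. loads are followed, and stores preceded, by an element-reversing
   permute so the program observes big-endian element order.  */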
14237 static rtx
14238 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14240 rtx pat, addr;
14241 tree arg0 = CALL_EXPR_ARG (exp, 0);
14242 tree arg1 = CALL_EXPR_ARG (exp, 1);
14243 machine_mode tmode = insn_data[icode].operand[0].mode;
14244 machine_mode mode0 = Pmode;
14245 machine_mode mode1 = Pmode;
14246 rtx op0 = expand_normal (arg0);
14247 rtx op1 = expand_normal (arg1);
14249 if (icode == CODE_FOR_nothing)
14250 /* Builtin not supported on this processor. */
14251 return 0;
14253 /* If we got invalid arguments bail out before generating bad rtl. */
14254 if (arg0 == error_mark_node || arg1 == error_mark_node)
14255 return const0_rtx;
14257 if (target == 0
14258 || GET_MODE (target) != tmode
14259 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14260 target = gen_reg_rtx (tmode);
14262 op1 = copy_to_mode_reg (mode1, op1);
14264 /* For LVX, express the RTL accurately by ANDing the address with -16.
14265 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14266 so the raw address is fine. */
14267 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14268 || icode == CODE_FOR_altivec_lvx_v2di_2op
14269 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14270 || icode == CODE_FOR_altivec_lvx_v4si_2op
14271 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14272 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14274 rtx rawaddr;
14275 if (op0 == const0_rtx)
14276 rawaddr = op1;
14277 else
14279 op0 = copy_to_mode_reg (mode0, op0);
14280 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14282 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14283 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14285 /* For -maltivec=be, emit the load and follow it up with a
14286 permute to swap the elements. */
14287 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14289 rtx temp = gen_reg_rtx (tmode);
14290 emit_insn (gen_rtx_SET (temp, addr));
14292 rtx sel = swap_selector_for_mode (tmode);
14293 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14294 UNSPEC_VPERM);
14295 emit_insn (gen_rtx_SET (target, vperm));
14297 else
14298 emit_insn (gen_rtx_SET (target, addr));
14300 else
14302 if (op0 == const0_rtx)
14303 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14304 else
14306 op0 = copy_to_mode_reg (mode0, op0);
14307 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14308 gen_rtx_PLUS (Pmode, op1, op0));
14311 pat = GEN_FCN (icode) (target, addr);
14312 if (! pat)
14313 return 0;
14314 emit_insn (pat);
14317 return target;
14320 static rtx
14321 spe_expand_stv_builtin (enum insn_code icode, tree exp)
14323 tree arg0 = CALL_EXPR_ARG (exp, 0);
14324 tree arg1 = CALL_EXPR_ARG (exp, 1);
14325 tree arg2 = CALL_EXPR_ARG (exp, 2);
14326 rtx op0 = expand_normal (arg0);
14327 rtx op1 = expand_normal (arg1);
14328 rtx op2 = expand_normal (arg2);
14329 rtx pat;
14330 machine_mode mode0 = insn_data[icode].operand[0].mode;
14331 machine_mode mode1 = insn_data[icode].operand[1].mode;
14332 machine_mode mode2 = insn_data[icode].operand[2].mode;
14334 /* Invalid arguments. Bail before doing anything stoopid! */
14335 if (arg0 == error_mark_node
14336 || arg1 == error_mark_node
14337 || arg2 == error_mark_node)
14338 return const0_rtx;
14340 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
14341 op0 = copy_to_mode_reg (mode2, op0);
14342 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
14343 op1 = copy_to_mode_reg (mode0, op1);
14344 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14345 op2 = copy_to_mode_reg (mode1, op2);
14347 pat = GEN_FCN (icode) (op1, op2, op0);
14348 if (pat)
14349 emit_insn (pat);
14350 return NULL_RTX;
14353 static rtx
14354 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14356 tree arg0 = CALL_EXPR_ARG (exp, 0);
14357 tree arg1 = CALL_EXPR_ARG (exp, 1);
14358 tree arg2 = CALL_EXPR_ARG (exp, 2);
14359 rtx op0 = expand_normal (arg0);
14360 rtx op1 = expand_normal (arg1);
14361 rtx op2 = expand_normal (arg2);
14362 rtx pat, addr;
14363 machine_mode tmode = insn_data[icode].operand[0].mode;
14364 machine_mode mode1 = Pmode;
14365 machine_mode mode2 = Pmode;
14367 /* Invalid arguments. Bail before doing anything stoopid! */
14368 if (arg0 == error_mark_node
14369 || arg1 == error_mark_node
14370 || arg2 == error_mark_node)
14371 return const0_rtx;
14373 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14374 op0 = copy_to_mode_reg (tmode, op0);
14376 op2 = copy_to_mode_reg (mode2, op2);
14378 if (op1 == const0_rtx)
14380 addr = gen_rtx_MEM (tmode, op2);
14382 else
14384 op1 = copy_to_mode_reg (mode1, op1);
14385 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14388 pat = GEN_FCN (icode) (addr, op0);
14389 if (pat)
14390 emit_insn (pat);
14391 return NULL_RTX;
14394 static rtx
14395 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14397 tree arg0 = CALL_EXPR_ARG (exp, 0);
14398 tree arg1 = CALL_EXPR_ARG (exp, 1);
14399 tree arg2 = CALL_EXPR_ARG (exp, 2);
14400 rtx op0 = expand_normal (arg0);
14401 rtx op1 = expand_normal (arg1);
14402 rtx op2 = expand_normal (arg2);
14403 rtx pat, addr, rawaddr;
14404 machine_mode tmode = insn_data[icode].operand[0].mode;
14405 machine_mode smode = insn_data[icode].operand[1].mode;
14406 machine_mode mode1 = Pmode;
14407 machine_mode mode2 = Pmode;
14409 /* Invalid arguments. Bail before doing anything stoopid! */
14410 if (arg0 == error_mark_node
14411 || arg1 == error_mark_node
14412 || arg2 == error_mark_node)
14413 return const0_rtx;
14415 op2 = copy_to_mode_reg (mode2, op2);
14417 /* For STVX, express the RTL accurately by ANDing the address with -16.
14418 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14419 so the raw address is fine. */
14420 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14421 || icode == CODE_FOR_altivec_stvx_v2di_2op
14422 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14423 || icode == CODE_FOR_altivec_stvx_v4si_2op
14424 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14425 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14427 if (op1 == const0_rtx)
14428 rawaddr = op2;
14429 else
14431 op1 = copy_to_mode_reg (mode1, op1);
14432 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14435 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14436 addr = gen_rtx_MEM (tmode, addr);
14438 op0 = copy_to_mode_reg (tmode, op0);
14440 /* For -maltivec=be, emit a permute to swap the elements, followed
14441 by the store. */
14442 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14444 rtx temp = gen_reg_rtx (tmode);
14445 rtx sel = swap_selector_for_mode (tmode);
14446 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14447 UNSPEC_VPERM);
14448 emit_insn (gen_rtx_SET (temp, vperm));
14449 emit_insn (gen_rtx_SET (addr, temp));
14451 else
14452 emit_insn (gen_rtx_SET (addr, op0));
14454 else
14456 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14457 op0 = copy_to_mode_reg (smode, op0);
14459 if (op1 == const0_rtx)
14460 addr = gen_rtx_MEM (tmode, op2);
14461 else
14463 op1 = copy_to_mode_reg (mode1, op1);
14464 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14467 pat = GEN_FCN (icode) (addr, op0);
14468 if (pat)
14469 emit_insn (pat);
14472 return NULL_RTX;
14475 /* Return the appropriate SPR number associated with the given builtin. */
14476 static inline HOST_WIDE_INT
14477 htm_spr_num (enum rs6000_builtins code)
14479 if (code == HTM_BUILTIN_GET_TFHAR
14480 || code == HTM_BUILTIN_SET_TFHAR)
14481 return TFHAR_SPR;
14482 else if (code == HTM_BUILTIN_GET_TFIAR
14483 || code == HTM_BUILTIN_SET_TFIAR)
14484 return TFIAR_SPR;
14485 else if (code == HTM_BUILTIN_GET_TEXASR
14486 || code == HTM_BUILTIN_SET_TEXASR)
14487 return TEXASR_SPR;
14488 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14489 || code == HTM_BUILTIN_SET_TEXASRU);
14490 return TEXASRU_SPR;
14493 /* Return the appropriate SPR regno associated with the given builtin. */
14494 static inline HOST_WIDE_INT
14495 htm_spr_regno (enum rs6000_builtins code)
14497 if (code == HTM_BUILTIN_GET_TFHAR
14498 || code == HTM_BUILTIN_SET_TFHAR)
14499 return TFHAR_REGNO;
14500 else if (code == HTM_BUILTIN_GET_TFIAR
14501 || code == HTM_BUILTIN_SET_TFIAR)
14502 return TFIAR_REGNO;
14503 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14504 || code == HTM_BUILTIN_SET_TEXASR
14505 || code == HTM_BUILTIN_GET_TEXASRU
14506 || code == HTM_BUILTIN_SET_TEXASRU);
14507 return TEXASR_REGNO;
14510 /* Return the correct ICODE value depending on whether we are
14511 setting or reading the HTM SPRs. */
14512 static inline enum insn_code
14513 rs6000_htm_spr_icode (bool nonvoid)
14515 if (nonvoid)
14516 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14517 else
14518 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
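/* For example (illustrative only), on a 64-bit target a call such as

     unsigned long t = __builtin_get_texasr ();

   is nonvoid, so it selects CODE_FOR_htm_mfspr_di and is handed the
   TEXASR SPR number and register picked by the two helpers above.  */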
14521 /* Expand the HTM builtin in EXP and store the result in TARGET.
14522 Store true in *EXPANDEDP if we found a builtin to expand. */
14523 static rtx
14524 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14526 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14527 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14528 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14529 const struct builtin_description *d;
14530 size_t i;
14532 *expandedp = true;
14534 if (!TARGET_POWERPC64
14535 && (fcode == HTM_BUILTIN_TABORTDC
14536 || fcode == HTM_BUILTIN_TABORTDCI))
14538 size_t uns_fcode = (size_t)fcode;
14539 const char *name = rs6000_builtin_info[uns_fcode].name;
14540 error ("builtin %s is only valid in 64-bit mode", name);
14541 return const0_rtx;
14544 /* Expand the HTM builtins. */
14545 d = bdesc_htm;
14546 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14547 if (d->code == fcode)
14549 rtx op[MAX_HTM_OPERANDS], pat;
14550 int nopnds = 0;
14551 tree arg;
14552 call_expr_arg_iterator iter;
14553 unsigned attr = rs6000_builtin_info[fcode].attr;
14554 enum insn_code icode = d->icode;
14555 const struct insn_operand_data *insn_op;
14556 bool uses_spr = (attr & RS6000_BTC_SPR);
14557 rtx cr = NULL_RTX;
14559 if (uses_spr)
14560 icode = rs6000_htm_spr_icode (nonvoid);
14561 insn_op = &insn_data[icode].operand[0];
14563 if (nonvoid)
14565 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14566 if (!target
14567 || GET_MODE (target) != tmode
14568 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14569 target = gen_reg_rtx (tmode);
14570 if (uses_spr)
14571 op[nopnds++] = target;
14574 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14576 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14577 return const0_rtx;
14579 insn_op = &insn_data[icode].operand[nopnds];
14581 op[nopnds] = expand_normal (arg);
14583 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14585 if (!strcmp (insn_op->constraint, "n"))
14587 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14588 if (!CONST_INT_P (op[nopnds]))
14589 error ("argument %d must be an unsigned literal", arg_num);
14590 else
14591 error ("argument %d is an unsigned literal that is "
14592 "out of range", arg_num);
14593 return const0_rtx;
14595 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14598 nopnds++;
14601 /* Handle the builtins for extended mnemonics. These accept
14602 no arguments, but map to builtins that take arguments. */
14603 switch (fcode)
14605 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14606 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14607 op[nopnds++] = GEN_INT (1);
14608 if (flag_checking)
14609 attr |= RS6000_BTC_UNARY;
14610 break;
14611 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14612 op[nopnds++] = GEN_INT (0);
14613 if (flag_checking)
14614 attr |= RS6000_BTC_UNARY;
14615 break;
14616 default:
14617 break;
14620 /* If this builtin accesses SPRs, then pass in the appropriate
14621 SPR number and SPR regno as the last two operands. */
14622 if (uses_spr)
14624 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14625 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14626 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14628 /* If this builtin accesses a CR, then pass in a scratch
14629 CR as the last operand. */
14630 else if (attr & RS6000_BTC_CR)
14631 { cr = gen_reg_rtx (CCmode);
14632 op[nopnds++] = cr;
14635 if (flag_checking)
14637 int expected_nopnds = 0;
14638 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14639 expected_nopnds = 1;
14640 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14641 expected_nopnds = 2;
14642 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14643 expected_nopnds = 3;
14644 if (!(attr & RS6000_BTC_VOID))
14645 expected_nopnds += 1;
14646 if (uses_spr)
14647 expected_nopnds += 2;
14649 gcc_assert (nopnds == expected_nopnds
14650 && nopnds <= MAX_HTM_OPERANDS);
14653 switch (nopnds)
14655 case 1:
14656 pat = GEN_FCN (icode) (op[0]);
14657 break;
14658 case 2:
14659 pat = GEN_FCN (icode) (op[0], op[1]);
14660 break;
14661 case 3:
14662 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14663 break;
14664 case 4:
14665 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14666 break;
14667 default:
14668 gcc_unreachable ();
14670 if (!pat)
14671 return NULL_RTX;
14672 emit_insn (pat);
14674 if (attr & RS6000_BTC_CR)
14676 if (fcode == HTM_BUILTIN_TBEGIN)
14678 /* Emit code to set TARGET to true or false depending on
14679 whether the tbegin. instruction succeeded or failed
14680 to start a transaction. We do this by placing the 1's
14681 complement of CR's EQ bit into TARGET. */
14682 rtx scratch = gen_reg_rtx (SImode);
14683 emit_insn (gen_rtx_SET (scratch,
14684 gen_rtx_EQ (SImode, cr,
14685 const0_rtx)));
14686 emit_insn (gen_rtx_SET (target,
14687 gen_rtx_XOR (SImode, scratch,
14688 GEN_INT (1))));
14690 else
14692 /* Emit code to copy the 4-bit condition register field
14693 CR into the least significant end of register TARGET. */
14694 rtx scratch1 = gen_reg_rtx (SImode);
14695 rtx scratch2 = gen_reg_rtx (SImode);
14696 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14697 emit_insn (gen_movcc (subreg, cr));
14698 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14699 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14703 if (nonvoid)
14704 return target;
14705 return const0_rtx;
14708 *expandedp = false;
14709 return NULL_RTX;
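/* Illustrative use of the CR handling above (a sketch, not part of the
   expander):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
       }

   __builtin_tbegin is documented to return nonzero when the transaction
   starts, which is why the expander stores the complement of the CR
   field's EQ bit into TARGET.  */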
14712 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14714 static rtx
14715 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14716 rtx target)
14718 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14719 if (fcode == RS6000_BUILTIN_CPU_INIT)
14720 return const0_rtx;
14722 if (target == 0 || GET_MODE (target) != SImode)
14723 target = gen_reg_rtx (SImode);
14725 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14726 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14727 if (TREE_CODE (arg) != STRING_CST)
14729 error ("builtin %s only accepts a string argument",
14730 rs6000_builtin_info[(size_t) fcode].name);
14731 return const0_rtx;
14734 if (fcode == RS6000_BUILTIN_CPU_IS)
14736 const char *cpu = TREE_STRING_POINTER (arg);
14737 rtx cpuid = NULL_RTX;
14738 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14739 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14741 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14742 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14743 break;
14745 if (cpuid == NULL_RTX)
14747 /* Invalid CPU argument. */
14748 error ("cpu %s is an invalid argument to builtin %s",
14749 cpu, rs6000_builtin_info[(size_t) fcode].name);
14750 return const0_rtx;
14753 rtx platform = gen_reg_rtx (SImode);
14754 rtx tcbmem = gen_const_mem (SImode,
14755 gen_rtx_PLUS (Pmode,
14756 gen_rtx_REG (Pmode, TLS_REGNUM),
14757 GEN_INT (TCB_PLATFORM_OFFSET)));
14758 emit_move_insn (platform, tcbmem);
14759 emit_insn (gen_eqsi3 (target, platform, cpuid));
14761 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14763 const char *hwcap = TREE_STRING_POINTER (arg);
14764 rtx mask = NULL_RTX;
14765 int hwcap_offset;
14766 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14767 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14769 mask = GEN_INT (cpu_supports_info[i].mask);
14770 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14771 break;
14773 if (mask == NULL_RTX)
14775 /* Invalid HWCAP argument. */
14776 error ("hwcap %s is an invalid argument to builtin %s",
14777 hwcap, rs6000_builtin_info[(size_t) fcode].name);
14778 return const0_rtx;
14781 rtx tcb_hwcap = gen_reg_rtx (SImode);
14782 rtx tcbmem = gen_const_mem (SImode,
14783 gen_rtx_PLUS (Pmode,
14784 gen_rtx_REG (Pmode, TLS_REGNUM),
14785 GEN_INT (hwcap_offset)));
14786 emit_move_insn (tcb_hwcap, tcbmem);
14787 rtx scratch1 = gen_reg_rtx (SImode);
14788 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14789 rtx scratch2 = gen_reg_rtx (SImode);
14790 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14791 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14794 /* Record that we have expanded a CPU builtin, so that we can later
14795 emit a reference to the special symbol exported by LIBC to ensure we
14796 do not link against an old LIBC that doesn't support this feature. */
14797 cpu_builtin_p = true;
14799 #else
14800 /* For old LIBCs, always return FALSE. */
14801 emit_move_insn (target, GEN_INT (0));
14802 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14804 return target;
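/* Illustrative only: with a libc that provides the HWCAP words in the
   TCB, a test such as

     if (__builtin_cpu_is ("power9"))
       ...

   expands to a single SImode load from TCB_PLATFORM_OFFSET followed by
   a compare against the fixed platform id, with no run-time call.  */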
14807 static rtx
14808 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14810 rtx pat;
14811 tree arg0 = CALL_EXPR_ARG (exp, 0);
14812 tree arg1 = CALL_EXPR_ARG (exp, 1);
14813 tree arg2 = CALL_EXPR_ARG (exp, 2);
14814 rtx op0 = expand_normal (arg0);
14815 rtx op1 = expand_normal (arg1);
14816 rtx op2 = expand_normal (arg2);
14817 machine_mode tmode = insn_data[icode].operand[0].mode;
14818 machine_mode mode0 = insn_data[icode].operand[1].mode;
14819 machine_mode mode1 = insn_data[icode].operand[2].mode;
14820 machine_mode mode2 = insn_data[icode].operand[3].mode;
14822 if (icode == CODE_FOR_nothing)
14823 /* Builtin not supported on this processor. */
14824 return 0;
14826 /* If we got invalid arguments bail out before generating bad rtl. */
14827 if (arg0 == error_mark_node
14828 || arg1 == error_mark_node
14829 || arg2 == error_mark_node)
14830 return const0_rtx;
14832 /* Check and prepare argument depending on the instruction code.
14834 Note that a switch statement instead of the sequence of tests
14835 would be incorrect as many of the CODE_FOR values could be
14836 CODE_FOR_nothing and that would yield multiple alternatives
14837 with identical values. We'd never reach here at runtime in
14838 this case. */
14839 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14840 || icode == CODE_FOR_altivec_vsldoi_v4si
14841 || icode == CODE_FOR_altivec_vsldoi_v8hi
14842 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14844 /* Only allow 4-bit unsigned literals. */
14845 STRIP_NOPS (arg2);
14846 if (TREE_CODE (arg2) != INTEGER_CST
14847 || TREE_INT_CST_LOW (arg2) & ~0xf)
14849 error ("argument 3 must be a 4-bit unsigned literal");
14850 return const0_rtx;
14853 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14854 || icode == CODE_FOR_vsx_xxpermdi_v2di
14855 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14856 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14857 || icode == CODE_FOR_vsx_xxsldwi_v4si
14858 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14859 || icode == CODE_FOR_vsx_xxsldwi_v2di
14860 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14862 /* Only allow 2-bit unsigned literals. */
14863 STRIP_NOPS (arg2);
14864 if (TREE_CODE (arg2) != INTEGER_CST
14865 || TREE_INT_CST_LOW (arg2) & ~0x3)
14867 error ("argument 3 must be a 2-bit unsigned literal");
14868 return const0_rtx;
14871 else if (icode == CODE_FOR_vsx_set_v2df
14872 || icode == CODE_FOR_vsx_set_v2di
14873 || icode == CODE_FOR_bcdadd
14874 || icode == CODE_FOR_bcdadd_lt
14875 || icode == CODE_FOR_bcdadd_eq
14876 || icode == CODE_FOR_bcdadd_gt
14877 || icode == CODE_FOR_bcdsub
14878 || icode == CODE_FOR_bcdsub_lt
14879 || icode == CODE_FOR_bcdsub_eq
14880 || icode == CODE_FOR_bcdsub_gt)
14882 /* Only allow 1-bit unsigned literals. */
14883 STRIP_NOPS (arg2);
14884 if (TREE_CODE (arg2) != INTEGER_CST
14885 || TREE_INT_CST_LOW (arg2) & ~0x1)
14887 error ("argument 3 must be a 1-bit unsigned literal");
14888 return const0_rtx;
14891 else if (icode == CODE_FOR_dfp_ddedpd_dd
14892 || icode == CODE_FOR_dfp_ddedpd_td)
14894 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14895 STRIP_NOPS (arg0);
14896 if (TREE_CODE (arg0) != INTEGER_CST
14897 || TREE_INT_CST_LOW (arg0) & ~0x3)
14899 error ("argument 1 must be 0 or 2");
14900 return const0_rtx;
14903 else if (icode == CODE_FOR_dfp_denbcd_dd
14904 || icode == CODE_FOR_dfp_denbcd_td)
14906 /* Only allow 1-bit unsigned literals. */
14907 STRIP_NOPS (arg0);
14908 if (TREE_CODE (arg0) != INTEGER_CST
14909 || TREE_INT_CST_LOW (arg0) & ~0x1)
14911 error ("argument 1 must be a 1-bit unsigned literal");
14912 return const0_rtx;
14915 else if (icode == CODE_FOR_dfp_dscli_dd
14916 || icode == CODE_FOR_dfp_dscli_td
14917 || icode == CODE_FOR_dfp_dscri_dd
14918 || icode == CODE_FOR_dfp_dscri_td)
14920 /* Only allow 6-bit unsigned literals. */
14921 STRIP_NOPS (arg1);
14922 if (TREE_CODE (arg1) != INTEGER_CST
14923 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14925 error ("argument 2 must be a 6-bit unsigned literal");
14926 return const0_rtx;
14929 else if (icode == CODE_FOR_crypto_vshasigmaw
14930 || icode == CODE_FOR_crypto_vshasigmad)
14932 /* Check whether the 2nd and 3rd arguments are integer constants and in
14933 range and prepare arguments. */
14934 STRIP_NOPS (arg1);
14935 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
14937 error ("argument 2 must be 0 or 1");
14938 return const0_rtx;
14941 STRIP_NOPS (arg2);
14942 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
14944 error ("argument 3 must be in the range 0..15");
14945 return const0_rtx;
14949 if (target == 0
14950 || GET_MODE (target) != tmode
14951 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14952 target = gen_reg_rtx (tmode);
14954 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14955 op0 = copy_to_mode_reg (mode0, op0);
14956 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14957 op1 = copy_to_mode_reg (mode1, op1);
14958 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14959 op2 = copy_to_mode_reg (mode2, op2);
14961 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
14962 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
14963 else
14964 pat = GEN_FCN (icode) (target, op0, op1, op2);
14965 if (! pat)
14966 return 0;
14967 emit_insn (pat);
14969 return target;
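/* For example (a sketch), user code such as

     vec_sld (a, b, 3)

   routes through one of the altivec_vsldoi patterns above, so the third
   operand must be a 4-bit literal; a variable or out-of-range constant
   is diagnosed here instead of producing bad rtl.  */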
14972 /* Expand the lvx builtins. */
14973 static rtx
14974 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
14976 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14977 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14978 tree arg0;
14979 machine_mode tmode, mode0;
14980 rtx pat, op0;
14981 enum insn_code icode;
14983 switch (fcode)
14985 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
14986 icode = CODE_FOR_vector_altivec_load_v16qi;
14987 break;
14988 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
14989 icode = CODE_FOR_vector_altivec_load_v8hi;
14990 break;
14991 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
14992 icode = CODE_FOR_vector_altivec_load_v4si;
14993 break;
14994 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
14995 icode = CODE_FOR_vector_altivec_load_v4sf;
14996 break;
14997 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
14998 icode = CODE_FOR_vector_altivec_load_v2df;
14999 break;
15000 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15001 icode = CODE_FOR_vector_altivec_load_v2di;
15002 break;
15003 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15004 icode = CODE_FOR_vector_altivec_load_v1ti;
15005 break;
15006 default:
15007 *expandedp = false;
15008 return NULL_RTX;
15011 *expandedp = true;
15013 arg0 = CALL_EXPR_ARG (exp, 0);
15014 op0 = expand_normal (arg0);
15015 tmode = insn_data[icode].operand[0].mode;
15016 mode0 = insn_data[icode].operand[1].mode;
15018 if (target == 0
15019 || GET_MODE (target) != tmode
15020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15021 target = gen_reg_rtx (tmode);
15023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15024 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15026 pat = GEN_FCN (icode) (target, op0);
15027 if (! pat)
15028 return 0;
15029 emit_insn (pat);
15030 return target;
15033 /* Expand the stvx builtins. */
15034 static rtx
15035 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15036 bool *expandedp)
15038 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15039 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15040 tree arg0, arg1;
15041 machine_mode mode0, mode1;
15042 rtx pat, op0, op1;
15043 enum insn_code icode;
15045 switch (fcode)
15047 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15048 icode = CODE_FOR_vector_altivec_store_v16qi;
15049 break;
15050 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15051 icode = CODE_FOR_vector_altivec_store_v8hi;
15052 break;
15053 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15054 icode = CODE_FOR_vector_altivec_store_v4si;
15055 break;
15056 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15057 icode = CODE_FOR_vector_altivec_store_v4sf;
15058 break;
15059 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15060 icode = CODE_FOR_vector_altivec_store_v2df;
15061 break;
15062 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15063 icode = CODE_FOR_vector_altivec_store_v2di;
15064 break;
15065 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15066 icode = CODE_FOR_vector_altivec_store_v1ti;
15067 break;
15068 default:
15069 *expandedp = false;
15070 return NULL_RTX;
15073 arg0 = CALL_EXPR_ARG (exp, 0);
15074 arg1 = CALL_EXPR_ARG (exp, 1);
15075 op0 = expand_normal (arg0);
15076 op1 = expand_normal (arg1);
15077 mode0 = insn_data[icode].operand[0].mode;
15078 mode1 = insn_data[icode].operand[1].mode;
15080 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15081 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15082 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15083 op1 = copy_to_mode_reg (mode1, op1);
15085 pat = GEN_FCN (icode) (op0, op1);
15086 if (pat)
15087 emit_insn (pat);
15089 *expandedp = true;
15090 return NULL_RTX;
15093 /* Expand the dst builtins. */
15094 static rtx
15095 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15096 bool *expandedp)
15098 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15099 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15100 tree arg0, arg1, arg2;
15101 machine_mode mode0, mode1;
15102 rtx pat, op0, op1, op2;
15103 const struct builtin_description *d;
15104 size_t i;
15106 *expandedp = false;
15108 /* Handle DST variants. */
15109 d = bdesc_dst;
15110 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15111 if (d->code == fcode)
15113 arg0 = CALL_EXPR_ARG (exp, 0);
15114 arg1 = CALL_EXPR_ARG (exp, 1);
15115 arg2 = CALL_EXPR_ARG (exp, 2);
15116 op0 = expand_normal (arg0);
15117 op1 = expand_normal (arg1);
15118 op2 = expand_normal (arg2);
15119 mode0 = insn_data[d->icode].operand[0].mode;
15120 mode1 = insn_data[d->icode].operand[1].mode;
15122 /* Invalid arguments, bail out before generating bad rtl. */
15123 if (arg0 == error_mark_node
15124 || arg1 == error_mark_node
15125 || arg2 == error_mark_node)
15126 return const0_rtx;
15128 *expandedp = true;
15129 STRIP_NOPS (arg2);
15130 if (TREE_CODE (arg2) != INTEGER_CST
15131 || TREE_INT_CST_LOW (arg2) & ~0x3)
15133 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15134 return const0_rtx;
15137 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15138 op0 = copy_to_mode_reg (Pmode, op0);
15139 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15140 op1 = copy_to_mode_reg (mode1, op1);
15142 pat = GEN_FCN (d->icode) (op0, op1, op2);
15143 if (pat != 0)
15144 emit_insn (pat);
15146 return NULL_RTX;
15149 return NULL_RTX;
15152 /* Expand vec_init builtin. */
15153 static rtx
15154 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15156 machine_mode tmode = TYPE_MODE (type);
15157 machine_mode inner_mode = GET_MODE_INNER (tmode);
15158 int i, n_elt = GET_MODE_NUNITS (tmode);
15160 gcc_assert (VECTOR_MODE_P (tmode));
15161 gcc_assert (n_elt == call_expr_nargs (exp));
15163 if (!target || !register_operand (target, tmode))
15164 target = gen_reg_rtx (tmode);
15166 /* If we have a vector composed of a single element, such as V1TImode, do
15167 the initialization directly. */
15168 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15170 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15171 emit_move_insn (target, gen_lowpart (tmode, x));
15173 else
15175 rtvec v = rtvec_alloc (n_elt);
15177 for (i = 0; i < n_elt; ++i)
15179 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15180 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15183 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15186 return target;
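/* Sketch: for a four-element initialization such as

     (vector int) { a, b, c, d }

   the loop above collects the four lowpart rtxes into a PARALLEL and
   lets rs6000_expand_vector_init choose the actual instruction
   sequence.  */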
15189 /* Return the integer constant in ARG. Constrain it to be in the range
15190 of the subparts of VEC_TYPE; issue an error if not. */
15192 static int
15193 get_element_number (tree vec_type, tree arg)
15195 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15197 if (!tree_fits_uhwi_p (arg)
15198 || (elt = tree_to_uhwi (arg), elt > max))
15200 error ("selector must be an integer constant in the range 0..%wi", max);
15201 return 0;
15204 return elt;
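/* E.g. for a vector of eight halfwords max is 7, so a selector of 8
   triggers "selector must be an integer constant in the range 0..7"
   and 0 is returned as a safe fallback.  */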
15207 /* Expand vec_set builtin. */
15208 static rtx
15209 altivec_expand_vec_set_builtin (tree exp)
15211 machine_mode tmode, mode1;
15212 tree arg0, arg1, arg2;
15213 int elt;
15214 rtx op0, op1;
15216 arg0 = CALL_EXPR_ARG (exp, 0);
15217 arg1 = CALL_EXPR_ARG (exp, 1);
15218 arg2 = CALL_EXPR_ARG (exp, 2);
15220 tmode = TYPE_MODE (TREE_TYPE (arg0));
15221 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15222 gcc_assert (VECTOR_MODE_P (tmode));
15224 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15225 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15226 elt = get_element_number (TREE_TYPE (arg0), arg2);
15228 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15229 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15231 op0 = force_reg (tmode, op0);
15232 op1 = force_reg (mode1, op1);
15234 rs6000_expand_vector_set (op0, op1, elt);
15236 return op0;
15239 /* Expand vec_ext builtin. */
15240 static rtx
15241 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15243 machine_mode tmode, mode0;
15244 tree arg0, arg1;
15245 rtx op0;
15246 rtx op1;
15248 arg0 = CALL_EXPR_ARG (exp, 0);
15249 arg1 = CALL_EXPR_ARG (exp, 1);
15251 op0 = expand_normal (arg0);
15252 op1 = expand_normal (arg1);
15254 /* Call get_element_number to validate arg1 if it is a constant. */
15255 if (TREE_CODE (arg1) == INTEGER_CST)
15256 (void) get_element_number (TREE_TYPE (arg0), arg1);
15258 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15259 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15260 gcc_assert (VECTOR_MODE_P (mode0));
15262 op0 = force_reg (mode0, op0);
15264 if (optimize || !target || !register_operand (target, tmode))
15265 target = gen_reg_rtx (tmode);
15267 rs6000_expand_vector_extract (target, op0, op1);
15269 return target;
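/* Sketch: for a vec_extract-style call such as

     int x = vec_extract (v, 2);

   the constant selector 2 is range-checked by get_element_number above,
   while a non-constant selector is handed straight to
   rs6000_expand_vector_extract.  */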
15272 /* Expand the builtin in EXP and store the result in TARGET. Store
15273 true in *EXPANDEDP if we found a builtin to expand. */
15274 static rtx
15275 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15277 const struct builtin_description *d;
15278 size_t i;
15279 enum insn_code icode;
15280 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15281 tree arg0;
15282 rtx op0, pat;
15283 machine_mode tmode, mode0;
15284 enum rs6000_builtins fcode
15285 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15287 if (rs6000_overloaded_builtin_p (fcode))
15289 *expandedp = true;
15290 error ("unresolved overload for Altivec builtin %qF", fndecl);
15292 /* Given it is invalid, just generate a normal call. */
15293 return expand_call (exp, target, false);
15296 target = altivec_expand_ld_builtin (exp, target, expandedp);
15297 if (*expandedp)
15298 return target;
15300 target = altivec_expand_st_builtin (exp, target, expandedp);
15301 if (*expandedp)
15302 return target;
15304 target = altivec_expand_dst_builtin (exp, target, expandedp);
15305 if (*expandedp)
15306 return target;
15308 *expandedp = true;
15310 switch (fcode)
15312 case ALTIVEC_BUILTIN_STVX_V2DF:
15313 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15314 case ALTIVEC_BUILTIN_STVX_V2DI:
15315 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15316 case ALTIVEC_BUILTIN_STVX_V4SF:
15317 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15318 case ALTIVEC_BUILTIN_STVX:
15319 case ALTIVEC_BUILTIN_STVX_V4SI:
15320 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15321 case ALTIVEC_BUILTIN_STVX_V8HI:
15322 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15323 case ALTIVEC_BUILTIN_STVX_V16QI:
15324 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15325 case ALTIVEC_BUILTIN_STVEBX:
15326 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15327 case ALTIVEC_BUILTIN_STVEHX:
15328 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15329 case ALTIVEC_BUILTIN_STVEWX:
15330 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15331 case ALTIVEC_BUILTIN_STVXL_V2DF:
15332 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15333 case ALTIVEC_BUILTIN_STVXL_V2DI:
15334 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15335 case ALTIVEC_BUILTIN_STVXL_V4SF:
15336 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15337 case ALTIVEC_BUILTIN_STVXL:
15338 case ALTIVEC_BUILTIN_STVXL_V4SI:
15339 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15340 case ALTIVEC_BUILTIN_STVXL_V8HI:
15341 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15342 case ALTIVEC_BUILTIN_STVXL_V16QI:
15343 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15345 case ALTIVEC_BUILTIN_STVLX:
15346 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15347 case ALTIVEC_BUILTIN_STVLXL:
15348 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15349 case ALTIVEC_BUILTIN_STVRX:
15350 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15351 case ALTIVEC_BUILTIN_STVRXL:
15352 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15354 case VSX_BUILTIN_STXVD2X_V1TI:
15355 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15356 case VSX_BUILTIN_STXVD2X_V2DF:
15357 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15358 case VSX_BUILTIN_STXVD2X_V2DI:
15359 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15360 case VSX_BUILTIN_STXVW4X_V4SF:
15361 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15362 case VSX_BUILTIN_STXVW4X_V4SI:
15363 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15364 case VSX_BUILTIN_STXVW4X_V8HI:
15365 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15366 case VSX_BUILTIN_STXVW4X_V16QI:
15367 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15369 /* For the following on big endian, it's ok to use any appropriate
15370 unaligned-supporting store, so use a generic expander. For
15371 little-endian, the exact element-reversing instruction must
15372 be used. */
15373 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15375 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15376 : CODE_FOR_vsx_st_elemrev_v2df);
15377 return altivec_expand_stv_builtin (code, exp);
15379 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15381 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15382 : CODE_FOR_vsx_st_elemrev_v2di);
15383 return altivec_expand_stv_builtin (code, exp);
15385 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15387 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15388 : CODE_FOR_vsx_st_elemrev_v4sf);
15389 return altivec_expand_stv_builtin (code, exp);
15391 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15393 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15394 : CODE_FOR_vsx_st_elemrev_v4si);
15395 return altivec_expand_stv_builtin (code, exp);
15397 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15399 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15400 : CODE_FOR_vsx_st_elemrev_v8hi);
15401 return altivec_expand_stv_builtin (code, exp);
15403 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15405 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15406 : CODE_FOR_vsx_st_elemrev_v16qi);
15407 return altivec_expand_stv_builtin (code, exp);
15410 case ALTIVEC_BUILTIN_MFVSCR:
15411 icode = CODE_FOR_altivec_mfvscr;
15412 tmode = insn_data[icode].operand[0].mode;
15414 if (target == 0
15415 || GET_MODE (target) != tmode
15416 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15417 target = gen_reg_rtx (tmode);
15419 pat = GEN_FCN (icode) (target);
15420 if (! pat)
15421 return 0;
15422 emit_insn (pat);
15423 return target;
15425 case ALTIVEC_BUILTIN_MTVSCR:
15426 icode = CODE_FOR_altivec_mtvscr;
15427 arg0 = CALL_EXPR_ARG (exp, 0);
15428 op0 = expand_normal (arg0);
15429 mode0 = insn_data[icode].operand[0].mode;
15431 /* If we got invalid arguments bail out before generating bad rtl. */
15432 if (arg0 == error_mark_node)
15433 return const0_rtx;
15435 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15436 op0 = copy_to_mode_reg (mode0, op0);
15438 pat = GEN_FCN (icode) (op0);
15439 if (pat)
15440 emit_insn (pat);
15441 return NULL_RTX;
15443 case ALTIVEC_BUILTIN_DSSALL:
15444 emit_insn (gen_altivec_dssall ());
15445 return NULL_RTX;
15447 case ALTIVEC_BUILTIN_DSS:
15448 icode = CODE_FOR_altivec_dss;
15449 arg0 = CALL_EXPR_ARG (exp, 0);
15450 STRIP_NOPS (arg0);
15451 op0 = expand_normal (arg0);
15452 mode0 = insn_data[icode].operand[0].mode;
15454 /* If we got invalid arguments bail out before generating bad rtl. */
15455 if (arg0 == error_mark_node)
15456 return const0_rtx;
15458 if (TREE_CODE (arg0) != INTEGER_CST
15459 || TREE_INT_CST_LOW (arg0) & ~0x3)
15461 error ("argument to dss must be a 2-bit unsigned literal");
15462 return const0_rtx;
15465 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15466 op0 = copy_to_mode_reg (mode0, op0);
15468 emit_insn (gen_altivec_dss (op0));
15469 return NULL_RTX;
15471 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15472 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15473 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15474 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15475 case VSX_BUILTIN_VEC_INIT_V2DF:
15476 case VSX_BUILTIN_VEC_INIT_V2DI:
15477 case VSX_BUILTIN_VEC_INIT_V1TI:
15478 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15480 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15481 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15482 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15483 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15484 case VSX_BUILTIN_VEC_SET_V2DF:
15485 case VSX_BUILTIN_VEC_SET_V2DI:
15486 case VSX_BUILTIN_VEC_SET_V1TI:
15487 return altivec_expand_vec_set_builtin (exp);
15489 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15490 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15491 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15492 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15493 case VSX_BUILTIN_VEC_EXT_V2DF:
15494 case VSX_BUILTIN_VEC_EXT_V2DI:
15495 case VSX_BUILTIN_VEC_EXT_V1TI:
15496 return altivec_expand_vec_ext_builtin (exp, target);
15498 default:
15499 break;
15500 /* Fall through. */
15503 /* Expand abs* operations. */
15504 d = bdesc_abs;
15505 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15506 if (d->code == fcode)
15507 return altivec_expand_abs_builtin (d->icode, exp, target);
15509 /* Expand the AltiVec predicates. */
15510 d = bdesc_altivec_preds;
15511 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15512 if (d->code == fcode)
15513 return altivec_expand_predicate_builtin (d->icode, exp, target);
15515 /* LV* are funky. We initialized them differently. */
15516 switch (fcode)
15518 case ALTIVEC_BUILTIN_LVSL:
15519 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15520 exp, target, false);
15521 case ALTIVEC_BUILTIN_LVSR:
15522 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15523 exp, target, false);
15524 case ALTIVEC_BUILTIN_LVEBX:
15525 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15526 exp, target, false);
15527 case ALTIVEC_BUILTIN_LVEHX:
15528 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15529 exp, target, false);
15530 case ALTIVEC_BUILTIN_LVEWX:
15531 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15532 exp, target, false);
15533 case ALTIVEC_BUILTIN_LVXL_V2DF:
15534 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15535 exp, target, false);
15536 case ALTIVEC_BUILTIN_LVXL_V2DI:
15537 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15538 exp, target, false);
15539 case ALTIVEC_BUILTIN_LVXL_V4SF:
15540 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15541 exp, target, false);
15542 case ALTIVEC_BUILTIN_LVXL:
15543 case ALTIVEC_BUILTIN_LVXL_V4SI:
15544 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15545 exp, target, false);
15546 case ALTIVEC_BUILTIN_LVXL_V8HI:
15547 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15548 exp, target, false);
15549 case ALTIVEC_BUILTIN_LVXL_V16QI:
15550 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15551 exp, target, false);
15552 case ALTIVEC_BUILTIN_LVX_V2DF:
15553 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15554 exp, target, false);
15555 case ALTIVEC_BUILTIN_LVX_V2DI:
15556 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15557 exp, target, false);
15558 case ALTIVEC_BUILTIN_LVX_V4SF:
15559 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15560 exp, target, false);
15561 case ALTIVEC_BUILTIN_LVX:
15562 case ALTIVEC_BUILTIN_LVX_V4SI:
15563 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15564 exp, target, false);
15565 case ALTIVEC_BUILTIN_LVX_V8HI:
15566 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15567 exp, target, false);
15568 case ALTIVEC_BUILTIN_LVX_V16QI:
15569 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15570 exp, target, false);
15571 case ALTIVEC_BUILTIN_LVLX:
15572 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15573 exp, target, true);
15574 case ALTIVEC_BUILTIN_LVLXL:
15575 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15576 exp, target, true);
15577 case ALTIVEC_BUILTIN_LVRX:
15578 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15579 exp, target, true);
15580 case ALTIVEC_BUILTIN_LVRXL:
15581 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15582 exp, target, true);
15583 case VSX_BUILTIN_LXVD2X_V1TI:
15584 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15585 exp, target, false);
15586 case VSX_BUILTIN_LXVD2X_V2DF:
15587 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15588 exp, target, false);
15589 case VSX_BUILTIN_LXVD2X_V2DI:
15590 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15591 exp, target, false);
15592 case VSX_BUILTIN_LXVW4X_V4SF:
15593 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15594 exp, target, false);
15595 case VSX_BUILTIN_LXVW4X_V4SI:
15596 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15597 exp, target, false);
15598 case VSX_BUILTIN_LXVW4X_V8HI:
15599 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15600 exp, target, false);
15601 case VSX_BUILTIN_LXVW4X_V16QI:
15602 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15603 exp, target, false);
15604 /* For the following on big endian, it's ok to use any appropriate
15605 unaligned-supporting load, so use a generic expander. For
15606 little-endian, the exact element-reversing instruction must
15607 be used. */
15608 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15610 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15611 : CODE_FOR_vsx_ld_elemrev_v2df);
15612 return altivec_expand_lv_builtin (code, exp, target, false);
15614 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15616 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15617 : CODE_FOR_vsx_ld_elemrev_v2di);
15618 return altivec_expand_lv_builtin (code, exp, target, false);
15620 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15622 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15623 : CODE_FOR_vsx_ld_elemrev_v4sf);
15624 return altivec_expand_lv_builtin (code, exp, target, false);
15626 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15628 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15629 : CODE_FOR_vsx_ld_elemrev_v4si);
15630 return altivec_expand_lv_builtin (code, exp, target, false);
15632 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15634 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15635 : CODE_FOR_vsx_ld_elemrev_v8hi);
15636 return altivec_expand_lv_builtin (code, exp, target, false);
15638 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15640 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15641 : CODE_FOR_vsx_ld_elemrev_v16qi);
15642 return altivec_expand_lv_builtin (code, exp, target, false);
15644 break;
15645 default:
15646 break;
15647 /* Fall through. */
15650 *expandedp = false;
15651 return NULL_RTX;
15654 /* Expand the builtin in EXP and store the result in TARGET. Store
15655 true in *EXPANDEDP if we found a builtin to expand. */
15656 static rtx
15657 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15659 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15660 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15661 const struct builtin_description *d;
15662 size_t i;
15664 *expandedp = true;
15666 switch (fcode)
15668 case PAIRED_BUILTIN_STX:
15669 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15670 case PAIRED_BUILTIN_LX:
15671 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15672 default:
15673 break;
15674 /* Fall through. */
15677 /* Expand the paired predicates. */
15678 d = bdesc_paired_preds;
15679 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15680 if (d->code == fcode)
15681 return paired_expand_predicate_builtin (d->icode, exp, target);
15683 *expandedp = false;
15684 return NULL_RTX;
15687 /* Binops that need to be initialized manually, but can be expanded
15688 automagically by rs6000_expand_binop_builtin. */
15689 static const struct builtin_description bdesc_2arg_spe[] =
15691 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
15692 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
15693 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
15694 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
15695 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
15696 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
15697 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
15698 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
15699 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
15700 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
15701 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
15702 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
15703 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
15704 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
15705 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
15706 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
15707 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
15708 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
15709 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
15710 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
15711 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
15712 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
15715 /* Expand the builtin in EXP and store the result in TARGET. Store
15716 true in *EXPANDEDP if we found a builtin to expand.
15718 This expands the SPE builtins that are not simple unary and binary
15719 operations. */
15720 static rtx
15721 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
15723 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15724 tree arg1, arg0;
15725 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15726 enum insn_code icode;
15727 machine_mode tmode, mode0;
15728 rtx pat, op0;
15729 const struct builtin_description *d;
15730 size_t i;
15732 *expandedp = true;
15734 /* Syntax check for a 5-bit unsigned immediate. */
15735 switch (fcode)
15737 case SPE_BUILTIN_EVSTDD:
15738 case SPE_BUILTIN_EVSTDH:
15739 case SPE_BUILTIN_EVSTDW:
15740 case SPE_BUILTIN_EVSTWHE:
15741 case SPE_BUILTIN_EVSTWHO:
15742 case SPE_BUILTIN_EVSTWWE:
15743 case SPE_BUILTIN_EVSTWWO:
15744 arg1 = CALL_EXPR_ARG (exp, 2);
15745 if (TREE_CODE (arg1) != INTEGER_CST
15746 || TREE_INT_CST_LOW (arg1) & ~0x1f)
15748 error ("argument 2 must be a 5-bit unsigned literal");
15749 return const0_rtx;
15751 break;
15752 default:
15753 break;
15756 /* The evsplat*i instructions are not quite generic. */
15757 switch (fcode)
15759 case SPE_BUILTIN_EVSPLATFI:
15760 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
15761 exp, target);
15762 case SPE_BUILTIN_EVSPLATI:
15763 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
15764 exp, target);
15765 default:
15766 break;
15769 d = bdesc_2arg_spe;
15770 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
15771 if (d->code == fcode)
15772 return rs6000_expand_binop_builtin (d->icode, exp, target);
15774 d = bdesc_spe_predicates;
15775 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
15776 if (d->code == fcode)
15777 return spe_expand_predicate_builtin (d->icode, exp, target);
15779 d = bdesc_spe_evsel;
15780 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
15781 if (d->code == fcode)
15782 return spe_expand_evsel_builtin (d->icode, exp, target);
15784 switch (fcode)
15786 case SPE_BUILTIN_EVSTDDX:
15787 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
15788 case SPE_BUILTIN_EVSTDHX:
15789 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
15790 case SPE_BUILTIN_EVSTDWX:
15791 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
15792 case SPE_BUILTIN_EVSTWHEX:
15793 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
15794 case SPE_BUILTIN_EVSTWHOX:
15795 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
15796 case SPE_BUILTIN_EVSTWWEX:
15797 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
15798 case SPE_BUILTIN_EVSTWWOX:
15799 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
15800 case SPE_BUILTIN_EVSTDD:
15801 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
15802 case SPE_BUILTIN_EVSTDH:
15803 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
15804 case SPE_BUILTIN_EVSTDW:
15805 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
15806 case SPE_BUILTIN_EVSTWHE:
15807 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
15808 case SPE_BUILTIN_EVSTWHO:
15809 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
15810 case SPE_BUILTIN_EVSTWWE:
15811 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
15812 case SPE_BUILTIN_EVSTWWO:
15813 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
15814 case SPE_BUILTIN_MFSPEFSCR:
15815 icode = CODE_FOR_spe_mfspefscr;
15816 tmode = insn_data[icode].operand[0].mode;
15818 if (target == 0
15819 || GET_MODE (target) != tmode
15820 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15821 target = gen_reg_rtx (tmode);
15823 pat = GEN_FCN (icode) (target);
15824 if (! pat)
15825 return 0;
15826 emit_insn (pat);
15827 return target;
15828 case SPE_BUILTIN_MTSPEFSCR:
15829 icode = CODE_FOR_spe_mtspefscr;
15830 arg0 = CALL_EXPR_ARG (exp, 0);
15831 op0 = expand_normal (arg0);
15832 mode0 = insn_data[icode].operand[0].mode;
15834 if (arg0 == error_mark_node)
15835 return const0_rtx;
15837 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15838 op0 = copy_to_mode_reg (mode0, op0);
15840 pat = GEN_FCN (icode) (op0);
15841 if (pat)
15842 emit_insn (pat);
15843 return NULL_RTX;
15844 default:
15845 break;
15848 *expandedp = false;
15849 return NULL_RTX;
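/* For example (a sketch), if the offset operand of __builtin_spe_evstdd
   is not a literal in 0..31, the 5-bit check above reports
   "argument 2 must be a 5-bit unsigned literal" rather than emitting
   an invalid evstdd.  */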
15852 static rtx
15853 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15855 rtx pat, scratch, tmp;
15856 tree form = CALL_EXPR_ARG (exp, 0);
15857 tree arg0 = CALL_EXPR_ARG (exp, 1);
15858 tree arg1 = CALL_EXPR_ARG (exp, 2);
15859 rtx op0 = expand_normal (arg0);
15860 rtx op1 = expand_normal (arg1);
15861 machine_mode mode0 = insn_data[icode].operand[1].mode;
15862 machine_mode mode1 = insn_data[icode].operand[2].mode;
15863 int form_int;
15864 enum rtx_code code;
15866 if (TREE_CODE (form) != INTEGER_CST)
15868 error ("argument 1 of __builtin_paired_predicate must be a constant");
15869 return const0_rtx;
15871 else
15872 form_int = TREE_INT_CST_LOW (form);
15874 gcc_assert (mode0 == mode1);
15876 if (arg0 == error_mark_node || arg1 == error_mark_node)
15877 return const0_rtx;
15879 if (target == 0
15880 || GET_MODE (target) != SImode
15881 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15882 target = gen_reg_rtx (SImode);
15883 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15884 op0 = copy_to_mode_reg (mode0, op0);
15885 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15886 op1 = copy_to_mode_reg (mode1, op1);
15888 scratch = gen_reg_rtx (CCFPmode);
15890 pat = GEN_FCN (icode) (scratch, op0, op1);
15891 if (!pat)
15892 return const0_rtx;
15894 emit_insn (pat);
15896 switch (form_int)
15898 /* LT bit. */
15899 case 0:
15900 code = LT;
15901 break;
15902 /* GT bit. */
15903 case 1:
15904 code = GT;
15905 break;
15906 /* EQ bit. */
15907 case 2:
15908 code = EQ;
15909 break;
15910 /* UN bit. */
15911 case 3:
15912 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15913 return target;
15914 default:
15915 error ("argument 1 of __builtin_paired_predicate is out of range");
15916 return const0_rtx;
15919 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15920 emit_move_insn (target, tmp);
15921 return target;
15924 static rtx
15925 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15927 rtx pat, scratch, tmp;
15928 tree form = CALL_EXPR_ARG (exp, 0);
15929 tree arg0 = CALL_EXPR_ARG (exp, 1);
15930 tree arg1 = CALL_EXPR_ARG (exp, 2);
15931 rtx op0 = expand_normal (arg0);
15932 rtx op1 = expand_normal (arg1);
15933 machine_mode mode0 = insn_data[icode].operand[1].mode;
15934 machine_mode mode1 = insn_data[icode].operand[2].mode;
15935 int form_int;
15936 enum rtx_code code;
15938 if (TREE_CODE (form) != INTEGER_CST)
15940 error ("argument 1 of __builtin_spe_predicate must be a constant");
15941 return const0_rtx;
15943 else
15944 form_int = TREE_INT_CST_LOW (form);
15946 gcc_assert (mode0 == mode1);
15948 if (arg0 == error_mark_node || arg1 == error_mark_node)
15949 return const0_rtx;
15951 if (target == 0
15952 || GET_MODE (target) != SImode
15953 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
15954 target = gen_reg_rtx (SImode);
15956 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15957 op0 = copy_to_mode_reg (mode0, op0);
15958 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15959 op1 = copy_to_mode_reg (mode1, op1);
15961 scratch = gen_reg_rtx (CCmode);
15963 pat = GEN_FCN (icode) (scratch, op0, op1);
15964 if (! pat)
15965 return const0_rtx;
15966 emit_insn (pat);
15968 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
15969 _lower_. We use one compare, but look in different bits of the
15970 CR for each variant.
15972 There are 2 elements in each SPE simd type (upper/lower). The CR
15973 bits are set as follows:
15975 BIT 0 | BIT 1 | BIT 2 | BIT 3
15976 U | L | (U | L) | (U & L)
15978 So, for an "all" relationship, BIT 3 would be set.
15979 For an "any" relationship, BIT 2 would be set. Etc.
15981 Following traditional nomenclature, these bits map to:
15983 BIT 0 | BIT 1 | BIT 2 | BIT 3
15984 LT | GT | EQ | OV
15986 Later, we will generate rtl to look in the OV/EQ/LT/GT bit selected by the form argument below.
15989 switch (form_int)
15991 /* All variant. OV bit. */
15992 case 0:
15993 /* We need to get to the OV bit, which is the ORDERED bit. We
15994 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
15995 that's ugly and will make validate_condition_mode die.
15996 So let's just use another pattern. */
15997 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15998 return target;
15999 /* Any variant. EQ bit. */
16000 case 1:
16001 code = EQ;
16002 break;
16003 /* Upper variant. LT bit. */
16004 case 2:
16005 code = LT;
16006 break;
16007 /* Lower variant. GT bit. */
16008 case 3:
16009 code = GT;
16010 break;
16011 default:
16012 error ("argument 1 of __builtin_spe_predicate is out of range");
16013 return const0_rtx;
16016 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16017 emit_move_insn (target, tmp);
16019 return target;
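/* Sketch: a predicate builtin called with form 1 (the "any" variant)
   performs one compare into a CC register and then materializes the EQ
   bit, i.e. roughly

     (set (reg:SI target) (eq:SI (reg:CC scratch) (const_int 0)))

   per the table in the comment above.  */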
16022 /* The evsel builtins look like this:
16024 e = __builtin_spe_evsel_OP (a, b, c, d);
16026 and work like this:
16028 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16029 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16032 static rtx
16033 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16035 rtx pat, scratch;
16036 tree arg0 = CALL_EXPR_ARG (exp, 0);
16037 tree arg1 = CALL_EXPR_ARG (exp, 1);
16038 tree arg2 = CALL_EXPR_ARG (exp, 2);
16039 tree arg3 = CALL_EXPR_ARG (exp, 3);
16040 rtx op0 = expand_normal (arg0);
16041 rtx op1 = expand_normal (arg1);
16042 rtx op2 = expand_normal (arg2);
16043 rtx op3 = expand_normal (arg3);
16044 machine_mode mode0 = insn_data[icode].operand[1].mode;
16045 machine_mode mode1 = insn_data[icode].operand[2].mode;
16047 gcc_assert (mode0 == mode1);
16049 if (arg0 == error_mark_node || arg1 == error_mark_node
16050 || arg2 == error_mark_node || arg3 == error_mark_node)
16051 return const0_rtx;
16053 if (target == 0
16054 || GET_MODE (target) != mode0
16055 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16056 target = gen_reg_rtx (mode0);
16058 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16059 op0 = copy_to_mode_reg (mode0, op0);
16060 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16061 op1 = copy_to_mode_reg (mode0, op1);
16062 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
16063 op2 = copy_to_mode_reg (mode0, op2);
16064 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
16065 op3 = copy_to_mode_reg (mode0, op3);
16067 /* Generate the compare. */
16068 scratch = gen_reg_rtx (CCmode);
16069 pat = GEN_FCN (icode) (scratch, op0, op1);
16070 if (! pat)
16071 return const0_rtx;
16072 emit_insn (pat);
16074 if (mode0 == V2SImode)
16075 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
16076 else
16077 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
16079 return target;
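/* Sketch of the expansion above for the V2SImode case: one compare
   into a CC register followed by a single evsel,

     cc     = a *OP* b            ; the compare from GEN_FCN
     target = cc ? c : d          ; per-element select via gen_spe_evsel

   (pseudo-notation only; the float variant uses gen_spe_evsel_fs).  */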
16082 /* Raise an error message for a builtin function that is called without the
16083 appropriate target options being set. */
16085 static void
16086 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16088 size_t uns_fncode = (size_t)fncode;
16089 const char *name = rs6000_builtin_info[uns_fncode].name;
16090 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16092 gcc_assert (name != NULL);
16093 if ((fnmask & RS6000_BTM_CELL) != 0)
16094 error ("builtin function %s is only valid for the cell processor", name);
16095 else if ((fnmask & RS6000_BTM_VSX) != 0)
16096 error ("builtin function %s requires the -mvsx option", name);
16097 else if ((fnmask & RS6000_BTM_HTM) != 0)
16098 error ("builtin function %s requires the -mhtm option", name);
16099 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16100 error ("builtin function %s requires the -maltivec option", name);
16101 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16102 error ("builtin function %s requires the -mpaired option", name);
16103 else if ((fnmask & RS6000_BTM_SPE) != 0)
16104 error ("builtin function %s requires the -mspe option", name);
16105 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16106 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16107 error ("builtin function %s requires the -mhard-dfp and"
16108 " -mpower8-vector options", name);
16109 else if ((fnmask & RS6000_BTM_DFP) != 0)
16110 error ("builtin function %s requires the -mhard-dfp option", name);
16111 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16112 error ("builtin function %s requires the -mpower8-vector option", name);
16113 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16114 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16115 error ("builtin function %s requires the -mcpu=power9 and"
16116 " -m64 options", name);
16117 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16118 error ("builtin function %s requires the -mcpu=power9 option", name);
16119 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16120 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16121 error ("builtin function %s requires the -mcpu=power9 and"
16122 " -m64 options", name);
16123 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16124 error ("builtin function %s requires the -mcpu=power9 option", name);
16125 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16126 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16127 error ("builtin function %s requires the -mhard-float and"
16128 " -mlong-double-128 options", name);
16129 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16130 error ("builtin function %s requires the -mhard-float option", name);
16131 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16132 error ("builtin function %s requires the -mfloat128 option", name);
16133 else
16134 error ("builtin function %s is not supported with the current options",
16135 name);
16138 /* Target hook for early folding of built-ins, shamelessly stolen
16139 from ia64.c. */
16141 static tree
16142 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16143 tree *args, bool ignore ATTRIBUTE_UNUSED)
16145 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16147 enum rs6000_builtins fn_code
16148 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16149 switch (fn_code)
16151 case RS6000_BUILTIN_NANQ:
16152 case RS6000_BUILTIN_NANSQ:
16154 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16155 const char *str = c_getstr (*args);
16156 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16157 REAL_VALUE_TYPE real;
16159 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16160 return build_real (type, real);
16161 return NULL_TREE;
16163 case RS6000_BUILTIN_INFQ:
16164 case RS6000_BUILTIN_HUGE_VALQ:
16166 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16167 REAL_VALUE_TYPE inf;
16168 real_inf (&inf);
16169 return build_real (type, inf);
16171 default:
16172 break;
16175 #ifdef SUBTARGET_FOLD_BUILTIN
16176 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16177 #else
16178 return NULL_TREE;
16179 #endif
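/* Illustrative: a call like __builtin_nanq ("") is folded right here
   into a build_real NaN constant, so nothing is expanded for it
   later.  */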
16182 /* Expand an expression EXP that calls a built-in function,
16183 with result going to TARGET if that's convenient
16184 (and in mode MODE if that's convenient).
16185 SUBTARGET may be used as the target for computing one of EXP's operands.
16186 IGNORE is nonzero if the value is to be ignored. */
16188 static rtx
16189 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16190 machine_mode mode ATTRIBUTE_UNUSED,
16191 int ignore ATTRIBUTE_UNUSED)
16193 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16194 enum rs6000_builtins fcode
16195 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16196 size_t uns_fcode = (size_t)fcode;
16197 const struct builtin_description *d;
16198 size_t i;
16199 rtx ret;
16200 bool success;
16201 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16202 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16204 if (TARGET_DEBUG_BUILTIN)
16206 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16207 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16208 const char *name2 = ((icode != CODE_FOR_nothing)
16209 ? get_insn_name ((int)icode)
16210 : "nothing");
16211 const char *name3;
16213 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16215 default: name3 = "unknown"; break;
16216 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16217 case RS6000_BTC_UNARY: name3 = "unary"; break;
16218 case RS6000_BTC_BINARY: name3 = "binary"; break;
16219 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16220 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16221 case RS6000_BTC_ABS: name3 = "abs"; break;
16222 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
16223 case RS6000_BTC_DST: name3 = "dst"; break;
16227 fprintf (stderr,
16228 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16229 (name1) ? name1 : "---", fcode,
16230 (name2) ? name2 : "---", (int)icode,
16231 name3,
16232 func_valid_p ? "" : ", not valid");
16235 if (!func_valid_p)
16237 rs6000_invalid_builtin (fcode);
16239 /* Given it is invalid, just generate a normal call. */
16240 return expand_call (exp, target, ignore);
16243 switch (fcode)
16245 case RS6000_BUILTIN_RECIP:
16246 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16248 case RS6000_BUILTIN_RECIPF:
16249 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16251 case RS6000_BUILTIN_RSQRTF:
16252 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16254 case RS6000_BUILTIN_RSQRT:
16255 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16257 case POWER7_BUILTIN_BPERMD:
16258 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16259 ? CODE_FOR_bpermd_di
16260 : CODE_FOR_bpermd_si), exp, target);
16262 case RS6000_BUILTIN_GET_TB:
16263 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16264 target);
16266 case RS6000_BUILTIN_MFTB:
16267 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16268 ? CODE_FOR_rs6000_mftb_di
16269 : CODE_FOR_rs6000_mftb_si),
16270 target);
16272 case RS6000_BUILTIN_MFFS:
16273 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16275 case RS6000_BUILTIN_MTFSF:
16276 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16278 case RS6000_BUILTIN_CPU_INIT:
16279 case RS6000_BUILTIN_CPU_IS:
16280 case RS6000_BUILTIN_CPU_SUPPORTS:
16281 return cpu_expand_builtin (fcode, exp, target);
16283 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16284 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16286 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16287 : (int) CODE_FOR_altivec_lvsl_direct);
16288 machine_mode tmode = insn_data[icode].operand[0].mode;
16289 machine_mode mode = insn_data[icode].operand[1].mode;
16290 tree arg;
16291 rtx op, addr, pat;
16293 gcc_assert (TARGET_ALTIVEC);
16295 arg = CALL_EXPR_ARG (exp, 0);
16296 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16297 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16298 addr = memory_address (mode, op);
16299 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16300 op = addr;
16301 else
16303 /* For the load case we need to negate the address. */
16304 op = gen_reg_rtx (GET_MODE (addr));
16305 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16307 op = gen_rtx_MEM (mode, op);
16309 if (target == 0
16310 || GET_MODE (target) != tmode
16311 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16312 target = gen_reg_rtx (tmode);
16314 pat = GEN_FCN (icode) (target, op);
16315 if (!pat)
16316 return 0;
16317 emit_insn (pat);
16319 return target;
16322 case ALTIVEC_BUILTIN_VCFUX:
16323 case ALTIVEC_BUILTIN_VCFSX:
16324 case ALTIVEC_BUILTIN_VCTUXS:
16325 case ALTIVEC_BUILTIN_VCTSXS:
16326 /* FIXME: There's got to be a nicer way to handle this case than
16327 constructing a new CALL_EXPR. */
16328 if (call_expr_nargs (exp) == 1)
16330 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16331 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16333 break;
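/* Editor's note (illustrative): the rewrite above turns a one-argument
   call such as __builtin_altivec_vcfsx (v) into the canonical
   two-argument form __builtin_altivec_vcfsx (v, 0), so the ordinary
   binary expansion path below can handle it.  */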
16335 default:
16336 break;
16339 if (TARGET_ALTIVEC)
16341 ret = altivec_expand_builtin (exp, target, &success);
16343 if (success)
16344 return ret;
16346 if (TARGET_SPE)
16348 ret = spe_expand_builtin (exp, target, &success);
16350 if (success)
16351 return ret;
16353 if (TARGET_PAIRED_FLOAT)
16355 ret = paired_expand_builtin (exp, target, &success);
16357 if (success)
16358 return ret;
16360 if (TARGET_HTM)
16362 ret = htm_expand_builtin (exp, target, &success);
16364 if (success)
16365 return ret;
16368 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16369 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16370 gcc_assert (attr == RS6000_BTC_UNARY
16371 || attr == RS6000_BTC_BINARY
16372 || attr == RS6000_BTC_TERNARY
16373 || attr == RS6000_BTC_SPECIAL);
16375 /* Handle simple unary operations. */
16376 d = bdesc_1arg;
16377 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16378 if (d->code == fcode)
16379 return rs6000_expand_unop_builtin (d->icode, exp, target);
16381 /* Handle simple binary operations. */
16382 d = bdesc_2arg;
16383 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16384 if (d->code == fcode)
16385 return rs6000_expand_binop_builtin (d->icode, exp, target);
16387 /* Handle simple ternary operations. */
16388 d = bdesc_3arg;
16389 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16390 if (d->code == fcode)
16391 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16393 /* Handle simple no-argument operations. */
16394 d = bdesc_0arg;
16395 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16396 if (d->code == fcode)
16397 return rs6000_expand_zeroop_builtin (d->icode, target);
16399 gcc_unreachable ();
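/* Usage sketch (editor's example): a simple two-operand builtin such as

     double r = __builtin_recipdiv (a, b);

   is matched by the RS6000_BUILTIN_RECIP case above and expands through
   rs6000_expand_binop_builtin with CODE_FOR_recipdf3; table-driven
   builtins instead fall through to the bdesc_2arg scan.  */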
16402 static void
16403 rs6000_init_builtins (void)
16405 tree tdecl;
16406 tree ftype;
16407 machine_mode mode;
16409 if (TARGET_DEBUG_BUILTIN)
16410 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
16411 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16412 (TARGET_SPE) ? ", spe" : "",
16413 (TARGET_ALTIVEC) ? ", altivec" : "",
16414 (TARGET_VSX) ? ", vsx" : "");
16416 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16417 V2SF_type_node = build_vector_type (float_type_node, 2);
16418 V2DI_type_node = build_vector_type (intDI_type_node, 2);
16419 V2DF_type_node = build_vector_type (double_type_node, 2);
16420 V4HI_type_node = build_vector_type (intHI_type_node, 4);
16421 V4SI_type_node = build_vector_type (intSI_type_node, 4);
16422 V4SF_type_node = build_vector_type (float_type_node, 4);
16423 V8HI_type_node = build_vector_type (intHI_type_node, 8);
16424 V16QI_type_node = build_vector_type (intQI_type_node, 16);
16426 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
16427 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
16428 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
16429 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
16431 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16432 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16433 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16434 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16436 const_str_type_node
16437 = build_pointer_type (build_qualified_type (char_type_node,
16438 TYPE_QUAL_CONST));
16440 /* We use V1TI mode as a special container to hold __int128_t items that
16441 must live in VSX registers. */
16442 if (intTI_type_node)
16444 V1TI_type_node = build_vector_type (intTI_type_node, 1);
16445 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
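/* Editor's sketch of the container in use (illustrative, assumes a VSX
   target):

     __vector __int128 v;     V1TImode value, kept in VSX registers

   The "__vector __int128" type names themselves are registered later in
   this function.  */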
16448 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16449 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16450 'vector unsigned short'. */
16452 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16453 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16454 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16455 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16456 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16458 long_integer_type_internal_node = long_integer_type_node;
16459 long_unsigned_type_internal_node = long_unsigned_type_node;
16460 long_long_integer_type_internal_node = long_long_integer_type_node;
16461 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16462 intQI_type_internal_node = intQI_type_node;
16463 uintQI_type_internal_node = unsigned_intQI_type_node;
16464 intHI_type_internal_node = intHI_type_node;
16465 uintHI_type_internal_node = unsigned_intHI_type_node;
16466 intSI_type_internal_node = intSI_type_node;
16467 uintSI_type_internal_node = unsigned_intSI_type_node;
16468 intDI_type_internal_node = intDI_type_node;
16469 uintDI_type_internal_node = unsigned_intDI_type_node;
16470 intTI_type_internal_node = intTI_type_node;
16471 uintTI_type_internal_node = unsigned_intTI_type_node;
16472 float_type_internal_node = float_type_node;
16473 double_type_internal_node = double_type_node;
16474 long_double_type_internal_node = long_double_type_node;
16475 dfloat64_type_internal_node = dfloat64_type_node;
16476 dfloat128_type_internal_node = dfloat128_type_node;
16477 void_type_internal_node = void_type_node;
16479 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16480 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16481 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16482 format that uses a pair of doubles, depending on the switches and
16483 defaults. */
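/* Editor's summary of the resulting user-visible mapping (illustrative,
   assuming TARGET_FLOAT128):

     __float128 f;        KFmode, IEEE binary128
     __ibm128 g;          IFmode, pair of doubles
     long double l;       TFmode, one of the two formats per the switches  */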
16484 if (TARGET_FLOAT128)
16486 ibm128_float_type_node = make_node (REAL_TYPE);
16487 TYPE_PRECISION (ibm128_float_type_node) = 128;
16488 layout_type (ibm128_float_type_node);
16489 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16491 ieee128_float_type_node = float128_type_node;
16493 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16494 "__float128");
16496 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16497 "__ibm128");
16499 else
16501 /* All types must be nonzero, or the self-test barfs during bootstrap. */
16502 ieee128_float_type_node = long_double_type_node;
16503 ibm128_float_type_node = long_double_type_node;
16506 /* Initialize the modes for builtin_function_type, mapping a machine mode to
16507 tree type node. */
16508 builtin_mode_to_type[QImode][0] = integer_type_node;
16509 builtin_mode_to_type[HImode][0] = integer_type_node;
16510 builtin_mode_to_type[SImode][0] = intSI_type_node;
16511 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16512 builtin_mode_to_type[DImode][0] = intDI_type_node;
16513 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16514 builtin_mode_to_type[TImode][0] = intTI_type_node;
16515 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16516 builtin_mode_to_type[SFmode][0] = float_type_node;
16517 builtin_mode_to_type[DFmode][0] = double_type_node;
16518 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16519 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16520 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16521 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16522 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16523 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16524 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16525 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
16526 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
16527 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16528 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16529 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16530 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
16531 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16532 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16533 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16534 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16535 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16536 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16537 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
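/* Editor's example of the mapping above: builtin_function_type below
   looks up builtin_mode_to_type[V4SImode][1] to obtain
   unsigned_V4SI_type_node when an operand has V4SImode and is flagged
   unsigned, and builtin_mode_to_type[V4SImode][0] for the signed
   variant.  */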
16539 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16540 TYPE_NAME (bool_char_type_node) = tdecl;
16542 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16543 TYPE_NAME (bool_short_type_node) = tdecl;
16545 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16546 TYPE_NAME (bool_int_type_node) = tdecl;
16548 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16549 TYPE_NAME (pixel_type_node) = tdecl;
16551 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
16552 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
16553 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
16554 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
16555 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
16557 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
16558 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
16560 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
16561 TYPE_NAME (V16QI_type_node) = tdecl;
16563 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
16564 TYPE_NAME (bool_V16QI_type_node) = tdecl;
16566 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
16567 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
16569 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
16570 TYPE_NAME (V8HI_type_node) = tdecl;
16572 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
16573 TYPE_NAME (bool_V8HI_type_node) = tdecl;
16575 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
16576 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
16578 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
16579 TYPE_NAME (V4SI_type_node) = tdecl;
16581 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
16582 TYPE_NAME (bool_V4SI_type_node) = tdecl;
16584 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
16585 TYPE_NAME (V4SF_type_node) = tdecl;
16587 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
16588 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
16590 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
16591 TYPE_NAME (V2DF_type_node) = tdecl;
16593 if (TARGET_POWERPC64)
16595 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
16596 TYPE_NAME (V2DI_type_node) = tdecl;
16598 tdecl = add_builtin_type ("__vector unsigned long",
16599 unsigned_V2DI_type_node);
16600 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16602 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
16603 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16605 else
16607 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
16608 TYPE_NAME (V2DI_type_node) = tdecl;
16610 tdecl = add_builtin_type ("__vector unsigned long long",
16611 unsigned_V2DI_type_node);
16612 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
16614 tdecl = add_builtin_type ("__vector __bool long long",
16615 bool_V2DI_type_node);
16616 TYPE_NAME (bool_V2DI_type_node) = tdecl;
16619 if (V1TI_type_node)
16621 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
16622 TYPE_NAME (V1TI_type_node) = tdecl;
16624 tdecl = add_builtin_type ("__vector unsigned __int128",
16625 unsigned_V1TI_type_node);
16626 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
16629 /* Paired and SPE builtins are only available if the compiler was built
16630 with the corresponding options, so create those builtins only when the
16631 matching option is enabled. Create AltiVec and VSX builtins on machines
16632 with at least the general-purpose extensions (970 and newer) to allow
16633 use of the target attribute. */
16634 if (TARGET_PAIRED_FLOAT)
16635 paired_init_builtins ();
16636 if (TARGET_SPE)
16637 spe_init_builtins ();
16638 if (TARGET_EXTRA_BUILTINS)
16639 altivec_init_builtins ();
16640 if (TARGET_HTM)
16641 htm_init_builtins ();
16643 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
16644 rs6000_common_init_builtins ();
16646 ftype = build_function_type_list (ieee128_float_type_node,
16647 const_str_type_node, NULL_TREE);
16648 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
16649 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
16651 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
16652 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
16653 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
16655 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16656 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16657 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16659 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16660 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16661 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16663 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16664 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16665 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16667 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16668 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16669 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16671 mode = (TARGET_64BIT) ? DImode : SImode;
16672 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16673 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16674 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16676 ftype = build_function_type_list (unsigned_intDI_type_node,
16677 NULL_TREE);
16678 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16680 if (TARGET_64BIT)
16681 ftype = build_function_type_list (unsigned_intDI_type_node,
16682 NULL_TREE);
16683 else
16684 ftype = build_function_type_list (unsigned_intSI_type_node,
16685 NULL_TREE);
16686 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16688 ftype = build_function_type_list (double_type_node, NULL_TREE);
16689 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16691 ftype = build_function_type_list (void_type_node,
16692 intSI_type_node, double_type_node,
16693 NULL_TREE);
16694 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16696 ftype = build_function_type_list (void_type_node, NULL_TREE);
16697 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16699 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16700 NULL_TREE);
16701 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16702 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16704 #if TARGET_XCOFF
16705 /* AIX libm provides clog as __clog. */
16706 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16707 set_user_assembler_name (tdecl, "__clog");
16708 #endif
16710 #ifdef SUBTARGET_INIT_BUILTINS
16711 SUBTARGET_INIT_BUILTINS;
16712 #endif
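/* Usage sketch (editor's example) for the CPU builtins defined above;
   the feature and CPU name strings shown are illustrative:

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("vsx"))
       ...take the VSX path...
     else if (__builtin_cpu_is ("power8"))
       ...

   All three expand through cpu_expand_builtin.  */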
16715 /* Returns the rs6000 builtin decl for CODE. */
16717 static tree
16718 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16720 HOST_WIDE_INT fnmask;
16722 if (code >= RS6000_BUILTIN_COUNT)
16723 return error_mark_node;
16725 fnmask = rs6000_builtin_info[code].mask;
16726 if ((fnmask & rs6000_builtin_mask) != fnmask)
16728 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16729 return error_mark_node;
16732 return rs6000_builtin_decls[code];
16735 static void
16736 spe_init_builtins (void)
16738 tree puint_type_node = build_pointer_type (unsigned_type_node);
16739 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
16740 const struct builtin_description *d;
16741 size_t i;
16743 tree v2si_ftype_4_v2si
16744 = build_function_type_list (opaque_V2SI_type_node,
16745 opaque_V2SI_type_node,
16746 opaque_V2SI_type_node,
16747 opaque_V2SI_type_node,
16748 opaque_V2SI_type_node,
16749 NULL_TREE);
16751 tree v2sf_ftype_4_v2sf
16752 = build_function_type_list (opaque_V2SF_type_node,
16753 opaque_V2SF_type_node,
16754 opaque_V2SF_type_node,
16755 opaque_V2SF_type_node,
16756 opaque_V2SF_type_node,
16757 NULL_TREE);
16759 tree int_ftype_int_v2si_v2si
16760 = build_function_type_list (integer_type_node,
16761 integer_type_node,
16762 opaque_V2SI_type_node,
16763 opaque_V2SI_type_node,
16764 NULL_TREE);
16766 tree int_ftype_int_v2sf_v2sf
16767 = build_function_type_list (integer_type_node,
16768 integer_type_node,
16769 opaque_V2SF_type_node,
16770 opaque_V2SF_type_node,
16771 NULL_TREE);
16773 tree void_ftype_v2si_puint_int
16774 = build_function_type_list (void_type_node,
16775 opaque_V2SI_type_node,
16776 puint_type_node,
16777 integer_type_node,
16778 NULL_TREE);
16780 tree void_ftype_v2si_puint_char
16781 = build_function_type_list (void_type_node,
16782 opaque_V2SI_type_node,
16783 puint_type_node,
16784 char_type_node,
16785 NULL_TREE);
16787 tree void_ftype_v2si_pv2si_int
16788 = build_function_type_list (void_type_node,
16789 opaque_V2SI_type_node,
16790 opaque_p_V2SI_type_node,
16791 integer_type_node,
16792 NULL_TREE);
16794 tree void_ftype_v2si_pv2si_char
16795 = build_function_type_list (void_type_node,
16796 opaque_V2SI_type_node,
16797 opaque_p_V2SI_type_node,
16798 char_type_node,
16799 NULL_TREE);
16801 tree void_ftype_int
16802 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16804 tree int_ftype_void
16805 = build_function_type_list (integer_type_node, NULL_TREE);
16807 tree v2si_ftype_pv2si_int
16808 = build_function_type_list (opaque_V2SI_type_node,
16809 opaque_p_V2SI_type_node,
16810 integer_type_node,
16811 NULL_TREE);
16813 tree v2si_ftype_puint_int
16814 = build_function_type_list (opaque_V2SI_type_node,
16815 puint_type_node,
16816 integer_type_node,
16817 NULL_TREE);
16819 tree v2si_ftype_pushort_int
16820 = build_function_type_list (opaque_V2SI_type_node,
16821 pushort_type_node,
16822 integer_type_node,
16823 NULL_TREE);
16825 tree v2si_ftype_signed_char
16826 = build_function_type_list (opaque_V2SI_type_node,
16827 signed_char_type_node,
16828 NULL_TREE);
16830 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
16832 /* Initialize irregular SPE builtins. */
16834 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
16835 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
16836 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
16837 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
16838 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
16839 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
16840 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
16841 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
16842 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
16843 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
16844 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
16845 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
16846 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
16847 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
16848 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
16849 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
16850 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
16851 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
16853 /* Loads. */
16854 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
16855 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
16856 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
16857 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
16858 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
16859 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
16860 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
16861 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
16862 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
16863 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
16864 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
16865 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
16866 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
16867 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
16868 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
16869 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
16870 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
16871 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
16872 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
16873 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
16874 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
16875 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
16877 /* Predicates. */
16878 d = bdesc_spe_predicates;
16879 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
16881 tree type;
16883 switch (insn_data[d->icode].operand[1].mode)
16885 case V2SImode:
16886 type = int_ftype_int_v2si_v2si;
16887 break;
16888 case V2SFmode:
16889 type = int_ftype_int_v2sf_v2sf;
16890 break;
16891 default:
16892 gcc_unreachable ();
16895 def_builtin (d->name, type, d->code);
16898 /* Evsel predicates. */
16899 d = bdesc_spe_evsel;
16900 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
16902 tree type;
16904 switch (insn_data[d->icode].operand[1].mode)
16906 case V2SImode:
16907 type = v2si_ftype_4_v2si;
16908 break;
16909 case V2SFmode:
16910 type = v2sf_ftype_4_v2sf;
16911 break;
16912 default:
16913 gcc_unreachable ();
16916 def_builtin (d->name, type, d->code);
16920 static void
16921 paired_init_builtins (void)
16923 const struct builtin_description *d;
16924 size_t i;
16926 tree int_ftype_int_v2sf_v2sf
16927 = build_function_type_list (integer_type_node,
16928 integer_type_node,
16929 V2SF_type_node,
16930 V2SF_type_node,
16931 NULL_TREE);
16932 tree pcfloat_type_node =
16933 build_pointer_type (build_qualified_type
16934 (float_type_node, TYPE_QUAL_CONST));
16936 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
16937 long_integer_type_node,
16938 pcfloat_type_node,
16939 NULL_TREE);
16940 tree void_ftype_v2sf_long_pcfloat =
16941 build_function_type_list (void_type_node,
16942 V2SF_type_node,
16943 long_integer_type_node,
16944 pcfloat_type_node,
16945 NULL_TREE);
16948 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
16949 PAIRED_BUILTIN_LX);
16952 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
16953 PAIRED_BUILTIN_STX);
16955 /* Predicates. */
16956 d = bdesc_paired_preds;
16957 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
16959 tree type;
16961 if (TARGET_DEBUG_BUILTIN)
16962 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
16963 (int)i, get_insn_name (d->icode), (int)d->icode,
16964 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
16966 switch (insn_data[d->icode].operand[1].mode)
16968 case V2SFmode:
16969 type = int_ftype_int_v2sf_v2sf;
16970 break;
16971 default:
16972 gcc_unreachable ();
16975 def_builtin (d->name, type, d->code);
16979 static void
16980 altivec_init_builtins (void)
16982 const struct builtin_description *d;
16983 size_t i;
16984 tree ftype;
16985 tree decl;
16987 tree pvoid_type_node = build_pointer_type (void_type_node);
16989 tree pcvoid_type_node
16990 = build_pointer_type (build_qualified_type (void_type_node,
16991 TYPE_QUAL_CONST));
16993 tree int_ftype_opaque
16994 = build_function_type_list (integer_type_node,
16995 opaque_V4SI_type_node, NULL_TREE);
16996 tree opaque_ftype_opaque
16997 = build_function_type_list (integer_type_node, NULL_TREE);
16998 tree opaque_ftype_opaque_int
16999 = build_function_type_list (opaque_V4SI_type_node,
17000 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17001 tree opaque_ftype_opaque_opaque_int
17002 = build_function_type_list (opaque_V4SI_type_node,
17003 opaque_V4SI_type_node, opaque_V4SI_type_node,
17004 integer_type_node, NULL_TREE);
17005 tree opaque_ftype_opaque_opaque_opaque
17006 = build_function_type_list (opaque_V4SI_type_node,
17007 opaque_V4SI_type_node, opaque_V4SI_type_node,
17008 opaque_V4SI_type_node, NULL_TREE);
17009 tree opaque_ftype_opaque_opaque
17010 = build_function_type_list (opaque_V4SI_type_node,
17011 opaque_V4SI_type_node, opaque_V4SI_type_node,
17012 NULL_TREE);
17013 tree int_ftype_int_opaque_opaque
17014 = build_function_type_list (integer_type_node,
17015 integer_type_node, opaque_V4SI_type_node,
17016 opaque_V4SI_type_node, NULL_TREE);
17017 tree int_ftype_int_v4si_v4si
17018 = build_function_type_list (integer_type_node,
17019 integer_type_node, V4SI_type_node,
17020 V4SI_type_node, NULL_TREE);
17021 tree int_ftype_int_v2di_v2di
17022 = build_function_type_list (integer_type_node,
17023 integer_type_node, V2DI_type_node,
17024 V2DI_type_node, NULL_TREE);
17025 tree void_ftype_v4si
17026 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17027 tree v8hi_ftype_void
17028 = build_function_type_list (V8HI_type_node, NULL_TREE);
17029 tree void_ftype_void
17030 = build_function_type_list (void_type_node, NULL_TREE);
17031 tree void_ftype_int
17032 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17034 tree opaque_ftype_long_pcvoid
17035 = build_function_type_list (opaque_V4SI_type_node,
17036 long_integer_type_node, pcvoid_type_node,
17037 NULL_TREE);
17038 tree v16qi_ftype_long_pcvoid
17039 = build_function_type_list (V16QI_type_node,
17040 long_integer_type_node, pcvoid_type_node,
17041 NULL_TREE);
17042 tree v8hi_ftype_long_pcvoid
17043 = build_function_type_list (V8HI_type_node,
17044 long_integer_type_node, pcvoid_type_node,
17045 NULL_TREE);
17046 tree v4si_ftype_long_pcvoid
17047 = build_function_type_list (V4SI_type_node,
17048 long_integer_type_node, pcvoid_type_node,
17049 NULL_TREE);
17050 tree v4sf_ftype_long_pcvoid
17051 = build_function_type_list (V4SF_type_node,
17052 long_integer_type_node, pcvoid_type_node,
17053 NULL_TREE);
17054 tree v2df_ftype_long_pcvoid
17055 = build_function_type_list (V2DF_type_node,
17056 long_integer_type_node, pcvoid_type_node,
17057 NULL_TREE);
17058 tree v2di_ftype_long_pcvoid
17059 = build_function_type_list (V2DI_type_node,
17060 long_integer_type_node, pcvoid_type_node,
17061 NULL_TREE);
17063 tree void_ftype_opaque_long_pvoid
17064 = build_function_type_list (void_type_node,
17065 opaque_V4SI_type_node, long_integer_type_node,
17066 pvoid_type_node, NULL_TREE);
17067 tree void_ftype_v4si_long_pvoid
17068 = build_function_type_list (void_type_node,
17069 V4SI_type_node, long_integer_type_node,
17070 pvoid_type_node, NULL_TREE);
17071 tree void_ftype_v16qi_long_pvoid
17072 = build_function_type_list (void_type_node,
17073 V16QI_type_node, long_integer_type_node,
17074 pvoid_type_node, NULL_TREE);
17075 tree void_ftype_v8hi_long_pvoid
17076 = build_function_type_list (void_type_node,
17077 V8HI_type_node, long_integer_type_node,
17078 pvoid_type_node, NULL_TREE);
17079 tree void_ftype_v4sf_long_pvoid
17080 = build_function_type_list (void_type_node,
17081 V4SF_type_node, long_integer_type_node,
17082 pvoid_type_node, NULL_TREE);
17083 tree void_ftype_v2df_long_pvoid
17084 = build_function_type_list (void_type_node,
17085 V2DF_type_node, long_integer_type_node,
17086 pvoid_type_node, NULL_TREE);
17087 tree void_ftype_v2di_long_pvoid
17088 = build_function_type_list (void_type_node,
17089 V2DI_type_node, long_integer_type_node,
17090 pvoid_type_node, NULL_TREE);
17091 tree int_ftype_int_v8hi_v8hi
17092 = build_function_type_list (integer_type_node,
17093 integer_type_node, V8HI_type_node,
17094 V8HI_type_node, NULL_TREE);
17095 tree int_ftype_int_v16qi_v16qi
17096 = build_function_type_list (integer_type_node,
17097 integer_type_node, V16QI_type_node,
17098 V16QI_type_node, NULL_TREE);
17099 tree int_ftype_int_v4sf_v4sf
17100 = build_function_type_list (integer_type_node,
17101 integer_type_node, V4SF_type_node,
17102 V4SF_type_node, NULL_TREE);
17103 tree int_ftype_int_v2df_v2df
17104 = build_function_type_list (integer_type_node,
17105 integer_type_node, V2DF_type_node,
17106 V2DF_type_node, NULL_TREE);
17107 tree v2di_ftype_v2di
17108 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17109 tree v4si_ftype_v4si
17110 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17111 tree v8hi_ftype_v8hi
17112 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17113 tree v16qi_ftype_v16qi
17114 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17115 tree v4sf_ftype_v4sf
17116 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17117 tree v2df_ftype_v2df
17118 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17119 tree void_ftype_pcvoid_int_int
17120 = build_function_type_list (void_type_node,
17121 pcvoid_type_node, integer_type_node,
17122 integer_type_node, NULL_TREE);
17124 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17125 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17126 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17127 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17128 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17129 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17130 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17131 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17132 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17133 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17134 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17135 ALTIVEC_BUILTIN_LVXL_V2DF);
17136 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17137 ALTIVEC_BUILTIN_LVXL_V2DI);
17138 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17139 ALTIVEC_BUILTIN_LVXL_V4SF);
17140 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17141 ALTIVEC_BUILTIN_LVXL_V4SI);
17142 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17143 ALTIVEC_BUILTIN_LVXL_V8HI);
17144 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17145 ALTIVEC_BUILTIN_LVXL_V16QI);
17146 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17147 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17148 ALTIVEC_BUILTIN_LVX_V2DF);
17149 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17150 ALTIVEC_BUILTIN_LVX_V2DI);
17151 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17152 ALTIVEC_BUILTIN_LVX_V4SF);
17153 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17154 ALTIVEC_BUILTIN_LVX_V4SI);
17155 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17156 ALTIVEC_BUILTIN_LVX_V8HI);
17157 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17158 ALTIVEC_BUILTIN_LVX_V16QI);
17159 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17160 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17161 ALTIVEC_BUILTIN_STVX_V2DF);
17162 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17163 ALTIVEC_BUILTIN_STVX_V2DI);
17164 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17165 ALTIVEC_BUILTIN_STVX_V4SF);
17166 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17167 ALTIVEC_BUILTIN_STVX_V4SI);
17168 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17169 ALTIVEC_BUILTIN_STVX_V8HI);
17170 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17171 ALTIVEC_BUILTIN_STVX_V16QI);
17172 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17173 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17174 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17175 ALTIVEC_BUILTIN_STVXL_V2DF);
17176 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17177 ALTIVEC_BUILTIN_STVXL_V2DI);
17178 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17179 ALTIVEC_BUILTIN_STVXL_V4SF);
17180 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17181 ALTIVEC_BUILTIN_STVXL_V4SI);
17182 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17183 ALTIVEC_BUILTIN_STVXL_V8HI);
17184 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17185 ALTIVEC_BUILTIN_STVXL_V16QI);
17186 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17187 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17188 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17189 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17190 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17191 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17192 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17193 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17194 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17195 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17196 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17197 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17198 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17199 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17200 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17201 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17203 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17204 VSX_BUILTIN_LXVD2X_V2DF);
17205 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17206 VSX_BUILTIN_LXVD2X_V2DI);
17207 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17208 VSX_BUILTIN_LXVW4X_V4SF);
17209 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17210 VSX_BUILTIN_LXVW4X_V4SI);
17211 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17212 VSX_BUILTIN_LXVW4X_V8HI);
17213 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17214 VSX_BUILTIN_LXVW4X_V16QI);
17215 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17216 VSX_BUILTIN_STXVD2X_V2DF);
17217 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17218 VSX_BUILTIN_STXVD2X_V2DI);
17219 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17220 VSX_BUILTIN_STXVW4X_V4SF);
17221 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17222 VSX_BUILTIN_STXVW4X_V4SI);
17223 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17224 VSX_BUILTIN_STXVW4X_V8HI);
17225 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17226 VSX_BUILTIN_STXVW4X_V16QI);
17228 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17229 VSX_BUILTIN_LD_ELEMREV_V2DF);
17230 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17231 VSX_BUILTIN_LD_ELEMREV_V2DI);
17232 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17233 VSX_BUILTIN_LD_ELEMREV_V4SF);
17234 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17235 VSX_BUILTIN_LD_ELEMREV_V4SI);
17236 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17237 VSX_BUILTIN_ST_ELEMREV_V2DF);
17238 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17239 VSX_BUILTIN_ST_ELEMREV_V2DI);
17240 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17241 VSX_BUILTIN_ST_ELEMREV_V4SF);
17242 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17243 VSX_BUILTIN_ST_ELEMREV_V4SI);
17245 if (TARGET_P9_VECTOR)
17247 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17248 VSX_BUILTIN_LD_ELEMREV_V8HI);
17249 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17250 VSX_BUILTIN_LD_ELEMREV_V16QI);
17251 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17252 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17253 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17254 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17257 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17258 VSX_BUILTIN_VEC_LD);
17259 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17260 VSX_BUILTIN_VEC_ST);
17261 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17262 VSX_BUILTIN_VEC_XL);
17263 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17264 VSX_BUILTIN_VEC_XST);
17266 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17267 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17268 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17270 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17271 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17272 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17273 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17274 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17275 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17276 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17277 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17278 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17279 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17280 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17281 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17283 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17284 ALTIVEC_BUILTIN_VEC_ADDE);
17285 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17286 ALTIVEC_BUILTIN_VEC_ADDEC);
17287 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17288 ALTIVEC_BUILTIN_VEC_CMPNE);
17289 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17290 ALTIVEC_BUILTIN_VEC_MUL);
17292 /* Cell builtins. */
17293 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17294 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17295 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17296 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17298 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17299 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17300 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17301 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17303 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17304 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17305 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17306 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17308 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17309 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17310 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17311 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17313 /* Add the DST variants. */
17314 d = bdesc_dst;
17315 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17316 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17318 /* Initialize the predicates. */
17319 d = bdesc_altivec_preds;
17320 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17322 machine_mode mode1;
17323 tree type;
17325 if (rs6000_overloaded_builtin_p (d->code))
17326 mode1 = VOIDmode;
17327 else
17328 mode1 = insn_data[d->icode].operand[1].mode;
17330 switch (mode1)
17332 case VOIDmode:
17333 type = int_ftype_int_opaque_opaque;
17334 break;
17335 case V2DImode:
17336 type = int_ftype_int_v2di_v2di;
17337 break;
17338 case V4SImode:
17339 type = int_ftype_int_v4si_v4si;
17340 break;
17341 case V8HImode:
17342 type = int_ftype_int_v8hi_v8hi;
17343 break;
17344 case V16QImode:
17345 type = int_ftype_int_v16qi_v16qi;
17346 break;
17347 case V4SFmode:
17348 type = int_ftype_int_v4sf_v4sf;
17349 break;
17350 case V2DFmode:
17351 type = int_ftype_int_v2df_v2df;
17352 break;
17353 default:
17354 gcc_unreachable ();
17357 def_builtin (d->name, type, d->code);
17360 /* Initialize the abs* operators. */
17361 d = bdesc_abs;
17362 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17364 machine_mode mode0;
17365 tree type;
17367 mode0 = insn_data[d->icode].operand[0].mode;
17369 switch (mode0)
17371 case V2DImode:
17372 type = v2di_ftype_v2di;
17373 break;
17374 case V4SImode:
17375 type = v4si_ftype_v4si;
17376 break;
17377 case V8HImode:
17378 type = v8hi_ftype_v8hi;
17379 break;
17380 case V16QImode:
17381 type = v16qi_ftype_v16qi;
17382 break;
17383 case V4SFmode:
17384 type = v4sf_ftype_v4sf;
17385 break;
17386 case V2DFmode:
17387 type = v2df_ftype_v2df;
17388 break;
17389 default:
17390 gcc_unreachable ();
17393 def_builtin (d->name, type, d->code);
17396 /* Initialize target builtin that implements
17397 targetm.vectorize.builtin_mask_for_load. */
17399 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17400 v16qi_ftype_long_pcvoid,
17401 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17402 BUILT_IN_MD, NULL, NULL_TREE);
17403 TREE_READONLY (decl) = 1;
17404 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17405 altivec_builtin_mask_for_load = decl;
17407 /* Access to the vec_init patterns. */
17408 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17409 integer_type_node, integer_type_node,
17410 integer_type_node, NULL_TREE);
17411 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17413 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17414 short_integer_type_node,
17415 short_integer_type_node,
17416 short_integer_type_node,
17417 short_integer_type_node,
17418 short_integer_type_node,
17419 short_integer_type_node,
17420 short_integer_type_node, NULL_TREE);
17421 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17423 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17424 char_type_node, char_type_node,
17425 char_type_node, char_type_node,
17426 char_type_node, char_type_node,
17427 char_type_node, char_type_node,
17428 char_type_node, char_type_node,
17429 char_type_node, char_type_node,
17430 char_type_node, char_type_node,
17431 char_type_node, NULL_TREE);
17432 def_builtin ("__builtin_vec_init_v16qi", ftype,
17433 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17435 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17436 float_type_node, float_type_node,
17437 float_type_node, NULL_TREE);
17438 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17440 /* VSX builtins. */
17441 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17442 double_type_node, NULL_TREE);
17443 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17445 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17446 intDI_type_node, NULL_TREE);
17447 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17449 /* Access to the vec_set patterns. */
17450 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17451 intSI_type_node,
17452 integer_type_node, NULL_TREE);
17453 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17455 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17456 intHI_type_node,
17457 integer_type_node, NULL_TREE);
17458 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17460 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17461 intQI_type_node,
17462 integer_type_node, NULL_TREE);
17463 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17465 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17466 float_type_node,
17467 integer_type_node, NULL_TREE);
17468 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17470 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17471 double_type_node,
17472 integer_type_node, NULL_TREE);
17473 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17475 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17476 intDI_type_node,
17477 integer_type_node, NULL_TREE);
17478 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17480 /* Access to the vec_extract patterns. */
17481 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17482 integer_type_node, NULL_TREE);
17483 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17485 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17486 integer_type_node, NULL_TREE);
17487 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17489 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17490 integer_type_node, NULL_TREE);
17491 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17493 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17494 integer_type_node, NULL_TREE);
17495 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17497 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17498 integer_type_node, NULL_TREE);
17499 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17501 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17502 integer_type_node, NULL_TREE);
17503 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17506 if (V1TI_type_node)
17508 tree v1ti_ftype_long_pcvoid
17509 = build_function_type_list (V1TI_type_node,
17510 long_integer_type_node, pcvoid_type_node,
17511 NULL_TREE);
17512 tree void_ftype_v1ti_long_pvoid
17513 = build_function_type_list (void_type_node,
17514 V1TI_type_node, long_integer_type_node,
17515 pvoid_type_node, NULL_TREE);
17516 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17517 VSX_BUILTIN_LXVD2X_V1TI);
17518 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17519 VSX_BUILTIN_STXVD2X_V1TI);
17520 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17521 NULL_TREE);
17522 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17523 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17524 intTI_type_node,
17525 integer_type_node, NULL_TREE);
17526 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17527 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17528 integer_type_node, NULL_TREE);
17529 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17534 static void
17535 htm_init_builtins (void)
17537 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17538 const struct builtin_description *d;
17539 size_t i;
17541 d = bdesc_htm;
17542 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17544 tree op[MAX_HTM_OPERANDS], type;
17545 HOST_WIDE_INT mask = d->mask;
17546 unsigned attr = rs6000_builtin_info[d->code].attr;
17547 bool void_func = (attr & RS6000_BTC_VOID);
17548 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17549 int nopnds = 0;
17550 tree gpr_type_node;
17551 tree rettype;
17552 tree argtype;
17554 if (TARGET_32BIT && TARGET_POWERPC64)
17555 gpr_type_node = long_long_unsigned_type_node;
17556 else
17557 gpr_type_node = long_unsigned_type_node;
17559 if (attr & RS6000_BTC_SPR)
17561 rettype = gpr_type_node;
17562 argtype = gpr_type_node;
17564 else if (d->code == HTM_BUILTIN_TABORTDC
17565 || d->code == HTM_BUILTIN_TABORTDCI)
17567 rettype = unsigned_type_node;
17568 argtype = gpr_type_node;
17570 else
17572 rettype = unsigned_type_node;
17573 argtype = unsigned_type_node;
17576 if ((mask & builtin_mask) != mask)
17578 if (TARGET_DEBUG_BUILTIN)
17579 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17580 continue;
17583 if (d->name == 0)
17585 if (TARGET_DEBUG_BUILTIN)
17586 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17587 (long unsigned) i);
17588 continue;
17591 op[nopnds++] = (void_func) ? void_type_node : rettype;
17593 if (attr_args == RS6000_BTC_UNARY)
17594 op[nopnds++] = argtype;
17595 else if (attr_args == RS6000_BTC_BINARY)
17597 op[nopnds++] = argtype;
17598 op[nopnds++] = argtype;
17600 else if (attr_args == RS6000_BTC_TERNARY)
17602 op[nopnds++] = argtype;
17603 op[nopnds++] = argtype;
17604 op[nopnds++] = argtype;
17607 switch (nopnds)
17609 case 1:
17610 type = build_function_type_list (op[0], NULL_TREE);
17611 break;
17612 case 2:
17613 type = build_function_type_list (op[0], op[1], NULL_TREE);
17614 break;
17615 case 3:
17616 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17617 break;
17618 case 4:
17619 type = build_function_type_list (op[0], op[1], op[2], op[3],
17620 NULL_TREE);
17621 break;
17622 default:
17623 gcc_unreachable ();
17626 def_builtin (d->name, type, d->code);
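/* Editor's example of the signature assembly above: for a void, binary
   HTM builtin, op[] becomes { void, unsigned, unsigned }, yielding the
   type void (unsigned int, unsigned int) via build_function_type_list;
   an SPR builtin instead uses gpr_type_node for both the argument and
   return positions.  */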
17630 /* Hash function for builtin functions with up to 3 arguments and a return
17631 type. */
17632 hashval_t
17633 builtin_hasher::hash (builtin_hash_struct *bh)
17635 unsigned ret = 0;
17636 int i;
17638 for (i = 0; i < 4; i++)
17640 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17641 ret = (ret * 2) + bh->uns_p[i];
17644 return ret;
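/* Editor's note: the loop above computes a mixed-radix encoding with
   alternating radices MAX_MACHINE_MODE and 2,

     ret = (((m0 * 2 + u0) * M + m1) * 2 + u1) ...  with M = MAX_MACHINE_MODE,

   so two distinct (mode, uns_p) tuples hash identically only when the
   hashval_t arithmetic overflows.  */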
17647 /* Compare builtin hash entries H1 and H2 for equivalence. */
17648 bool
17649 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17651 return ((p1->mode[0] == p2->mode[0])
17652 && (p1->mode[1] == p2->mode[1])
17653 && (p1->mode[2] == p2->mode[2])
17654 && (p1->mode[3] == p2->mode[3])
17655 && (p1->uns_p[0] == p2->uns_p[0])
17656 && (p1->uns_p[1] == p2->uns_p[1])
17657 && (p1->uns_p[2] == p2->uns_p[2])
17658 && (p1->uns_p[3] == p2->uns_p[3]));
17661 /* Map types for builtin functions with an explicit return type and up to 3
17662 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
17663 of the missing arguments. */
17664 static tree
17665 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17666 machine_mode mode_arg1, machine_mode mode_arg2,
17667 enum rs6000_builtins builtin, const char *name)
17669 struct builtin_hash_struct h;
17670 struct builtin_hash_struct *h2;
17671 int num_args = 3;
17672 int i;
17673 tree ret_type = NULL_TREE;
17674 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17676 /* Create builtin_hash_table. */
17677 if (builtin_hash_table == NULL)
17678 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17680 h.type = NULL_TREE;
17681 h.mode[0] = mode_ret;
17682 h.mode[1] = mode_arg0;
17683 h.mode[2] = mode_arg1;
17684 h.mode[3] = mode_arg2;
17685 h.uns_p[0] = 0;
17686 h.uns_p[1] = 0;
17687 h.uns_p[2] = 0;
17688 h.uns_p[3] = 0;
17690 /* If the builtin produces unsigned results or takes unsigned
17691 arguments, and it is returned as a decl for the vectorizer (such as a
17692 widening multiply or a permute), make sure the arguments and return value
17693 are type correct. */
17694 switch (builtin)
17696 /* unsigned 1 argument functions. */
17697 case CRYPTO_BUILTIN_VSBOX:
17698 case P8V_BUILTIN_VGBBD:
17699 case MISC_BUILTIN_CDTBCD:
17700 case MISC_BUILTIN_CBCDTD:
17701 h.uns_p[0] = 1;
17702 h.uns_p[1] = 1;
17703 break;
17705 /* unsigned 2 argument functions. */
17706 case ALTIVEC_BUILTIN_VMULEUB_UNS:
17707 case ALTIVEC_BUILTIN_VMULEUH_UNS:
17708 case ALTIVEC_BUILTIN_VMULOUB_UNS:
17709 case ALTIVEC_BUILTIN_VMULOUH_UNS:
17710 case CRYPTO_BUILTIN_VCIPHER:
17711 case CRYPTO_BUILTIN_VCIPHERLAST:
17712 case CRYPTO_BUILTIN_VNCIPHER:
17713 case CRYPTO_BUILTIN_VNCIPHERLAST:
17714 case CRYPTO_BUILTIN_VPMSUMB:
17715 case CRYPTO_BUILTIN_VPMSUMH:
17716 case CRYPTO_BUILTIN_VPMSUMW:
17717 case CRYPTO_BUILTIN_VPMSUMD:
17718 case CRYPTO_BUILTIN_VPMSUM:
17719 case MISC_BUILTIN_ADDG6S:
17720 case MISC_BUILTIN_DIVWEU:
17721 case MISC_BUILTIN_DIVWEUO:
17722 case MISC_BUILTIN_DIVDEU:
17723 case MISC_BUILTIN_DIVDEUO:
17724 h.uns_p[0] = 1;
17725 h.uns_p[1] = 1;
17726 h.uns_p[2] = 1;
17727 break;
17729 /* unsigned 3 argument functions. */
17730 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17731 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17732 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17733 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17734 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17735 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17736 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17737 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17738 case VSX_BUILTIN_VPERM_16QI_UNS:
17739 case VSX_BUILTIN_VPERM_8HI_UNS:
17740 case VSX_BUILTIN_VPERM_4SI_UNS:
17741 case VSX_BUILTIN_VPERM_2DI_UNS:
17742 case VSX_BUILTIN_XXSEL_16QI_UNS:
17743 case VSX_BUILTIN_XXSEL_8HI_UNS:
17744 case VSX_BUILTIN_XXSEL_4SI_UNS:
17745 case VSX_BUILTIN_XXSEL_2DI_UNS:
17746 case CRYPTO_BUILTIN_VPERMXOR:
17747 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17748 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17749 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17750 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17751 case CRYPTO_BUILTIN_VSHASIGMAW:
17752 case CRYPTO_BUILTIN_VSHASIGMAD:
17753 case CRYPTO_BUILTIN_VSHASIGMA:
17754 h.uns_p[0] = 1;
17755 h.uns_p[1] = 1;
17756 h.uns_p[2] = 1;
17757 h.uns_p[3] = 1;
17758 break;
17760 /* signed permute functions with unsigned char mask. */
17761 case ALTIVEC_BUILTIN_VPERM_16QI:
17762 case ALTIVEC_BUILTIN_VPERM_8HI:
17763 case ALTIVEC_BUILTIN_VPERM_4SI:
17764 case ALTIVEC_BUILTIN_VPERM_4SF:
17765 case ALTIVEC_BUILTIN_VPERM_2DI:
17766 case ALTIVEC_BUILTIN_VPERM_2DF:
17767 case VSX_BUILTIN_VPERM_16QI:
17768 case VSX_BUILTIN_VPERM_8HI:
17769 case VSX_BUILTIN_VPERM_4SI:
17770 case VSX_BUILTIN_VPERM_4SF:
17771 case VSX_BUILTIN_VPERM_2DI:
17772 case VSX_BUILTIN_VPERM_2DF:
17773 h.uns_p[3] = 1;
17774 break;
17776 /* unsigned args, signed return. */
17777 case VSX_BUILTIN_XVCVUXDDP_UNS:
17778 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17779 h.uns_p[1] = 1;
17780 break;
17782 /* signed args, unsigned return. */
17783 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17784 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17785 case MISC_BUILTIN_UNPACK_TD:
17786 case MISC_BUILTIN_UNPACK_V1TI:
17787 h.uns_p[0] = 1;
17788 break;
17790 /* unsigned arguments for 128-bit pack instructions. */
17791 case MISC_BUILTIN_PACK_TD:
17792 case MISC_BUILTIN_PACK_V1TI:
17793 h.uns_p[1] = 1;
17794 h.uns_p[2] = 1;
17795 break;
17797 default:
17798 break;
17801 /* Figure out how many args are present. */
17802 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17803 num_args--;
17805 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17806 if (!ret_type && h.uns_p[0])
17807 ret_type = builtin_mode_to_type[h.mode[0]][0];
17809 if (!ret_type)
17810 fatal_error (input_location,
17811 "internal error: builtin function %s had an unexpected "
17812 "return type %s", name, GET_MODE_NAME (h.mode[0]));
17814 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17815 arg_type[i] = NULL_TREE;
17817 for (i = 0; i < num_args; i++)
17819 int m = (int) h.mode[i+1];
17820 int uns_p = h.uns_p[i+1];
17822 arg_type[i] = builtin_mode_to_type[m][uns_p];
17823 if (!arg_type[i] && uns_p)
17824 arg_type[i] = builtin_mode_to_type[m][0];
17826 if (!arg_type[i])
17827 fatal_error (input_location,
17828 "internal error: builtin function %s, argument %d "
17829 "had unexpected argument type %s", name, i,
17830 GET_MODE_NAME (m));
17833 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17834 if (*found == NULL)
17836 h2 = ggc_alloc<builtin_hash_struct> ();
17837 *h2 = h;
17838 *found = h2;
17840 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17841 arg_type[2], NULL_TREE);
17844 return (*found)->type;
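/* For example, under the mappings above an unsigned builtin such as
   vmuleuh, with modes (V4SImode, V8HImode, V8HImode, VOIDmode) and all
   uns_p flags set, gets the type "vector unsigned int (vector unsigned
   short, vector unsigned short)", and any later builtin with the same
   modes and signedness reuses the cached type from the hash table.  */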
17847 static void
17848 rs6000_common_init_builtins (void)
17850 const struct builtin_description *d;
17851 size_t i;
17853 tree opaque_ftype_opaque = NULL_TREE;
17854 tree opaque_ftype_opaque_opaque = NULL_TREE;
17855 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17856 tree v2si_ftype = NULL_TREE;
17857 tree v2si_ftype_qi = NULL_TREE;
17858 tree v2si_ftype_v2si_qi = NULL_TREE;
17859 tree v2si_ftype_int_qi = NULL_TREE;
17860 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17862 if (!TARGET_PAIRED_FLOAT)
17864 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
17865 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
17868 /* Paired and SPE builtins are only available if you build a compiler with
17869 the appropriate options, so only create those builtins with the
17870 appropriate compiler option. Create Altivec and VSX builtins on machines
17871 with at least the general purpose extensions (970 and newer) to allow the
17872 use of the target attribute. */
17874 if (TARGET_EXTRA_BUILTINS)
17875 builtin_mask |= RS6000_BTM_COMMON;
17877 /* Add the ternary operators. */
17878 d = bdesc_3arg;
17879 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17881 tree type;
17882 HOST_WIDE_INT mask = d->mask;
17884 if ((mask & builtin_mask) != mask)
17886 if (TARGET_DEBUG_BUILTIN)
17887 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17888 continue;
17891 if (rs6000_overloaded_builtin_p (d->code))
17893 if (! (type = opaque_ftype_opaque_opaque_opaque))
17894 type = opaque_ftype_opaque_opaque_opaque
17895 = build_function_type_list (opaque_V4SI_type_node,
17896 opaque_V4SI_type_node,
17897 opaque_V4SI_type_node,
17898 opaque_V4SI_type_node,
17899 NULL_TREE);
17901 else
17903 enum insn_code icode = d->icode;
17904 if (d->name == 0)
17906 if (TARGET_DEBUG_BUILTIN)
17907 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
17908 (long unsigned)i);
17910 continue;
17913 if (icode == CODE_FOR_nothing)
17915 if (TARGET_DEBUG_BUILTIN)
17916 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17917 d->name);
17919 continue;
17922 type = builtin_function_type (insn_data[icode].operand[0].mode,
17923 insn_data[icode].operand[1].mode,
17924 insn_data[icode].operand[2].mode,
17925 insn_data[icode].operand[3].mode,
17926 d->code, d->name);
17929 def_builtin (d->name, type, d->code);
17932 /* Add the binary operators. */
17933 d = bdesc_2arg;
17934 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17936 machine_mode mode0, mode1, mode2;
17937 tree type;
17938 HOST_WIDE_INT mask = d->mask;
17940 if ((mask & builtin_mask) != mask)
17942 if (TARGET_DEBUG_BUILTIN)
17943 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17944 continue;
17947 if (rs6000_overloaded_builtin_p (d->code))
17949 if (! (type = opaque_ftype_opaque_opaque))
17950 type = opaque_ftype_opaque_opaque
17951 = build_function_type_list (opaque_V4SI_type_node,
17952 opaque_V4SI_type_node,
17953 opaque_V4SI_type_node,
17954 NULL_TREE);
17956 else
17958 enum insn_code icode = d->icode;
17959 if (d->name == 0)
17961 if (TARGET_DEBUG_BUILTIN)
17962 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
17963 (long unsigned)i);
17965 continue;
17968 if (icode == CODE_FOR_nothing)
17970 if (TARGET_DEBUG_BUILTIN)
17971 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17972 d->name);
17974 continue;
17977 mode0 = insn_data[icode].operand[0].mode;
17978 mode1 = insn_data[icode].operand[1].mode;
17979 mode2 = insn_data[icode].operand[2].mode;
17981 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
17983 if (! (type = v2si_ftype_v2si_qi))
17984 type = v2si_ftype_v2si_qi
17985 = build_function_type_list (opaque_V2SI_type_node,
17986 opaque_V2SI_type_node,
17987 char_type_node,
17988 NULL_TREE);
17991 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
17992 && mode2 == QImode)
17994 if (! (type = v2si_ftype_int_qi))
17995 type = v2si_ftype_int_qi
17996 = build_function_type_list (opaque_V2SI_type_node,
17997 integer_type_node,
17998 char_type_node,
17999 NULL_TREE);
18002 else
18003 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18004 d->code, d->name);
18007 def_builtin (d->name, type, d->code);
18010 /* Add the simple unary operators. */
18011 d = bdesc_1arg;
18012 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18014 machine_mode mode0, mode1;
18015 tree type;
18016 HOST_WIDE_INT mask = d->mask;
18018 if ((mask & builtin_mask) != mask)
18020 if (TARGET_DEBUG_BUILTIN)
18021 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18022 continue;
18025 if (rs6000_overloaded_builtin_p (d->code))
18027 if (! (type = opaque_ftype_opaque))
18028 type = opaque_ftype_opaque
18029 = build_function_type_list (opaque_V4SI_type_node,
18030 opaque_V4SI_type_node,
18031 NULL_TREE);
18033 else
18035 enum insn_code icode = d->icode;
18036 if (d->name == 0)
18038 if (TARGET_DEBUG_BUILTIN)
18039 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18040 (long unsigned)i);
18042 continue;
18045 if (icode == CODE_FOR_nothing)
18047 if (TARGET_DEBUG_BUILTIN)
18048 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18049 d->name);
18051 continue;
18054 mode0 = insn_data[icode].operand[0].mode;
18055 mode1 = insn_data[icode].operand[1].mode;
18057 if (mode0 == V2SImode && mode1 == QImode)
18059 if (! (type = v2si_ftype_qi))
18060 type = v2si_ftype_qi
18061 = build_function_type_list (opaque_V2SI_type_node,
18062 char_type_node,
18063 NULL_TREE);
18066 else
18067 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18068 d->code, d->name);
18071 def_builtin (d->name, type, d->code);
18074 /* Add the simple no-argument operators. */
18075 d = bdesc_0arg;
18076 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18078 machine_mode mode0;
18079 tree type;
18080 HOST_WIDE_INT mask = d->mask;
18082 if ((mask & builtin_mask) != mask)
18084 if (TARGET_DEBUG_BUILTIN)
18085 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18086 continue;
18088 if (rs6000_overloaded_builtin_p (d->code))
18090 if (!opaque_ftype_opaque)
18091 opaque_ftype_opaque
18092 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18093 type = opaque_ftype_opaque;
18095 else
18097 enum insn_code icode = d->icode;
18098 if (d->name == 0)
18100 if (TARGET_DEBUG_BUILTIN)
18101 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18102 (long unsigned) i);
18103 continue;
18105 if (icode == CODE_FOR_nothing)
18107 if (TARGET_DEBUG_BUILTIN)
18108 fprintf (stderr,
18109 "rs6000_builtin, skip no-argument %s (no code)\n",
18110 d->name);
18111 continue;
18113 mode0 = insn_data[icode].operand[0].mode;
18114 if (mode0 == V2SImode)
18116 /* code for SPE */
18117 if (! (type = v2si_ftype))
18119 v2si_ftype
18120 = build_function_type_list (opaque_V2SI_type_node,
18121 NULL_TREE);
18122 type = v2si_ftype;
18125 else
18126 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18127 d->code, d->name);
18129 def_builtin (d->name, type, d->code);
18133 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18134 static void
18135 init_float128_ibm (machine_mode mode)
18137 if (!TARGET_XL_COMPAT)
18139 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18140 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18141 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18142 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18144 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
18146 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18147 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18148 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18149 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18150 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18151 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18152 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18154 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18155 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18156 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18157 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18158 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18159 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18160 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18161 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18164 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
18165 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18167 else
18169 set_optab_libfunc (add_optab, mode, "_xlqadd");
18170 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18171 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18172 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18175 /* Add various conversions for IFmode to use the traditional TFmode
18176 names. */
18177 if (mode == IFmode)
18179 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18180 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18181 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18182 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18183 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18184 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18186 if (TARGET_POWERPC64)
18188 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18189 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18190 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18191 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18196 /* Set up IEEE 128-bit floating point routines. Use different names if the
18197 arguments can be passed in a vector register. The historical PowerPC
18198 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18199 continue to use that if we aren't using vector registers to pass IEEE
18200 128-bit floating point. */
18202 static void
18203 init_float128_ieee (machine_mode mode)
18205 if (FLOAT128_VECTOR_P (mode))
18207 set_optab_libfunc (add_optab, mode, "__addkf3");
18208 set_optab_libfunc (sub_optab, mode, "__subkf3");
18209 set_optab_libfunc (neg_optab, mode, "__negkf2");
18210 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18211 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18212 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18213 set_optab_libfunc (abs_optab, mode, "__abskf2");
18215 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18216 set_optab_libfunc (ne_optab, mode, "__nekf2");
18217 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18218 set_optab_libfunc (ge_optab, mode, "__gekf2");
18219 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18220 set_optab_libfunc (le_optab, mode, "__lekf2");
18221 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18223 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18224 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18225 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18226 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18228 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18229 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18230 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18232 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18233 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18234 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18236 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18237 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18238 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18239 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18240 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18241 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18243 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18244 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18245 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18246 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18248 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18249 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18250 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18251 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18253 if (TARGET_POWERPC64)
18255 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18256 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18257 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18258 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18262 else
18264 set_optab_libfunc (add_optab, mode, "_q_add");
18265 set_optab_libfunc (sub_optab, mode, "_q_sub");
18266 set_optab_libfunc (neg_optab, mode, "_q_neg");
18267 set_optab_libfunc (smul_optab, mode, "_q_mul");
18268 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18269 if (TARGET_PPC_GPOPT)
18270 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18272 set_optab_libfunc (eq_optab, mode, "_q_feq");
18273 set_optab_libfunc (ne_optab, mode, "_q_fne");
18274 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18275 set_optab_libfunc (ge_optab, mode, "_q_fge");
18276 set_optab_libfunc (lt_optab, mode, "_q_flt");
18277 set_optab_libfunc (le_optab, mode, "_q_fle");
18279 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18280 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18281 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18282 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18283 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18284 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18285 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18286 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18290 static void
18291 rs6000_init_libfuncs (void)
18293 /* __float128 support. */
18294 if (TARGET_FLOAT128)
18296 init_float128_ibm (IFmode);
18297 init_float128_ieee (KFmode);
18300 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18301 if (TARGET_LONG_DOUBLE_128)
18303 if (!TARGET_IEEEQUAD)
18304 init_float128_ibm (TFmode);
18306 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18307 else
18308 init_float128_ieee (TFmode);
18313 /* Expand a block clear operation, and return 1 if successful. Return 0
18314 if we should let the compiler generate normal code.
18316 operands[0] is the destination
18317 operands[1] is the length
18318 operands[3] is the alignment */
18320 int
18321 expand_block_clear (rtx operands[])
18323 rtx orig_dest = operands[0];
18324 rtx bytes_rtx = operands[1];
18325 rtx align_rtx = operands[3];
18326 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
18327 HOST_WIDE_INT align;
18328 HOST_WIDE_INT bytes;
18329 int offset;
18330 int clear_bytes;
18331 int clear_step;
18333 /* If this is not a fixed size clear, just call memset */
18334 if (! constp)
18335 return 0;
18337 /* This must be a fixed size alignment */
18338 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18339 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18341 /* Anything to clear? */
18342 bytes = INTVAL (bytes_rtx);
18343 if (bytes <= 0)
18344 return 1;
18346 /* Use the builtin memset after a point, to avoid huge code bloat.
18347 When optimize_size, avoid any significant code bloat; calling
18348 memset is about 4 instructions, so allow for one instruction to
18349 load zero and three to do clearing. */
18350 if (TARGET_ALTIVEC && align >= 128)
18351 clear_step = 16;
18352 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
18353 clear_step = 8;
18354 else if (TARGET_SPE && align >= 64)
18355 clear_step = 8;
18356 else
18357 clear_step = 4;
18359 if (optimize_size && bytes > 3 * clear_step)
18360 return 0;
18361 if (! optimize_size && bytes > 8 * clear_step)
18362 return 0;
18364 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
18366 machine_mode mode = BLKmode;
18367 rtx dest;
18369 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
18371 clear_bytes = 16;
18372 mode = V4SImode;
18374 else if (bytes >= 8 && TARGET_SPE && align >= 64)
18376 clear_bytes = 8;
18377 mode = V2SImode;
18379 else if (bytes >= 8 && TARGET_POWERPC64
18380 && (align >= 64 || !STRICT_ALIGNMENT))
18382 clear_bytes = 8;
18383 mode = DImode;
18384 if (offset == 0 && align < 64)
18386 rtx addr;
18388 /* If the address form is reg+offset with offset not a
18389 multiple of four, reload into reg indirect form here
18390 rather than waiting for reload. This way we get one
18391 reload, not one per store. */
18392 addr = XEXP (orig_dest, 0);
18393 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18394 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18395 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
18397 addr = copy_addr_to_reg (addr);
18398 orig_dest = replace_equiv_address (orig_dest, addr);
18402 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
18403 { /* move 4 bytes */
18404 clear_bytes = 4;
18405 mode = SImode;
18407 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
18408 { /* move 2 bytes */
18409 clear_bytes = 2;
18410 mode = HImode;
18412 else /* move 1 byte at a time */
18414 clear_bytes = 1;
18415 mode = QImode;
18418 dest = adjust_address (orig_dest, mode, offset);
18420 emit_move_insn (dest, CONST0_RTX (mode));
18423 return 1;
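/* As an illustration (assuming a 32-bit target without AltiVec or SPE,
   not optimizing for size): clearing 10 bytes at 4-byte alignment gives
   clear_step == 4, stays under the 8 * clear_step limit, and emits two
   SImode stores of zero followed by one HImode store of zero.  */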
18426 /* Emit a potentially record-form instruction, setting DST from SRC.
18427 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18428 signed comparison of DST with zero. If DOT is 1, the generated RTL
18429 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18430 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18431 a separate COMPARE. */
18433 static void
18434 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18436 if (dot == 0)
18438 emit_move_insn (dst, src);
18439 return;
18442 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18444 emit_move_insn (dst, src);
18445 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18446 return;
18449 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18450 if (dot == 1)
18452 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18453 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18455 else
18457 rtx set = gen_rtx_SET (dst, src);
18458 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
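/* For instance, with DOT == 2 and CCREG being CR0 this emits a single
   PARALLEL of the shape
     (parallel [(set ccreg (compare:CC src (const_int 0)))
                (set dst src)])
   which matches the record-form ("dot") patterns in rs6000.md; with
   DOT == 1 the second element is a (clobber dst) instead.  */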
18463 /* Figure out the correct instructions to generate to load data for
18464 block compare. MODE is used for the read from memory, and
18465 data is zero extended if REG is wider than MODE. If LE code
18466 is being generated, bswap loads are used.
18468 REG is the destination register to move the data into.
18469 MEM is the memory block being read.
18470 MODE is the mode of memory to use for the read. */
18471 static void
18472 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
18474 switch (GET_MODE (reg))
18476 case DImode:
18477 switch (mode)
18479 case QImode:
18480 emit_insn (gen_zero_extendqidi2 (reg, mem));
18481 break;
18482 case HImode:
18484 rtx src = mem;
18485 if (TARGET_LITTLE_ENDIAN)
18487 src = gen_reg_rtx (HImode);
18488 emit_insn (gen_bswaphi2 (src, mem));
18490 emit_insn (gen_zero_extendhidi2 (reg, src));
18491 break;
18493 case SImode:
18495 rtx src = mem;
18496 if (TARGET_LITTLE_ENDIAN)
18498 src = gen_reg_rtx (SImode);
18499 emit_insn (gen_bswapsi2 (src, mem));
18501 emit_insn (gen_zero_extendsidi2 (reg, src));
18503 break;
18504 case DImode:
18505 if (TARGET_LITTLE_ENDIAN)
18506 emit_insn (gen_bswapdi2 (reg, mem));
18507 else
18508 emit_insn (gen_movdi (reg, mem));
18509 break;
18510 default:
18511 gcc_unreachable ();
18513 break;
18515 case SImode:
18516 switch (mode)
18518 case QImode:
18519 emit_insn (gen_zero_extendqisi2 (reg, mem));
18520 break;
18521 case HImode:
18523 rtx src = mem;
18524 if (TARGET_LITTLE_ENDIAN)
18526 src = gen_reg_rtx (HImode);
18527 emit_insn (gen_bswaphi2 (src, mem));
18529 emit_insn (gen_zero_extendhisi2 (reg, src));
18530 break;
18532 case SImode:
18533 if (TARGET_LITTLE_ENDIAN)
18534 emit_insn (gen_bswapsi2 (reg, mem));
18535 else
18536 emit_insn (gen_movsi (reg, mem));
18537 break;
18538 case DImode:
18539 /* DImode is larger than the destination reg so is not expected. */
18540 gcc_unreachable ();
18541 break;
18542 default:
18543 gcc_unreachable ();
18545 break;
18546 default:
18547 gcc_unreachable ();
18548 break;
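/* For example, on a little-endian target an HImode chunk destined for a
   DImode register is loaded via a fresh HImode register with a
   byte-swapping load (bswaphi2) and then zero-extended, so the compare
   logic always sees the bytes in memory (big-endian) order.  */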
18552 /* Select the mode to be used for reading the next chunk of bytes
18553 in the compare.
18555 OFFSET is the current read offset from the beginning of the block.
18556 BYTES is the number of bytes remaining to be read.
18557 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
18558 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
18559 the largest allowable mode. */
18560 static machine_mode
18561 select_block_compare_mode (HOST_WIDE_INT offset, HOST_WIDE_INT bytes,
18562 HOST_WIDE_INT align, bool word_mode_ok)
18564 /* First see if we can do a whole load unit
18565 as that will be more efficient than a larger load + shift. */
18567 /* If big, use biggest chunk.
18568 If exactly chunk size, use that size.
18569 If remainder can be done in one piece with shifting, do that.
18570 Do largest chunk possible without violating alignment rules. */
18572 /* The most we can read without potential page crossing. */
18573 HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
18575 if (word_mode_ok && bytes >= UNITS_PER_WORD)
18576 return word_mode;
18577 else if (bytes == GET_MODE_SIZE (SImode))
18578 return SImode;
18579 else if (bytes == GET_MODE_SIZE (HImode))
18580 return HImode;
18581 else if (bytes == GET_MODE_SIZE (QImode))
18582 return QImode;
18583 else if (bytes < GET_MODE_SIZE (SImode)
18584 && offset >= GET_MODE_SIZE (SImode) - bytes)
18585 /* This matches the case where we have SImode and 3 bytes
18586 and offset >= 1 and permits us to move back one and overlap
18587 with the previous read, thus avoiding having to shift
18588 unwanted bytes off the input. */
18589 return SImode;
18590 else if (word_mode_ok && bytes < UNITS_PER_WORD
18591 && offset >= UNITS_PER_WORD-bytes)
18592 /* Similarly, if we can use DImode it will get matched here and
18593 can do an overlapping read that ends at the end of the block. */
18594 return word_mode;
18595 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
18596 /* It is safe to do all remaining in one load of largest size,
18597 possibly with a shift to get rid of unwanted bytes. */
18598 return word_mode;
18599 else if (maxread >= GET_MODE_SIZE (SImode))
18600 /* It is safe to do all remaining in one SImode load,
18601 possibly with a shift to get rid of unwanted bytes. */
18602 return SImode;
18603 else if (bytes > GET_MODE_SIZE (SImode))
18604 return SImode;
18605 else if (bytes > GET_MODE_SIZE (HImode))
18606 return HImode;
18608 /* The final fallback is to do one byte at a time. */
18609 return QImode;
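/* For example, with word_mode == DImode and an 8-byte-aligned 13-byte
   block, the first read is a DImode chunk at offset 0; the remaining 5
   bytes then hit the overlapping-read case above, and the caller moves
   the second DImode read back to offset 5, overlapping the first read
   by three bytes instead of shifting unwanted bytes off.  */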
18612 /* Compute the alignment of pointer+OFFSET where the original alignment
18613 of pointer was BASE_ALIGN. */
18614 static HOST_WIDE_INT
18615 compute_current_alignment (HOST_WIDE_INT base_align, HOST_WIDE_INT offset)
18617 if (offset == 0)
18618 return base_align;
18619 return min (base_align, offset & -offset);
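/* The offset & -offset trick isolates the lowest set bit of the offset:
   e.g. a 16-byte aligned pointer plus 4 is 4-byte aligned (4 & -4 == 4)
   and plus 6 is only 2-byte aligned (6 & -6 == 2), while offset 0
   preserves the full base alignment.  */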
18622 /* Expand a block compare operation, and return true if successful.
18623 Return false if we should let the compiler generate normal code,
18624 probably a memcmp call.
18626 OPERANDS[0] is the target (result).
18627 OPERANDS[1] is the first source.
18628 OPERANDS[2] is the second source.
18629 OPERANDS[3] is the length.
18630 OPERANDS[4] is the alignment. */
18631 bool
18632 expand_block_compare (rtx operands[])
18634 rtx target = operands[0];
18635 rtx orig_src1 = operands[1];
18636 rtx orig_src2 = operands[2];
18637 rtx bytes_rtx = operands[3];
18638 rtx align_rtx = operands[4];
18639 HOST_WIDE_INT cmp_bytes = 0;
18640 rtx src1 = orig_src1;
18641 rtx src2 = orig_src2;
18643 /* If this is not a fixed size compare, just call memcmp */
18644 if (!CONST_INT_P (bytes_rtx))
18645 return false;
18647 /* This must be a fixed size alignment */
18648 if (!CONST_INT_P (align_rtx))
18649 return false;
18651 int base_align = INTVAL (align_rtx) / BITS_PER_UNIT;
18653 /* If unaligned word accesses are slow (SLOW_UNALIGNED_ACCESS), do not expand inline. */
18654 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
18655 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
18656 return false;
18658 gcc_assert (GET_MODE (target) == SImode);
18660 /* Anything to compare? */
18661 HOST_WIDE_INT bytes = INTVAL (bytes_rtx);
18662 if (bytes <= 0)
18663 return true;
18665 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
18666 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
18668 /* If we have an LE target without ldbrx and word_mode is DImode,
18669 then we must avoid using word_mode. */
18670 int word_mode_ok = !(TARGET_LITTLE_ENDIAN && !TARGET_LDBRX
18671 && word_mode == DImode);
18673 /* Strategy phase. How many ops will this take and should we expand it? */
18675 int offset = 0;
18676 machine_mode load_mode =
18677 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
18678 int load_mode_size = GET_MODE_SIZE (load_mode);
18680 /* We don't want to generate too much code. */
18681 if (ROUND_UP (bytes, load_mode_size) / load_mode_size
18682 > rs6000_block_compare_inline_limit)
18683 return false;
18685 bool generate_6432_conversion = false;
18686 rtx convert_label = NULL;
18687 rtx final_label = NULL;
18689 /* Example of generated code for an 11-byte compare with 1-byte alignment:
18690 .L10:
18691 ldbrx 10,6,9
18692 ldbrx 9,7,9
18693 subf. 9,9,10
18694 bne 0,.L8
18695 addi 9,4,7
18696 lwbrx 10,0,9
18697 addi 9,5,7
18698 lwbrx 9,0,9
18699 subf 9,9,10
18700 b .L9
18701 .L8: # convert_label
18702 cntlzd 9,9
18703 addi 9,9,-1
18704 xori 9,9,0x3f
18705 .L9: # final_label
18707 If we start off with DImode and then compare/branch on something
18708 with a smaller mode, we will need a block with the DI->SI conversion
18709 that may or may not be executed. */
18711 while (bytes > 0)
18713 int align = compute_current_alignment (base_align, offset);
18714 load_mode = select_block_compare_mode (offset, bytes, align, word_mode_ok);
18715 load_mode_size = GET_MODE_SIZE (load_mode);
18716 if (bytes >= load_mode_size)
18717 cmp_bytes = load_mode_size;
18718 else
18720 /* Move this load back so it doesn't go past the end. */
18721 int extra_bytes = load_mode_size - bytes;
18722 cmp_bytes = bytes;
18723 if (extra_bytes < offset)
18725 offset -= extra_bytes;
18726 cmp_bytes = load_mode_size;
18727 bytes = cmp_bytes;
18731 src1 = adjust_address (orig_src1, load_mode, offset);
18732 src2 = adjust_address (orig_src2, load_mode, offset);
18734 if (!REG_P (XEXP (src1, 0)))
18736 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
18737 src1 = replace_equiv_address (src1, src1_reg);
18739 set_mem_size (src1, cmp_bytes);
18741 if (!REG_P (XEXP (src2, 0)))
18743 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
18744 src2 = replace_equiv_address (src2, src2_reg);
18746 set_mem_size (src2, cmp_bytes);
18748 do_load_for_compare (tmp_reg_src1, src1, load_mode);
18749 do_load_for_compare (tmp_reg_src2, src2, load_mode);
18751 if (cmp_bytes < load_mode_size)
18753 /* Shift unneeded bytes off. */
18754 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
18755 if (word_mode == DImode)
18757 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
18758 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
18760 else
18762 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
18763 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
18767 /* We previously did a block that needed 64->32 conversion but
18768 the current block does not, so a label is needed to jump
18769 to the end. */
18770 if (generate_6432_conversion && !final_label
18771 && GET_MODE_SIZE (GET_MODE (target)) >= load_mode_size)
18772 final_label = gen_label_rtx ();
18774 /* Do we need a 64->32 conversion block? */
18775 int remain = bytes - cmp_bytes;
18776 if (GET_MODE_SIZE (GET_MODE (target)) < GET_MODE_SIZE (load_mode))
18778 generate_6432_conversion = true;
18779 if (remain > 0 && !convert_label)
18780 convert_label = gen_label_rtx ();
18783 if (GET_MODE_SIZE (GET_MODE (target)) >= GET_MODE_SIZE (load_mode))
18785 /* Target is larger than load size so we don't need to
18786 reduce result size. */
18787 if (remain > 0)
18789 /* This is not the last block, branch to the end if the result
18790 of this subtract is not zero. */
18791 if (!final_label)
18792 final_label = gen_label_rtx ();
18793 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
18794 rtx cond = gen_reg_rtx (CCmode);
18795 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
18796 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
18797 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
18798 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18799 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
18800 fin_ref, pc_rtx);
18801 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
18802 JUMP_LABEL (j) = final_label;
18803 LABEL_NUSES (final_label) += 1;
18805 else
18807 if (word_mode == DImode)
18809 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
18810 tmp_reg_src2));
18811 emit_insn (gen_movsi (target,
18812 gen_lowpart (SImode, tmp_reg_src2)));
18814 else
18815 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
18817 if (final_label)
18819 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
18820 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
18821 JUMP_LABEL (j) = final_label;
18822 LABEL_NUSES (final_label) += 1;
18823 emit_barrier ();
18827 else
18829 generate_6432_conversion = true;
18830 if (remain > 0)
18832 if (!convert_label)
18833 convert_label = gen_label_rtx ();
18835 /* Compare to zero and branch to convert_label if not zero. */
18836 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
18837 rtx cond = gen_reg_rtx (CCmode);
18838 rtx tmp = gen_rtx_MINUS (DImode, tmp_reg_src1, tmp_reg_src2);
18839 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cond);
18840 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18841 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
18842 cvt_ref, pc_rtx);
18843 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
18844 JUMP_LABEL (j) = convert_label;
18845 LABEL_NUSES (convert_label) += 1;
18847 else
18849 /* Just do the subtract. Since this is the last block, the
18850 convert code will be generated immediately after it. */
18851 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
18852 tmp_reg_src2));
18856 offset += cmp_bytes;
18857 bytes -= cmp_bytes;
18860 if (generate_6432_conversion)
18862 if (convert_label)
18863 emit_label (convert_label);
18865 /* We need to produce a DImode result from the subtract, then convert
18866 it to the SImode target while maintaining the <0 / ==0 / >0 properties.
18867 Segher's sequence: cntlzd 3,3 ; addi 3,3,-1 ; xori 3,3,63 */
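/* Why the sequence works: cntlzd maps a negative difference (sign bit
   set) to 0, any other nonzero difference to a count in 1..63, and zero
   to 64; subtracting 1 and XORing with 63 then yields -64, a value in
   1..63, or 0 respectively, preserving the <0 / ==0 / >0 property in
   the SImode low part.  */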
18868 emit_insn (gen_clzdi2 (tmp_reg_src2, tmp_reg_src2));
18869 emit_insn (gen_adddi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (-1)));
18870 emit_insn (gen_xordi3 (tmp_reg_src2, tmp_reg_src2, GEN_INT (63)));
18871 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
18874 if (final_label)
18875 emit_label (final_label);
18877 gcc_assert (bytes == 0);
18878 return true;
18882 /* Expand a block move operation, and return 1 if successful. Return 0
18883 if we should let the compiler generate normal code.
18885 operands[0] is the destination
18886 operands[1] is the source
18887 operands[2] is the length
18888 operands[3] is the alignment */
18890 #define MAX_MOVE_REG 4
18892 int
18893 expand_block_move (rtx operands[])
18895 rtx orig_dest = operands[0];
18896 rtx orig_src = operands[1];
18897 rtx bytes_rtx = operands[2];
18898 rtx align_rtx = operands[3];
18899 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
18900 int align;
18901 int bytes;
18902 int offset;
18903 int move_bytes;
18904 rtx stores[MAX_MOVE_REG];
18905 int num_reg = 0;
18907 /* If this is not a fixed size move, just call memcpy */
18908 if (! constp)
18909 return 0;
18911 /* This must be a fixed size alignment */
18912 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
18913 align = INTVAL (align_rtx) * BITS_PER_UNIT;
18915 /* Anything to move? */
18916 bytes = INTVAL (bytes_rtx);
18917 if (bytes <= 0)
18918 return 1;
18920 if (bytes > rs6000_block_move_inline_limit)
18921 return 0;
18923 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
18925 union {
18926 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
18927 rtx (*mov) (rtx, rtx);
18928 } gen_func;
18929 machine_mode mode = BLKmode;
18930 rtx src, dest;
18932 /* Altivec first, since it will be faster than a string move
18933 when it applies, and usually not significantly larger. */
18934 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
18936 move_bytes = 16;
18937 mode = V4SImode;
18938 gen_func.mov = gen_movv4si;
18940 else if (TARGET_SPE && bytes >= 8 && align >= 64)
18942 move_bytes = 8;
18943 mode = V2SImode;
18944 gen_func.mov = gen_movv2si;
18946 else if (TARGET_STRING
18947 && bytes > 24 /* move up to 32 bytes at a time */
18948 && ! fixed_regs[5]
18949 && ! fixed_regs[6]
18950 && ! fixed_regs[7]
18951 && ! fixed_regs[8]
18952 && ! fixed_regs[9]
18953 && ! fixed_regs[10]
18954 && ! fixed_regs[11]
18955 && ! fixed_regs[12])
18957 move_bytes = (bytes > 32) ? 32 : bytes;
18958 gen_func.movmemsi = gen_movmemsi_8reg;
18960 else if (TARGET_STRING
18961 && bytes > 16 /* move up to 24 bytes at a time */
18962 && ! fixed_regs[5]
18963 && ! fixed_regs[6]
18964 && ! fixed_regs[7]
18965 && ! fixed_regs[8]
18966 && ! fixed_regs[9]
18967 && ! fixed_regs[10])
18969 move_bytes = (bytes > 24) ? 24 : bytes;
18970 gen_func.movmemsi = gen_movmemsi_6reg;
18972 else if (TARGET_STRING
18973 && bytes > 8 /* move up to 16 bytes at a time */
18974 && ! fixed_regs[5]
18975 && ! fixed_regs[6]
18976 && ! fixed_regs[7]
18977 && ! fixed_regs[8])
18979 move_bytes = (bytes > 16) ? 16 : bytes;
18980 gen_func.movmemsi = gen_movmemsi_4reg;
18982 else if (bytes >= 8 && TARGET_POWERPC64
18983 && (align >= 64 || !STRICT_ALIGNMENT))
18985 move_bytes = 8;
18986 mode = DImode;
18987 gen_func.mov = gen_movdi;
18988 if (offset == 0 && align < 64)
18990 rtx addr;
18992 /* If the address form is reg+offset with offset not a
18993 multiple of four, reload into reg indirect form here
18994 rather than waiting for reload. This way we get one
18995 reload, not one per load and/or store. */
18996 addr = XEXP (orig_dest, 0);
18997 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
18998 && GET_CODE (XEXP (addr, 1)) == CONST_INT
18999 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19001 addr = copy_addr_to_reg (addr);
19002 orig_dest = replace_equiv_address (orig_dest, addr);
19004 addr = XEXP (orig_src, 0);
19005 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19006 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19007 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19009 addr = copy_addr_to_reg (addr);
19010 orig_src = replace_equiv_address (orig_src, addr);
19014 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
19015 { /* move up to 8 bytes at a time */
19016 move_bytes = (bytes > 8) ? 8 : bytes;
19017 gen_func.movmemsi = gen_movmemsi_2reg;
19019 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19020 { /* move 4 bytes */
19021 move_bytes = 4;
19022 mode = SImode;
19023 gen_func.mov = gen_movsi;
19025 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19026 { /* move 2 bytes */
19027 move_bytes = 2;
19028 mode = HImode;
19029 gen_func.mov = gen_movhi;
19031 else if (TARGET_STRING && bytes > 1)
19032 { /* move up to 4 bytes at a time */
19033 move_bytes = (bytes > 4) ? 4 : bytes;
19034 gen_func.movmemsi = gen_movmemsi_1reg;
19036 else /* move 1 byte at a time */
19038 move_bytes = 1;
19039 mode = QImode;
19040 gen_func.mov = gen_movqi;
19043 src = adjust_address (orig_src, mode, offset);
19044 dest = adjust_address (orig_dest, mode, offset);
19046 if (mode != BLKmode)
19048 rtx tmp_reg = gen_reg_rtx (mode);
19050 emit_insn ((*gen_func.mov) (tmp_reg, src));
19051 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
19054 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
19056 int i;
19057 for (i = 0; i < num_reg; i++)
19058 emit_insn (stores[i]);
19059 num_reg = 0;
19062 if (mode == BLKmode)
19064 /* Move the address into scratch registers. The movmemsi
19065 patterns require zero offset. */
19066 if (!REG_P (XEXP (src, 0)))
19068 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
19069 src = replace_equiv_address (src, src_reg);
19071 set_mem_size (src, move_bytes);
19073 if (!REG_P (XEXP (dest, 0)))
19075 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
19076 dest = replace_equiv_address (dest, dest_reg);
19078 set_mem_size (dest, move_bytes);
19080 emit_insn ((*gen_func.movmemsi) (dest, src,
19081 GEN_INT (move_bytes & 31),
19082 align_rtx));
19086 return 1;
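/* As an illustration (assuming TARGET_STRING on a 32-bit target with
   r5-r10 available and no AltiVec or SPE): a 20-byte copy takes the
   movmemsi_6reg path in a single iteration, emitting one string
   load/store pair (lswi/stswi) that moves all 20 bytes.  */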
19090 /* Return a string to perform a load_multiple operation.
19091 operands[0] is the vector.
19092 operands[1] is the source address.
19093 operands[2] is the first destination register. */
19095 const char *
19096 rs6000_output_load_multiple (rtx operands[3])
19098 /* We have to handle the case where the pseudo that contains the address
19099 is assigned to one of the output registers. */
19100 int i, j;
19101 int words = XVECLEN (operands[0], 0);
19102 rtx xop[10];
19104 if (XVECLEN (operands[0], 0) == 1)
19105 return "lwz %2,0(%1)";
19107 for (i = 0; i < words; i++)
19108 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
19110 if (i == words-1)
19112 xop[0] = GEN_INT (4 * (words-1));
19113 xop[1] = operands[1];
19114 xop[2] = operands[2];
19115 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
19116 return "";
19118 else if (i == 0)
19120 xop[0] = GEN_INT (4 * (words-1));
19121 xop[1] = operands[1];
19122 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
19123 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
19124 return "";
19126 else
19128 for (j = 0; j < words; j++)
19129 if (j != i)
19131 xop[0] = GEN_INT (j * 4);
19132 xop[1] = operands[1];
19133 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
19134 output_asm_insn ("lwz %2,%0(%1)", xop);
19136 xop[0] = GEN_INT (i * 4);
19137 xop[1] = operands[1];
19138 output_asm_insn ("lwz %1,%0(%1)", xop);
19139 return "";
19143 return "lswi %2,%1,%N0";
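/* For example, when loading three words into r5..r7 from an address
   held in r6, the fallback loop above emits lwz loads for r5 and r7
   first and loads r6 (the register holding the address) last, so the
   base address is not clobbered before the other loads have used it.  */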
19147 /* A validation routine: say whether CODE, a condition code, and MODE
19148 match. The other alternatives either don't make sense or should
19149 never be generated. */
19151 void
19152 validate_condition_mode (enum rtx_code code, machine_mode mode)
19154 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
19155 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
19156 && GET_MODE_CLASS (mode) == MODE_CC);
19158 /* These don't make sense. */
19159 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
19160 || mode != CCUNSmode);
19162 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
19163 || mode == CCUNSmode);
19165 gcc_assert (mode == CCFPmode
19166 || (code != ORDERED && code != UNORDERED
19167 && code != UNEQ && code != LTGT
19168 && code != UNGT && code != UNLT
19169 && code != UNGE && code != UNLE));
19171 /* These should never be generated except for
19172 flag_finite_math_only. */
19173 gcc_assert (mode != CCFPmode
19174 || flag_finite_math_only
19175 || (code != LE && code != GE
19176 && code != UNEQ && code != LTGT
19177 && code != UNGT && code != UNLT));
19179 /* These are invalid; the information is not there. */
19180 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
19184 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
19185 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
19186 not zero, store there the bit offset (counted from the right) where
19187 the single stretch of 1 bits begins; and similarly for B, the bit
19188 offset where it ends. */
19190 bool
19191 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
19193 unsigned HOST_WIDE_INT val = INTVAL (mask);
19194 unsigned HOST_WIDE_INT bit;
19195 int nb, ne;
19196 int n = GET_MODE_PRECISION (mode);
19198 if (mode != DImode && mode != SImode)
19199 return false;
19201 if (INTVAL (mask) >= 0)
19203 bit = val & -val;
19204 ne = exact_log2 (bit);
19205 nb = exact_log2 (val + bit);
19207 else if (val + 1 == 0)
19209 nb = n;
19210 ne = 0;
19212 else if (val & 1)
19214 val = ~val;
19215 bit = val & -val;
19216 nb = exact_log2 (bit);
19217 ne = exact_log2 (val + bit);
19219 else
19221 bit = val & -val;
19222 ne = exact_log2 (bit);
19223 if (val + bit == 0)
19224 nb = n;
19225 else
19226 nb = 0;
19229 nb--;
19231 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
19232 return false;
19234 if (b)
19235 *b = nb;
19236 if (e)
19237 *e = ne;
19239 return true;
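/* For example, 0x0ff0 in SImode is accepted as a single stretch of ones
   running from bit 4 (*E) up through bit 11 (*B), counted from the
   right; 0x0f0f is rejected because its ones are not contiguous, while
   a mask such as 0xf000000f wraps around and is still accepted.  */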
19242 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
19243 or rldicr instruction, to implement an AND with it in mode MODE. */
19245 bool
19246 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
19248 int nb, ne;
19250 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19251 return false;
19253 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
19254 does not wrap. */
19255 if (mode == DImode)
19256 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
19258 /* For SImode, rlwinm can do everything. */
19259 if (mode == SImode)
19260 return (nb < 32 && ne < 32);
19262 return false;
19265 /* Return the instruction template for an AND with mask in mode MODE, with
19266 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19268 const char *
19269 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
19271 int nb, ne;
19273 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
19274 gcc_unreachable ();
19276 if (mode == DImode && ne == 0)
19278 operands[3] = GEN_INT (63 - nb);
19279 if (dot)
19280 return "rldicl. %0,%1,0,%3";
19281 return "rldicl %0,%1,0,%3";
19284 if (mode == DImode && nb == 63)
19286 operands[3] = GEN_INT (63 - ne);
19287 if (dot)
19288 return "rldicr. %0,%1,0,%3";
19289 return "rldicr %0,%1,0,%3";
19292 if (nb < 32 && ne < 32)
19294 operands[3] = GEN_INT (31 - nb);
19295 operands[4] = GEN_INT (31 - ne);
19296 if (dot)
19297 return "rlwinm. %0,%1,0,%3,%4";
19298 return "rlwinm %0,%1,0,%3,%4";
19301 gcc_unreachable ();
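/* For example, an SImode AND with 0x0ff0 (ones from bit 4 to bit 11)
   falls into the last case above, with operands[3] == 31 - 11 == 20 and
   operands[4] == 31 - 4 == 27, giving "rlwinm %0,%1,0,20,27"; rlwinm
   counts its mask bits from the left.  */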
19304 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
19305 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
19306 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
19308 bool
19309 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
19311 int nb, ne;
19313 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19314 return false;
19316 int n = GET_MODE_PRECISION (mode);
19317 int sh = -1;
19319 if (CONST_INT_P (XEXP (shift, 1)))
19321 sh = INTVAL (XEXP (shift, 1));
19322 if (sh < 0 || sh >= n)
19323 return false;
19326 rtx_code code = GET_CODE (shift);
19328 /* Convert any shift by 0 to a rotate, to simplify below code. */
19329 if (sh == 0)
19330 code = ROTATE;
19332 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19333 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19334 code = ASHIFT;
19335 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19337 code = LSHIFTRT;
19338 sh = n - sh;
19341 /* DImode rotates need rld*. */
19342 if (mode == DImode && code == ROTATE)
19343 return (nb == 63 || ne == 0 || ne == sh);
19345 /* SImode rotates need rlw*. */
19346 if (mode == SImode && code == ROTATE)
19347 return (nb < 32 && ne < 32 && sh < 32);
19349 /* Wrap-around masks are only okay for rotates. */
19350 if (ne > nb)
19351 return false;
19353 /* Variable shifts are only okay for rotates. */
19354 if (sh < 0)
19355 return false;
19357 /* Don't allow ASHIFT if the mask is wrong for that. */
19358 if (code == ASHIFT && ne < sh)
19359 return false;
19361 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
19362 if the mask is wrong for that. */
19363 if (nb < 32 && ne < 32 && sh < 32
19364 && !(code == LSHIFTRT && nb >= 32 - sh))
19365 return true;
19367 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
19368 if the mask is wrong for that. */
19369 if (code == LSHIFTRT)
19370 sh = 64 - sh;
19371 if (nb == 63 || ne == 0 || ne == sh)
19372 return !(code == LSHIFTRT && nb >= sh);
19374 return false;
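/* For example, (x << 3) & 0xfffffff8 in SImode is accepted: the mask is
   the single stretch b == 31, e == 3, the ASHIFT test ne < sh does not
   trigger (3 >= 3), and the whole thing can be done as the single insn
   "rlwinm %0,%1,3,0,28".  */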
19377 /* Return the instruction template for a shift with mask in mode MODE, with
19378 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19380 const char *
19381 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
19383 int nb, ne;
19385 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19386 gcc_unreachable ();
19388 if (mode == DImode && ne == 0)
19390 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19391 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
19392 operands[3] = GEN_INT (63 - nb);
19393 if (dot)
19394 return "rld%I2cl. %0,%1,%2,%3";
19395 return "rld%I2cl %0,%1,%2,%3";
19398 if (mode == DImode && nb == 63)
19400 operands[3] = GEN_INT (63 - ne);
19401 if (dot)
19402 return "rld%I2cr. %0,%1,%2,%3";
19403 return "rld%I2cr %0,%1,%2,%3";
19406 if (mode == DImode
19407 && GET_CODE (operands[4]) != LSHIFTRT
19408 && CONST_INT_P (operands[2])
19409 && ne == INTVAL (operands[2]))
19411 operands[3] = GEN_INT (63 - nb);
19412 if (dot)
19413 return "rld%I2c. %0,%1,%2,%3";
19414 return "rld%I2c %0,%1,%2,%3";
19417 if (nb < 32 && ne < 32)
19419 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19420 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19421 operands[3] = GEN_INT (31 - nb);
19422 operands[4] = GEN_INT (31 - ne);
19423 /* This insn can also be a 64-bit rotate with mask that really makes
19424 it just a shift right (with mask); the %h below are to adjust for
19425 that situation (shift count is >= 32 in that case). */
19426 if (dot)
19427 return "rlw%I2nm. %0,%1,%h2,%3,%4";
19428 return "rlw%I2nm %0,%1,%h2,%3,%4";
19431 gcc_unreachable ();
19434 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19435 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19436 ASHIFT, or LSHIFTRT) in mode MODE. */
19438 bool
19439 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19441 int nb, ne;
19443 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19444 return false;
19446 int n = GET_MODE_PRECISION (mode);
19448 int sh = INTVAL (XEXP (shift, 1));
19449 if (sh < 0 || sh >= n)
19450 return false;
19452 rtx_code code = GET_CODE (shift);
19454 /* Convert any shift by 0 to a rotate, to simplify below code. */
19455 if (sh == 0)
19456 code = ROTATE;
19458 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19459 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19460 code = ASHIFT;
19461 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19463 code = LSHIFTRT;
19464 sh = n - sh;
19467 /* DImode rotates need rldimi. */
19468 if (mode == DImode && code == ROTATE)
19469 return (ne == sh);
19471 /* SImode rotates need rlwimi. */
19472 if (mode == SImode && code == ROTATE)
19473 return (nb < 32 && ne < 32 && sh < 32);
19475 /* Wrap-around masks are only okay for rotates. */
19476 if (ne > nb)
19477 return false;
19479 /* Don't allow ASHIFT if the mask is wrong for that. */
19480 if (code == ASHIFT && ne < sh)
19481 return false;
19483 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19484 if the mask is wrong for that. */
19485 if (nb < 32 && ne < 32 && sh < 32
19486 && !(code == LSHIFTRT && nb >= 32 - sh))
19487 return true;
19489 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19490 if the mask is wrong for that. */
19491 if (code == LSHIFTRT)
19492 sh = 64 - sh;
19493 if (ne == sh)
19494 return !(code == LSHIFTRT && nb >= sh);
19496 return false;
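/* For example, inserting (y << 16) & 0x00ff0000 in SImode is a valid
   rlwimi case: the mask is the stretch b == 23, e == 16, matching the
   shift count 16; on a 32-bit target it would be emitted as
   "rlwimi %0,%1,16,8,15" (a 64-bit target prefers rldimi).  */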
19499 /* Return the instruction template for an insert with mask in mode MODE, with
19500 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19502 const char *
19503 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19505 int nb, ne;
19507 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19508 gcc_unreachable ();
19510 /* Prefer rldimi because rlwimi is cracked. */
19511 if (TARGET_POWERPC64
19512 && (!dot || mode == DImode)
19513 && GET_CODE (operands[4]) != LSHIFTRT
19514 && ne == INTVAL (operands[2]))
19516 operands[3] = GEN_INT (63 - nb);
19517 if (dot)
19518 return "rldimi. %0,%1,%2,%3";
19519 return "rldimi %0,%1,%2,%3";
19522 if (nb < 32 && ne < 32)
19524 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19525 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19526 operands[3] = GEN_INT (31 - nb);
19527 operands[4] = GEN_INT (31 - ne);
19528 if (dot)
19529 return "rlwimi. %0,%1,%2,%3,%4";
19530 return "rlwimi %0,%1,%2,%3,%4";
19533 gcc_unreachable ();
19536 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19537 using two machine instructions. */
19539 bool
19540 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19542 /* There are two kinds of AND we can handle with two insns:
19543 1) those we can do with two rl* insn;
19544 2) ori[s];xori[s].
19546 We do not handle that last case yet. */
19548 /* If there is just one stretch of ones, we can do it. */
19549 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19550 return true;
19552 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19553 one insn, we can do the whole thing with two. */
19554 unsigned HOST_WIDE_INT val = INTVAL (c);
19555 unsigned HOST_WIDE_INT bit1 = val & -val;
19556 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19557 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19558 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19559 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
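/* For example, C == 0x00ff00ff is not a single stretch, but filling its
   lowest hole (bit1 == 1, bit2 == 0x100, bit3 == 0x10000) gives
   val + bit3 - bit2 == 0x00ffffff, a valid single mask, so this AND can
   be done with two rl* insns.  */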
19562 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19563 If EXPAND is true, split rotate-and-mask instructions we generate to
19564 their constituent parts as well (this is used during expand); if DOT
19565 is 1, make the last insn a record-form instruction clobbering the
19566 destination GPR and setting the CC reg (from operands[3]); if 2, set
19567 that GPR as well as the CC reg. */
19569 void
19570 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19572 gcc_assert (!(expand && dot));
19574 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19576 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19577 shift right. This generates better code than doing the masks without
19578 shifts, or shifting first right and then left. */
19579 int nb, ne;
19580 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19582 gcc_assert (mode == DImode);
19584 int shift = 63 - nb;
19585 if (expand)
19587 rtx tmp1 = gen_reg_rtx (DImode);
19588 rtx tmp2 = gen_reg_rtx (DImode);
19589 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19590 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19591 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19593 else
19595 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19596 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19597 emit_move_insn (operands[0], tmp);
19598 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19599 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19601 return;
19604 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19605 that does the rest. */
19606 unsigned HOST_WIDE_INT bit1 = val & -val;
19607 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19608 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19609 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19611 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19612 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19614 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19616 /* Two "no-rotate"-and-mask instructions, for SImode. */
19617 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19619 gcc_assert (mode == SImode);
19621 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19622 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19623 emit_move_insn (reg, tmp);
19624 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19625 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19626 return;
19629 gcc_assert (mode == DImode);
19631 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19632 insns; we have to do the first in SImode, because it wraps. */
19633 if (mask2 <= 0xffffffff
19634 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19636 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19637 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19638 GEN_INT (mask1));
19639 rtx reg_low = gen_lowpart (SImode, reg);
19640 emit_move_insn (reg_low, tmp);
19641 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19642 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19643 return;
19646 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19647 at the top end), rotate back and clear the other hole. */
19648 int right = exact_log2 (bit3);
19649 int left = 64 - right;
19651 /* Rotate the mask too. */
19652 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19654 if (expand)
19656 rtx tmp1 = gen_reg_rtx (DImode);
19657 rtx tmp2 = gen_reg_rtx (DImode);
19658 rtx tmp3 = gen_reg_rtx (DImode);
19659 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19660 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19661 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19662 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19664 else
19666 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19667 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19668 emit_move_insn (operands[0], tmp);
19669 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19670 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19671 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
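/* Illustrative run of the two-rld* path above (assumed values, not from
   the original source), for a DImode AND with val = 0xffff00000000ff00:

     bit2 = 0x0000000000010000, bit3 = 0x0001000000000000
     right = 48, left = 16
     mask1 (after rotating) = 0x00000000ffffffff
     mask2                  = 0xffffffffffffff00

   That is: rotate left 16 so the middle hole sits at the top, clear it
   with the rotated mask1, rotate back (left 48), then clear the low
   hole with mask2.  */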
19675 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
19676 for lfq and stfq insns iff the registers are hard registers. */
19678 int
19679 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19681 /* We might have been passed a SUBREG. */
19682 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19683 return 0;
19685 /* We might have been passed non floating point registers. */
19686 if (!FP_REGNO_P (REGNO (reg1))
19687 || !FP_REGNO_P (REGNO (reg2)))
19688 return 0;
19690 return (REGNO (reg1) == REGNO (reg2) - 1);
19693 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19694 addr1 and addr2 must be in consecutive memory locations
19695 (addr2 == addr1 + 8). */
19697 int
19698 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19700 rtx addr1, addr2;
19701 unsigned int reg1, reg2;
19702 int offset1, offset2;
19704 /* The mems cannot be volatile. */
19705 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19706 return 0;
19708 addr1 = XEXP (mem1, 0);
19709 addr2 = XEXP (mem2, 0);
19711 /* Extract an offset (if used) from the first addr. */
19712 if (GET_CODE (addr1) == PLUS)
19714 /* If not a REG, return zero. */
19715 if (GET_CODE (XEXP (addr1, 0)) != REG)
19716 return 0;
19717 else
19719 reg1 = REGNO (XEXP (addr1, 0));
19720 /* The offset must be constant! */
19721 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19722 return 0;
19723 offset1 = INTVAL (XEXP (addr1, 1));
19726 else if (GET_CODE (addr1) != REG)
19727 return 0;
19728 else
19730 reg1 = REGNO (addr1);
19731 /* This was a simple (mem (reg)) expression. Offset is 0. */
19732 offset1 = 0;
19735 /* And now for the second addr. */
19736 if (GET_CODE (addr2) == PLUS)
19738 /* If not a REG, return zero. */
19739 if (GET_CODE (XEXP (addr2, 0)) != REG)
19740 return 0;
19741 else
19743 reg2 = REGNO (XEXP (addr2, 0));
19744 /* The offset must be constant. */
19745 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19746 return 0;
19747 offset2 = INTVAL (XEXP (addr2, 1));
19750 else if (GET_CODE (addr2) != REG)
19751 return 0;
19752 else
19754 reg2 = REGNO (addr2);
19755 /* This was a simple (mem (reg)) expression. Offset is 0. */
19756 offset2 = 0;
19759 /* Both of these must have the same base register. */
19760 if (reg1 != reg2)
19761 return 0;
19763 /* The offset for the second addr must be 8 more than the first addr. */
19764 if (offset2 != offset1 + 8)
19765 return 0;
19767 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19768 instructions. */
19769 return 1;
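/* Illustrative pair (assumed registers, not from the original source):

     mem1 = (mem:DF (plus (reg r9) (const_int 16)))
     mem2 = (mem:DF (plus (reg r9) (const_int 24)))

   Same base register and offset2 == offset1 + 8, so the two accesses
   qualify for a single lfq/stfq.  */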
19773 rtx
19774 rs6000_secondary_memory_needed_rtx (machine_mode mode)
19776 static bool eliminated = false;
19777 rtx ret;
19779 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
19780 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19781 else
19783 rtx mem = cfun->machine->sdmode_stack_slot;
19784 gcc_assert (mem != NULL_RTX);
19786 if (!eliminated)
19788 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
19789 cfun->machine->sdmode_stack_slot = mem;
19790 eliminated = true;
19792 ret = mem;
19795 if (TARGET_DEBUG_ADDR)
19797 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19798 GET_MODE_NAME (mode));
19799 if (!ret)
19800 fprintf (stderr, "\tNULL_RTX\n");
19801 else
19802 debug_rtx (ret);
19805 return ret;
19808 /* Return the mode to be used for memory when a secondary memory
19809 location is needed. For SDmode values we need to use DDmode, in
19810 all other cases we can use the same mode. */
19811 machine_mode
19812 rs6000_secondary_memory_needed_mode (machine_mode mode)
19814 if (lra_in_progress && mode == SDmode)
19815 return DDmode;
19816 return mode;
19819 static tree
19820 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
19822 /* Don't walk into types. */
19823 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
19825 *walk_subtrees = 0;
19826 return NULL_TREE;
19829 switch (TREE_CODE (*tp))
19831 case VAR_DECL:
19832 case PARM_DECL:
19833 case FIELD_DECL:
19834 case RESULT_DECL:
19835 case SSA_NAME:
19836 case REAL_CST:
19837 case MEM_REF:
19838 case VIEW_CONVERT_EXPR:
19839 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
19840 return *tp;
19841 break;
19842 default:
19843 break;
19846 return NULL_TREE;
19849 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19850 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19851 only work on the traditional altivec registers, note if an altivec register
19852 was chosen. */
19854 static enum rs6000_reg_type
19855 register_to_reg_type (rtx reg, bool *is_altivec)
19857 HOST_WIDE_INT regno;
19858 enum reg_class rclass;
19860 if (GET_CODE (reg) == SUBREG)
19861 reg = SUBREG_REG (reg);
19863 if (!REG_P (reg))
19864 return NO_REG_TYPE;
19866 regno = REGNO (reg);
19867 if (regno >= FIRST_PSEUDO_REGISTER)
19869 if (!lra_in_progress && !reload_in_progress && !reload_completed)
19870 return PSEUDO_REG_TYPE;
19872 regno = true_regnum (reg);
19873 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19874 return PSEUDO_REG_TYPE;
19877 gcc_assert (regno >= 0);
19879 if (is_altivec && ALTIVEC_REGNO_P (regno))
19880 *is_altivec = true;
19882 rclass = rs6000_regno_regclass[regno];
19883 return reg_class_to_reg_type[(int)rclass];
19886 /* Helper function to return the cost of adding a TOC entry address. */
19888 static inline int
19889 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19891 int ret;
19893 if (TARGET_CMODEL != CMODEL_SMALL)
19894 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19896 else
19897 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19899 return ret;
19902 /* Helper function for rs6000_secondary_reload to determine whether the memory
19903 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19904 needs reloading. Return negative if the memory is not handled by the memory
19905 helper functions and a different reload method should be tried, 0 if no
19906 additional instructions are needed, and positive to give the extra cost for the
19907 memory. */
19909 static int
19910 rs6000_secondary_reload_memory (rtx addr,
19911 enum reg_class rclass,
19912 machine_mode mode)
19914 int extra_cost = 0;
19915 rtx reg, and_arg, plus_arg0, plus_arg1;
19916 addr_mask_type addr_mask;
19917 const char *type = NULL;
19918 const char *fail_msg = NULL;
19920 if (GPR_REG_CLASS_P (rclass))
19921 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19923 else if (rclass == FLOAT_REGS)
19924 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19926 else if (rclass == ALTIVEC_REGS)
19927 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19929 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19930 else if (rclass == VSX_REGS)
19931 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19932 & ~RELOAD_REG_AND_M16);
19934 /* If the register allocator hasn't made up its mind yet on the register
19935 class to use, settle on defaults.
19936 else if (rclass == NO_REGS)
19938 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19939 & ~RELOAD_REG_AND_M16);
19941 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19942 addr_mask &= ~(RELOAD_REG_INDEXED
19943 | RELOAD_REG_PRE_INCDEC
19944 | RELOAD_REG_PRE_MODIFY);
19947 else
19948 addr_mask = 0;
19950 /* If the register isn't valid in this register class, just return now. */
19951 if ((addr_mask & RELOAD_REG_VALID) == 0)
19953 if (TARGET_DEBUG_ADDR)
19955 fprintf (stderr,
19956 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19957 "not valid in class\n",
19958 GET_MODE_NAME (mode), reg_class_names[rclass]);
19959 debug_rtx (addr);
19962 return -1;
19965 switch (GET_CODE (addr))
19967 /* Does the register class support auto update forms for this mode? We
19968 don't need a scratch register, since the powerpc only supports
19969 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19970 case PRE_INC:
19971 case PRE_DEC:
19972 reg = XEXP (addr, 0);
19973 if (!base_reg_operand (reg, GET_MODE (reg)))
19975 fail_msg = "no base register #1";
19976 extra_cost = -1;
19979 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19981 extra_cost = 1;
19982 type = "update";
19984 break;
19986 case PRE_MODIFY:
19987 reg = XEXP (addr, 0);
19988 plus_arg1 = XEXP (addr, 1);
19989 if (!base_reg_operand (reg, GET_MODE (reg))
19990 || GET_CODE (plus_arg1) != PLUS
19991 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19993 fail_msg = "bad PRE_MODIFY";
19994 extra_cost = -1;
19997 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19999 extra_cost = 1;
20000 type = "update";
20002 break;
20004 /* Do we need to simulate AND -16 to clear the bottom address bits used
20005 in VMX load/stores? Only allow the AND for vector sizes. */
20006 case AND:
20007 and_arg = XEXP (addr, 0);
20008 if (GET_MODE_SIZE (mode) != 16
20009 || GET_CODE (XEXP (addr, 1)) != CONST_INT
20010 || INTVAL (XEXP (addr, 1)) != -16)
20012 fail_msg = "bad Altivec AND #1";
20013 extra_cost = -1;
20016 if (rclass != ALTIVEC_REGS)
20018 if (legitimate_indirect_address_p (and_arg, false))
20019 extra_cost = 1;
20021 else if (legitimate_indexed_address_p (and_arg, false))
20022 extra_cost = 2;
20024 else
20026 fail_msg = "bad Altivec AND #2";
20027 extra_cost = -1;
20030 type = "and";
20032 break;
20034 /* If this is an indirect address, make sure it is a base register. */
20035 case REG:
20036 case SUBREG:
20037 if (!legitimate_indirect_address_p (addr, false))
20039 extra_cost = 1;
20040 type = "move";
20042 break;
20044 /* If this is an indexed address, make sure the register class can handle
20045 indexed addresses for this mode. */
20046 case PLUS:
20047 plus_arg0 = XEXP (addr, 0);
20048 plus_arg1 = XEXP (addr, 1);
20050 /* (plus (plus (reg) (constant)) (constant)) is generated during
20051 push_reload processing, so handle it now. */
20052 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
20054 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20056 extra_cost = 1;
20057 type = "offset";
20061 /* (plus (plus (reg) (constant)) (reg)) is also generated during
20062 push_reload processing, so handle it now. */
20063 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
20065 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20067 extra_cost = 1;
20068 type = "indexed #2";
20072 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
20074 fail_msg = "no base register #2";
20075 extra_cost = -1;
20078 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
20080 if ((addr_mask & RELOAD_REG_INDEXED) == 0
20081 || !legitimate_indexed_address_p (addr, false))
20083 extra_cost = 1;
20084 type = "indexed";
20088 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
20089 && CONST_INT_P (plus_arg1))
20091 if (!quad_address_offset_p (INTVAL (plus_arg1)))
20093 extra_cost = 1;
20094 type = "vector d-form offset";
20098 /* Make sure the register class can handle offset addresses. */
20099 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20101 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20103 extra_cost = 1;
20104 type = "offset #2";
20108 else
20110 fail_msg = "bad PLUS";
20111 extra_cost = -1;
20114 break;
20116 case LO_SUM:
20117 /* Quad offsets are restricted and can't handle normal addresses. */
20118 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20120 extra_cost = -1;
20121 type = "vector d-form lo_sum";
20124 else if (!legitimate_lo_sum_address_p (mode, addr, false))
20126 fail_msg = "bad LO_SUM";
20127 extra_cost = -1;
20130 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20132 extra_cost = 1;
20133 type = "lo_sum";
20135 break;
20137 /* Static addresses need to create a TOC entry. */
20138 case CONST:
20139 case SYMBOL_REF:
20140 case LABEL_REF:
20141 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20143 extra_cost = -1;
20144 type = "vector d-form lo_sum #2";
20147 else
20149 type = "address";
20150 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
20152 break;
20154 /* TOC references look like offsetable memory. */
20155 case UNSPEC:
20156 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
20158 fail_msg = "bad UNSPEC";
20159 extra_cost = -1;
20162 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
20164 extra_cost = -1;
20165 type = "vector d-form lo_sum #3";
20168 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20170 extra_cost = 1;
20171 type = "toc reference";
20173 break;
20175 default:
20177 fail_msg = "bad address";
20178 extra_cost = -1;
20182 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
20184 if (extra_cost < 0)
20185 fprintf (stderr,
20186 "rs6000_secondary_reload_memory error: mode = %s, "
20187 "class = %s, addr_mask = '%s', %s\n",
20188 GET_MODE_NAME (mode),
20189 reg_class_names[rclass],
20190 rs6000_debug_addr_mask (addr_mask, false),
20191 (fail_msg != NULL) ? fail_msg : "<bad address>");
20193 else
20194 fprintf (stderr,
20195 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
20196 "addr_mask = '%s', extra cost = %d, %s\n",
20197 GET_MODE_NAME (mode),
20198 reg_class_names[rclass],
20199 rs6000_debug_addr_mask (addr_mask, false),
20200 extra_cost,
20201 (type) ? type : "<none>");
20203 debug_rtx (addr);
20206 return extra_cost;
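/* Illustrative case (assumed, not from the original source): reloading
   (mem:DF (plus (reg r9) (const_int 32))) for ALTIVEC_REGS on an ISA 2.07
   target, where scalars in AltiVec registers have no D-form (reg+offset)
   addressing.  The PLUS case above accepts the offset address but finds
   RELOAD_REG_OFFSET clear for the class, so it returns an extra cost of 1
   ("offset #2"), and the address is later formed in a scratch register.  */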
20209 /* Helper function for rs6000_secondary_reload to return true if a move to a
20210 different register class is really a simple move. */
20212 static bool
20213 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
20214 enum rs6000_reg_type from_type,
20215 machine_mode mode)
20217 int size;
20219 /* Add support for various direct moves available. In this function, we only
20220 look at cases where we don't need any extra registers, and one or more
20221 simple move insns are issued. At present, 32-bit integers are not allowed
20222 in FPR/VSX registers. Single precision binary floating point is not a simple
20223 move because we need to convert to the single precision memory layout.
20224 The 4-byte SDmode can be moved. TDmode values are disallowed since they
20225 need special direct move handling, which we do not support yet. */
20226 size = GET_MODE_SIZE (mode);
20227 if (TARGET_DIRECT_MOVE
20228 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
20229 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20230 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
20231 return true;
20233 else if (TARGET_DIRECT_MOVE_128 && size == 16 && mode != TDmode
20234 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20235 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
20236 return true;
20238 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
20239 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
20240 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20241 return true;
20243 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
20244 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
20245 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
20246 return true;
20248 return false;
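/* Sketch of the simple-move cases above (illustrative, not from the
   original source): on a 64-bit ISA 2.07 target, a DImode copy between a
   GPR and a VSX register is a single mtvsrd or mfvsrd, and an SDmode
   value can likewise be moved directly, so no scratch register or
   secondary memory is needed for these combinations.  */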
20251 /* Direct move helper function for rs6000_secondary_reload, handle all of the
20252 special direct moves that involve allocating an extra register. Return
20253 true if there is a suitable helper function (recording its insn code and
20254 cost in SRI), and false if not. */
20256 static bool
20257 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
20258 enum rs6000_reg_type from_type,
20259 machine_mode mode,
20260 secondary_reload_info *sri,
20261 bool altivec_p)
20263 bool ret = false;
20264 enum insn_code icode = CODE_FOR_nothing;
20265 int cost = 0;
20266 int size = GET_MODE_SIZE (mode);
20268 if (TARGET_POWERPC64 && size == 16)
20270 /* Handle moving 128-bit values from GPRs to VSX registers on
20271 ISA 2.07 (power8, power9) when running in 64-bit mode using
20272 XXPERMDI to glue the two 64-bit values back together. */
20273 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20275 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
20276 icode = reg_addr[mode].reload_vsx_gpr;
20279 /* Handle moving 128-bit values from VSX registers to GPRs on
20280 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
20281 bottom 64-bit value. */
20282 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20284 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
20285 icode = reg_addr[mode].reload_gpr_vsx;
20289 else if (TARGET_POWERPC64 && mode == SFmode)
20291 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
20293 cost = 3; /* xscvdpspn, mfvsrd, and. */
20294 icode = reg_addr[mode].reload_gpr_vsx;
20297 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
20299 cost = 2; /* mtvsrz, xscvspdpn. */
20300 icode = reg_addr[mode].reload_vsx_gpr;
20304 else if (!TARGET_POWERPC64 && size == 8)
20306 /* Handle moving 64-bit values from GPRs to floating point registers on
20307 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
20308 32-bit values back together. Altivec register classes must be handled
20309 specially since a different instruction is used, and the secondary
20310 reload support requires a single instruction class in the scratch
20311 register constraint. However, right now TFmode is not allowed in
20312 Altivec registers, so the pattern will never match. */
20313 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
20315 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
20316 icode = reg_addr[mode].reload_fpr_gpr;
20320 if (icode != CODE_FOR_nothing)
20322 ret = true;
20323 if (sri)
20325 sri->icode = icode;
20326 sri->extra_cost = cost;
20330 return ret;
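/* Illustrative expansion (assumed register numbers, not from the original
   source) of the 128-bit GPR -> VSX case above, matching its cost of 3:

     mtvsrd vs0,r4              # move the high doubleword
     mtvsrd vs32,r5             # move the low doubleword
     xxpermdi vs34,vs0,vs32,0   # glue the two halves back together  */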
20333 /* Return whether a move between two register classes can be done either
20334 directly (simple move) or via a pattern that uses a single extra temporary
20335 (using ISA 2.07's direct move in this case). */
20337 static bool
20338 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
20339 enum rs6000_reg_type from_type,
20340 machine_mode mode,
20341 secondary_reload_info *sri,
20342 bool altivec_p)
20344 /* Fall back to load/store reloads if either type is not a register. */
20345 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
20346 return false;
20348 /* If we haven't allocated registers yet, assume the move can be done for the
20349 standard register types. */
20350 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
20351 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
20352 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
20353 return true;
20355 /* Moves within the same set of registers are simple moves for non-specialized
20356 registers. */
20357 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
20358 return true;
20360 /* Check whether a simple move can be done directly. */
20361 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
20363 if (sri)
20365 sri->icode = CODE_FOR_nothing;
20366 sri->extra_cost = 0;
20368 return true;
20371 /* Now check if we can do it in a few steps. */
20372 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
20373 altivec_p);
20376 /* Inform reload about cases where moving X with a mode MODE to a register in
20377 RCLASS requires an extra scratch or immediate register. Return the class
20378 needed for the immediate register.
20380 For VSX and Altivec, we may need a register to convert sp+offset into
20381 reg+sp.
20383 For misaligned 64-bit gpr loads and stores we need a register to
20384 convert an offset address to indirect. */
20386 static reg_class_t
20387 rs6000_secondary_reload (bool in_p,
20388 rtx x,
20389 reg_class_t rclass_i,
20390 machine_mode mode,
20391 secondary_reload_info *sri)
20393 enum reg_class rclass = (enum reg_class) rclass_i;
20394 reg_class_t ret = ALL_REGS;
20395 enum insn_code icode;
20396 bool default_p = false;
20397 bool done_p = false;
20399 /* Allow subreg of memory before/during reload. */
20400 bool memory_p = (MEM_P (x)
20401 || (!reload_completed && GET_CODE (x) == SUBREG
20402 && MEM_P (SUBREG_REG (x))));
20404 sri->icode = CODE_FOR_nothing;
20405 sri->t_icode = CODE_FOR_nothing;
20406 sri->extra_cost = 0;
20407 icode = ((in_p)
20408 ? reg_addr[mode].reload_load
20409 : reg_addr[mode].reload_store);
20411 if (REG_P (x) || register_operand (x, mode))
20413 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20414 bool altivec_p = (rclass == ALTIVEC_REGS);
20415 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20417 if (!in_p)
20419 enum rs6000_reg_type exchange = to_type;
20420 to_type = from_type;
20421 from_type = exchange;
20424 /* Can we do a direct move of some sort? */
20425 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20426 altivec_p))
20428 icode = (enum insn_code)sri->icode;
20429 default_p = false;
20430 done_p = true;
20431 ret = NO_REGS;
20435 /* Make sure 0.0 is not reloaded or forced into memory. */
20436 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20438 ret = NO_REGS;
20439 default_p = false;
20440 done_p = true;
20443 /* If this is a scalar floating point value and we want to load it into the
20444 traditional Altivec registers, do it via a move through a traditional floating
20445 point register, unless we have D-form addressing. Also make sure that
20446 non-zero constants use an FPR. */
20447 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20448 && !mode_supports_vmx_dform (mode)
20449 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20450 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20452 ret = FLOAT_REGS;
20453 default_p = false;
20454 done_p = true;
20457 /* Handle reload of load/stores if we have reload helper functions. */
20458 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20460 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20461 mode);
20463 if (extra_cost >= 0)
20465 done_p = true;
20466 ret = NO_REGS;
20467 if (extra_cost > 0)
20469 sri->extra_cost = extra_cost;
20470 sri->icode = icode;
20475 /* Handle unaligned loads and stores of integer registers. */
20476 if (!done_p && TARGET_POWERPC64
20477 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20478 && memory_p
20479 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20481 rtx addr = XEXP (x, 0);
20482 rtx off = address_offset (addr);
20484 if (off != NULL_RTX)
20486 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20487 unsigned HOST_WIDE_INT offset = INTVAL (off);
20489 /* We need a secondary reload when our legitimate_address_p
20490 says the address is good (as otherwise the entire address
20491 will be reloaded), and the offset is not a multiple of
20492 four or we have an address wrap. Address wrap will only
20493 occur for LO_SUMs since legitimate_offset_address_p
20494 rejects addresses for 16-byte mems that will wrap. */
20495 if (GET_CODE (addr) == LO_SUM
20496 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20497 && ((offset & 3) != 0
20498 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20499 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20500 && (offset & 3) != 0))
20502 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20503 if (in_p)
20504 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20505 : CODE_FOR_reload_di_load);
20506 else
20507 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20508 : CODE_FOR_reload_di_store);
20509 sri->extra_cost = 2;
20510 ret = NO_REGS;
20511 done_p = true;
20513 else
20514 default_p = true;
20516 else
20517 default_p = true;
20520 if (!done_p && !TARGET_POWERPC64
20521 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20522 && memory_p
20523 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20525 rtx addr = XEXP (x, 0);
20526 rtx off = address_offset (addr);
20528 if (off != NULL_RTX)
20530 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20531 unsigned HOST_WIDE_INT offset = INTVAL (off);
20533 /* We need a secondary reload when our legitimate_address_p
20534 says the address is good (as otherwise the entire address
20535 will be reloaded), and we have a wrap.
20537 legitimate_lo_sum_address_p allows LO_SUM addresses to
20538 have any offset so test for wrap in the low 16 bits.
20540 legitimate_offset_address_p checks for the range
20541 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20542 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20543 [0x7ff4,0x7fff] respectively, so test for the
20544 intersection of these ranges, [0x7ffc,0x7fff] and
20545 [0x7ff4,0x7ff7] respectively.
20547 Note that the address we see here may have been
20548 manipulated by legitimize_reload_address. */
20549 if (GET_CODE (addr) == LO_SUM
20550 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20551 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20553 if (in_p)
20554 sri->icode = CODE_FOR_reload_si_load;
20555 else
20556 sri->icode = CODE_FOR_reload_si_store;
20557 sri->extra_cost = 2;
20558 ret = NO_REGS;
20559 done_p = true;
20561 else
20562 default_p = true;
20564 else
20565 default_p = true;
20568 if (!done_p)
20569 default_p = true;
20571 if (default_p)
20572 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20574 gcc_assert (ret != ALL_REGS);
20576 if (TARGET_DEBUG_ADDR)
20578 fprintf (stderr,
20579 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20580 "mode = %s",
20581 reg_class_names[ret],
20582 in_p ? "true" : "false",
20583 reg_class_names[rclass],
20584 GET_MODE_NAME (mode));
20586 if (reload_completed)
20587 fputs (", after reload", stderr);
20589 if (!done_p)
20590 fputs (", done_p not set", stderr);
20592 if (default_p)
20593 fputs (", default secondary reload", stderr);
20595 if (sri->icode != CODE_FOR_nothing)
20596 fprintf (stderr, ", reload func = %s, extra cost = %d",
20597 insn_data[sri->icode].name, sri->extra_cost);
20599 else if (sri->extra_cost > 0)
20600 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20602 fputs ("\n", stderr);
20603 debug_rtx (x);
20606 return ret;
20609 /* Better tracing for rs6000_secondary_reload_inner. */
20611 static void
20612 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20613 bool store_p)
20615 rtx set, clobber;
20617 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20619 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20620 store_p ? "store" : "load");
20622 if (store_p)
20623 set = gen_rtx_SET (mem, reg);
20624 else
20625 set = gen_rtx_SET (reg, mem);
20627 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20628 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20631 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20632 ATTRIBUTE_NORETURN;
20634 static void
20635 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20636 bool store_p)
20638 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20639 gcc_unreachable ();
20642 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20643 reload helper functions. These were identified in
20644 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20645 reload, it calls the insns:
20646 reload_<RELOAD:mode>_<P:mptrsize>_store
20647 reload_<RELOAD:mode>_<P:mptrsize>_load
20649 which in turn calls this function, to do whatever is necessary to create
20650 valid addresses. */
20652 void
20653 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20655 int regno = true_regnum (reg);
20656 machine_mode mode = GET_MODE (reg);
20657 addr_mask_type addr_mask;
20658 rtx addr;
20659 rtx new_addr;
20660 rtx op_reg, op0, op1;
20661 rtx and_op;
20662 rtx cc_clobber;
20663 rtvec rv;
20665 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20666 || !base_reg_operand (scratch, GET_MODE (scratch)))
20667 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20669 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20670 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20672 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20673 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20675 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20676 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20678 else
20679 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20681 /* Make sure the mode is valid in this register class. */
20682 if ((addr_mask & RELOAD_REG_VALID) == 0)
20683 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20685 if (TARGET_DEBUG_ADDR)
20686 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20688 new_addr = addr = XEXP (mem, 0);
20689 switch (GET_CODE (addr))
20691 /* Does the register class support auto update forms for this mode? If
20692 not, do the update now. We don't need a scratch register, since the
20693 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20694 case PRE_INC:
20695 case PRE_DEC:
20696 op_reg = XEXP (addr, 0);
20697 if (!base_reg_operand (op_reg, Pmode))
20698 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20700 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20702 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
20703 new_addr = op_reg;
20705 break;
20707 case PRE_MODIFY:
20708 op0 = XEXP (addr, 0);
20709 op1 = XEXP (addr, 1);
20710 if (!base_reg_operand (op0, Pmode)
20711 || GET_CODE (op1) != PLUS
20712 || !rtx_equal_p (op0, XEXP (op1, 0)))
20713 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20715 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20717 emit_insn (gen_rtx_SET (op0, op1));
20718 new_addr = reg;
20720 break;
20722 /* Do we need to simulate AND -16 to clear the bottom address bits used
20723 in VMX load/stores? */
20724 case AND:
20725 op0 = XEXP (addr, 0);
20726 op1 = XEXP (addr, 1);
20727 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20729 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20730 op_reg = op0;
20732 else if (GET_CODE (op0) == PLUS)
20734 emit_insn (gen_rtx_SET (scratch, op0));
20735 op_reg = scratch;
20738 else
20739 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20741 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20742 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20743 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20744 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20745 new_addr = scratch;
20747 break;
20749 /* If this is an indirect address, make sure it is a base register. */
20750 case REG:
20751 case SUBREG:
20752 if (!base_reg_operand (addr, GET_MODE (addr)))
20754 emit_insn (gen_rtx_SET (scratch, addr));
20755 new_addr = scratch;
20757 break;
20759 /* If this is an indexed address, make sure the register class can handle
20760 indexed addresses for this mode. */
20761 case PLUS:
20762 op0 = XEXP (addr, 0);
20763 op1 = XEXP (addr, 1);
20764 if (!base_reg_operand (op0, Pmode))
20765 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20767 else if (int_reg_operand (op1, Pmode))
20769 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20771 emit_insn (gen_rtx_SET (scratch, addr));
20772 new_addr = scratch;
20776 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20778 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20779 || !quad_address_p (addr, mode, false))
20781 emit_insn (gen_rtx_SET (scratch, addr));
20782 new_addr = scratch;
20786 /* Make sure the register class can handle offset addresses. */
20787 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20789 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20791 emit_insn (gen_rtx_SET (scratch, addr));
20792 new_addr = scratch;
20796 else
20797 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20799 break;
20801 case LO_SUM:
20802 op0 = XEXP (addr, 0);
20803 op1 = XEXP (addr, 1);
20804 if (!base_reg_operand (op0, Pmode))
20805 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20807 else if (int_reg_operand (op1, Pmode))
20809 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20811 emit_insn (gen_rtx_SET (scratch, addr));
20812 new_addr = scratch;
20816 /* Quad offsets are restricted and can't handle normal addresses. */
20817 else if (mode_supports_vsx_dform_quad (mode))
20819 emit_insn (gen_rtx_SET (scratch, addr));
20820 new_addr = scratch;
20823 /* Make sure the register class can handle offset addresses. */
20824 else if (legitimate_lo_sum_address_p (mode, addr, false))
20826 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20828 emit_insn (gen_rtx_SET (scratch, addr));
20829 new_addr = scratch;
20833 else
20834 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20836 break;
20838 case SYMBOL_REF:
20839 case CONST:
20840 case LABEL_REF:
20841 rs6000_emit_move (scratch, addr, Pmode);
20842 new_addr = scratch;
20843 break;
20845 default:
20846 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20849 /* Adjust the address if it changed. */
20850 if (addr != new_addr)
20852 mem = replace_equiv_address_nv (mem, new_addr);
20853 if (TARGET_DEBUG_ADDR)
20854 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20857 /* Now create the move. */
20858 if (store_p)
20859 emit_insn (gen_rtx_SET (mem, reg));
20860 else
20861 emit_insn (gen_rtx_SET (reg, mem));
20863 return;
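/* Illustrative walk-through (assumed registers, not from the original
   source): for an access through (mem:V4SI (and (plus (reg r9) (reg r10))
   (const_int -16))) in a class without RELOAD_REG_AND_M16, the AND case
   above computes r9+r10 into the scratch register, ANDs it with -16 there
   (clobbering CC), and rewrites the mem to use the scratch register as a
   plain indirect address.  */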
20866 /* Convert reloads involving 64-bit gprs and misaligned offset
20867 addressing, or multiple 32-bit gprs and offsets that are too large,
20868 to use indirect addressing. */
20870 void
20871 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20873 int regno = true_regnum (reg);
20874 enum reg_class rclass;
20875 rtx addr;
20876 rtx scratch_or_premodify = scratch;
20878 if (TARGET_DEBUG_ADDR)
20880 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20881 store_p ? "store" : "load");
20882 fprintf (stderr, "reg:\n");
20883 debug_rtx (reg);
20884 fprintf (stderr, "mem:\n");
20885 debug_rtx (mem);
20886 fprintf (stderr, "scratch:\n");
20887 debug_rtx (scratch);
20890 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20891 gcc_assert (GET_CODE (mem) == MEM);
20892 rclass = REGNO_REG_CLASS (regno);
20893 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20894 addr = XEXP (mem, 0);
20896 if (GET_CODE (addr) == PRE_MODIFY)
20898 gcc_assert (REG_P (XEXP (addr, 0))
20899 && GET_CODE (XEXP (addr, 1)) == PLUS
20900 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20901 scratch_or_premodify = XEXP (addr, 0);
20902 if (!HARD_REGISTER_P (scratch_or_premodify))
20903 /* If we have a pseudo here then reload will have arranged
20904 to have it replaced, but only in the original insn.
20905 Use the replacement here too. */
20906 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20908 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20909 expressions from the original insn, without unsharing them.
20910 Any RTL that points into the original insn will of course
20911 have register replacements applied. That is why we don't
20912 need to look for replacements under the PLUS. */
20913 addr = XEXP (addr, 1);
20915 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20917 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20919 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20921 /* Now create the move. */
20922 if (store_p)
20923 emit_insn (gen_rtx_SET (mem, reg));
20924 else
20925 emit_insn (gen_rtx_SET (reg, mem));
20927 return;
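/* Illustrative case (assumed, not from the original source): a 64-bit
   "ld" needs a DS-form offset that is a multiple of 4, so a reload of
   (mem:DI (plus (reg r1) (const_int 6))) computes r1+6 into the scratch
   register and loads through (mem:DI (scratch)) instead.  */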
20930 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
20931 this function has any SDmode references. If we are on a power7 or later, we
20932 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
20933 can load/store the value. */
20935 static void
20936 rs6000_alloc_sdmode_stack_slot (void)
20938 tree t;
20939 basic_block bb;
20940 gimple_stmt_iterator gsi;
20942 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
20943 /* We use a different approach for dealing with the secondary
20944 memory in LRA. */
20945 if (ira_use_lra_p)
20946 return;
20948 if (TARGET_NO_SDMODE_STACK)
20949 return;
20951 FOR_EACH_BB_FN (bb, cfun)
20952 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
20954 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
20955 if (ret)
20957 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20958 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20959 SDmode, 0);
20960 return;
20964 /* Check for any SDmode parameters of the function. */
20965 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
20967 if (TREE_TYPE (t) == error_mark_node)
20968 continue;
20970 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
20971 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
20973 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20974 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20975 SDmode, 0);
20976 return;
20981 static void
20982 rs6000_instantiate_decls (void)
20984 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
20985 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
20988 /* Given an rtx X being reloaded into a reg required to be
20989 in class CLASS, return the class of reg to actually use.
20990 In general this is just CLASS; but on some machines
20991 in some cases it is preferable to use a more restrictive class.
20993 On the RS/6000, we have to return NO_REGS when we want to reload a
20994 floating-point CONST_DOUBLE to force it to be copied to memory.
20996 We also don't want to reload integer values into floating-point
20997 registers if we can at all help it. In fact, this can
20998 cause reload to die, if it tries to generate a reload of CTR
20999 into a FP register and discovers it doesn't have the memory location
21000 required.
21002 ??? Would it be a good idea to have reload do the converse, that is
21003 try to reload floating modes into FP registers if possible? */
21006 static enum reg_class
21007 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
21009 machine_mode mode = GET_MODE (x);
21010 bool is_constant = CONSTANT_P (x);
21012 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
21013 reload class for it. */
21014 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21015 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
21016 return NO_REGS;
21018 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
21019 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
21020 return NO_REGS;
21022 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
21023 the reloading of address expressions using PLUS into floating point
21024 registers. */
21025 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
21027 if (is_constant)
21029 /* Zero is always allowed in all VSX registers. */
21030 if (x == CONST0_RTX (mode))
21031 return rclass;
21033 /* If this is a vector constant that can be formed with a few Altivec
21034 instructions, we want altivec registers. */
21035 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
21036 return ALTIVEC_REGS;
21038 /* Force constant to memory. */
21039 return NO_REGS;
21042 /* D-form addressing can easily reload the value. */
21043 if (mode_supports_vmx_dform (mode)
21044 || mode_supports_vsx_dform_quad (mode))
21045 return rclass;
21047 /* If this is a scalar floating point value and we don't have D-form
21048 addressing, prefer the traditional floating point registers so that we
21049 can use D-form (register+offset) addressing. */
21050 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
21051 return FLOAT_REGS;
21053 /* Prefer the Altivec registers if Altivec is handling the vector
21054 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
21055 loads. */
21056 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
21057 || mode == V1TImode)
21058 return ALTIVEC_REGS;
21060 return rclass;
21063 if (is_constant || GET_CODE (x) == PLUS)
21065 if (reg_class_subset_p (GENERAL_REGS, rclass))
21066 return GENERAL_REGS;
21067 if (reg_class_subset_p (BASE_REGS, rclass))
21068 return BASE_REGS;
21069 return NO_REGS;
21072 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
21073 return GENERAL_REGS;
21075 return rclass;
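/* Two illustrative outcomes of the VSX logic above (not from the original
   source): an easy vector constant such as (const_vector:V4SI [1 1 1 1])
   prefers ALTIVEC_REGS, since a single vspltisw can form it, while a
   random scalar CONST_DOUBLE gets NO_REGS, forcing the constant to
   memory.  */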
21078 /* Debug version of rs6000_preferred_reload_class. */
21079 static enum reg_class
21080 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
21082 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
21084 fprintf (stderr,
21085 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
21086 "mode = %s, x:\n",
21087 reg_class_names[ret], reg_class_names[rclass],
21088 GET_MODE_NAME (GET_MODE (x)));
21089 debug_rtx (x);
21091 return ret;
21094 /* If we are copying between FP or AltiVec registers and anything else, we need
21095 a memory location. The exception is when we are targeting ppc64 and the
21096 move to/from fpr to gpr instructions are available. Also, under VSX, you
21097 can copy vector registers from the FP register set to the Altivec register
21098 set and vice versa. */
21100 static bool
21101 rs6000_secondary_memory_needed (enum reg_class from_class,
21102 enum reg_class to_class,
21103 machine_mode mode)
21105 enum rs6000_reg_type from_type, to_type;
21106 bool altivec_p = ((from_class == ALTIVEC_REGS)
21107 || (to_class == ALTIVEC_REGS));
21109 /* If a simple/direct move is available, we don't need secondary memory. */
21110 from_type = reg_class_to_reg_type[(int)from_class];
21111 to_type = reg_class_to_reg_type[(int)to_class];
21113 if (rs6000_secondary_reload_move (to_type, from_type, mode,
21114 (secondary_reload_info *)0, altivec_p))
21115 return false;
21117 /* If we have a floating point or vector register class, we need to use
21118 memory to transfer the data. */
21119 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
21120 return true;
21122 return false;
21125 /* Debug version of rs6000_secondary_memory_needed. */
21126 static bool
21127 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
21128 enum reg_class to_class,
21129 machine_mode mode)
21131 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
21133 fprintf (stderr,
21134 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
21135 "to_class = %s, mode = %s\n",
21136 ret ? "true" : "false",
21137 reg_class_names[from_class],
21138 reg_class_names[to_class],
21139 GET_MODE_NAME (mode));
21141 return ret;
21144 /* Return the register class of a scratch register needed to copy IN into
21145 or out of a register in RCLASS in MODE. If it can be done directly,
21146 NO_REGS is returned. */
21148 static enum reg_class
21149 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
21150 rtx in)
21152 int regno;
21154 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
21155 #if TARGET_MACHO
21156 && MACHOPIC_INDIRECT
21157 #endif
21160 /* We cannot copy a symbolic operand directly into anything
21161 other than BASE_REGS for TARGET_ELF. So indicate that a
21162 register from BASE_REGS is needed as an intermediate
21163 register.
21165 On Darwin, pic addresses require a load from memory, which
21166 needs a base register. */
21167 if (rclass != BASE_REGS
21168 && (GET_CODE (in) == SYMBOL_REF
21169 || GET_CODE (in) == HIGH
21170 || GET_CODE (in) == LABEL_REF
21171 || GET_CODE (in) == CONST))
21172 return BASE_REGS;
21175 if (GET_CODE (in) == REG)
21177 regno = REGNO (in);
21178 if (regno >= FIRST_PSEUDO_REGISTER)
21180 regno = true_regnum (in);
21181 if (regno >= FIRST_PSEUDO_REGISTER)
21182 regno = -1;
21185 else if (GET_CODE (in) == SUBREG)
21187 regno = true_regnum (in);
21188 if (regno >= FIRST_PSEUDO_REGISTER)
21189 regno = -1;
21191 else
21192 regno = -1;
21194 /* If we have VSX register moves, prefer moving scalar values between
21195 Altivec registers and GPR by going via an FPR (and then via memory)
21196 instead of reloading the secondary memory address for Altivec moves. */
21197 if (TARGET_VSX
21198 && GET_MODE_SIZE (mode) < 16
21199 && !mode_supports_vmx_dform (mode)
21200 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
21201 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
21202 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
21203 && (regno >= 0 && INT_REGNO_P (regno)))))
21204 return FLOAT_REGS;
21206 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
21207 into anything. */
21208 if (rclass == GENERAL_REGS || rclass == BASE_REGS
21209 || (regno >= 0 && INT_REGNO_P (regno)))
21210 return NO_REGS;
21212 /* Constants, memory, and VSX registers can go into VSX registers (both the
21213 traditional floating point and the altivec registers). */
21214 if (rclass == VSX_REGS
21215 && (regno == -1 || VSX_REGNO_P (regno)))
21216 return NO_REGS;
21218 /* Constants, memory, and FP registers can go into FP registers. */
21219 if ((regno == -1 || FP_REGNO_P (regno))
21220 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
21221 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
21223 /* Memory and AltiVec registers can go into AltiVec registers. */
21224 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
21225 && rclass == ALTIVEC_REGS)
21226 return NO_REGS;
21228 /* We can copy among the CR registers. */
21229 if ((rclass == CR_REGS || rclass == CR0_REGS)
21230 && regno >= 0 && CR_REGNO_P (regno))
21231 return NO_REGS;
21233 /* Otherwise, we need GENERAL_REGS. */
21234 return GENERAL_REGS;
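/* Illustrative case (not from the original source): on an ELF target,
   copying (symbol_ref "x") into FLOAT_REGS returns BASE_REGS above, so
   reload first materializes the address in a base register before the
   value can reach an FPR.  */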
21237 /* Debug version of rs6000_secondary_reload_class. */
21238 static enum reg_class
21239 rs6000_debug_secondary_reload_class (enum reg_class rclass,
21240 machine_mode mode, rtx in)
21242 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
21243 fprintf (stderr,
21244 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
21245 "mode = %s, input rtx:\n",
21246 reg_class_names[ret], reg_class_names[rclass],
21247 GET_MODE_NAME (mode));
21248 debug_rtx (in);
21250 return ret;
21253 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
21255 static bool
21256 rs6000_cannot_change_mode_class (machine_mode from,
21257 machine_mode to,
21258 enum reg_class rclass)
21260 unsigned from_size = GET_MODE_SIZE (from);
21261 unsigned to_size = GET_MODE_SIZE (to);
21263 if (from_size != to_size)
21265 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
21267 if (reg_classes_intersect_p (xclass, rclass))
21269 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
21270 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
21271 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
21272 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
21274 /* Don't allow 64-bit types to overlap with 128-bit types that take a
21275 single register under VSX because the scalar part of the register
21276 is in the upper 64-bits, and not the lower 64-bits. Types like
21277 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
21278 IEEE floating point can't overlap, and neither can small
21279 values. */
21281 if (to_float128_vector_p && from_float128_vector_p)
21282 return false;
21284 else if (to_float128_vector_p || from_float128_vector_p)
21285 return true;
21287 /* TDmode in floating-mode registers must always go into a register
21288 pair with the most significant word in the even-numbered register
21289 to match ISA requirements. In little-endian mode, this does not
21290 match subreg numbering, so we cannot allow subregs. */
21291 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
21292 return true;
21294 if (from_size < 8 || to_size < 8)
21295 return true;
21297 if (from_size == 8 && (8 * to_nregs) != to_size)
21298 return true;
21300 if (to_size == 8 && (8 * from_nregs) != from_size)
21301 return true;
21303 return false;
21305 else
21306 return false;
21309 if (TARGET_E500_DOUBLE
21310 && ((((to) == DFmode) + ((from) == DFmode)) == 1
21311 || (((to) == TFmode) + ((from) == TFmode)) == 1
21312 || (((to) == IFmode) + ((from) == IFmode)) == 1
21313 || (((to) == KFmode) + ((from) == KFmode)) == 1
21314 || (((to) == DDmode) + ((from) == DDmode)) == 1
21315 || (((to) == TDmode) + ((from) == TDmode)) == 1
21316 || (((to) == DImode) + ((from) == DImode)) == 1))
21317 return true;
21319 /* Since the VSX register set includes traditional floating point registers
21320 and altivec registers, just check for the size being different instead of
21321 trying to check whether the modes are vector modes. Otherwise it won't
21322 allow say DF and DI to change classes. For types like TFmode and TDmode
21323 that take 2 64-bit registers, rather than a single 128-bit register, don't
21324 allow subregs of those types to other 128 bit types. */
21325 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
21327 unsigned num_regs = (from_size + 15) / 16;
21328 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
21329 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
21330 return true;
21332 return (from_size != 8 && from_size != 16);
21335 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
21336 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
21337 return true;
21339 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
21340 && reg_classes_intersect_p (GENERAL_REGS, rclass))
21341 return true;
21343 return false;
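/* Two illustrative answers (not from the original source): DFmode <->
   DImode subregs in VSX registers are allowed, since both modes are
   8 bytes in one register, while a DImode subreg of a KFmode value is
   rejected above, because the 128-bit scalar occupies a single VSX
   register with its data in the upper 64 bits.  */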
21346 /* Debug version of rs6000_cannot_change_mode_class. */
21347 static bool
21348 rs6000_debug_cannot_change_mode_class (machine_mode from,
21349 machine_mode to,
21350 enum reg_class rclass)
21352 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
21354 fprintf (stderr,
21355 "rs6000_cannot_change_mode_class, return %s, from = %s, "
21356 "to = %s, rclass = %s\n",
21357 ret ? "true" : "false",
21358 GET_MODE_NAME (from), GET_MODE_NAME (to),
21359 reg_class_names[rclass]);
21361 return ret;
21364 /* Return a string to do a move operation of 128 bits of data. */
21366 const char *
21367 rs6000_output_move_128bit (rtx operands[])
21369 rtx dest = operands[0];
21370 rtx src = operands[1];
21371 machine_mode mode = GET_MODE (dest);
21372 int dest_regno;
21373 int src_regno;
21374 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
21375 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
21377 if (REG_P (dest))
21379 dest_regno = REGNO (dest);
21380 dest_gpr_p = INT_REGNO_P (dest_regno);
21381 dest_fp_p = FP_REGNO_P (dest_regno);
21382 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
21383 dest_vsx_p = dest_fp_p | dest_vmx_p;
21385 else
21387 dest_regno = -1;
21388 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
21391 if (REG_P (src))
21393 src_regno = REGNO (src);
21394 src_gpr_p = INT_REGNO_P (src_regno);
21395 src_fp_p = FP_REGNO_P (src_regno);
21396 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
21397 src_vsx_p = src_fp_p | src_vmx_p;
21399 else
21401 src_regno = -1;
21402 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21405 /* Register moves. */
21406 if (dest_regno >= 0 && src_regno >= 0)
21408 if (dest_gpr_p)
21410 if (src_gpr_p)
21411 return "#";
21413 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21414 return (WORDS_BIG_ENDIAN
21415 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21416 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21418 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21419 return "#";
21422 else if (TARGET_VSX && dest_vsx_p)
21424 if (src_vsx_p)
21425 return "xxlor %x0,%x1,%x1";
21427 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21428 return (WORDS_BIG_ENDIAN
21429 ? "mtvsrdd %x0,%1,%L1"
21430 : "mtvsrdd %x0,%L1,%1");
21432 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21433 return "#";
21436 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21437 return "vor %0,%1,%1";
21439 else if (dest_fp_p && src_fp_p)
21440 return "#";
21443 /* Loads. */
21444 else if (dest_regno >= 0 && MEM_P (src))
21446 if (dest_gpr_p)
21448 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21449 return "lq %0,%1";
21450 else
21451 return "#";
21454 else if (TARGET_ALTIVEC && dest_vmx_p
21455 && altivec_indexed_or_indirect_operand (src, mode))
21456 return "lvx %0,%y1";
21458 else if (TARGET_VSX && dest_vsx_p)
21460 if (mode_supports_vsx_dform_quad (mode)
21461 && quad_address_p (XEXP (src, 0), mode, true))
21462 return "lxv %x0,%1";
21464 else if (TARGET_P9_VECTOR)
21465 return "lxvx %x0,%y1";
21467 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21468 return "lxvw4x %x0,%y1";
21470 else
21471 return "lxvd2x %x0,%y1";
21474 else if (TARGET_ALTIVEC && dest_vmx_p)
21475 return "lvx %0,%y1";
21477 else if (dest_fp_p)
21478 return "#";
21481 /* Stores. */
21482 else if (src_regno >= 0 && MEM_P (dest))
21484 if (src_gpr_p)
21486 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21487 return "stq %1,%0";
21488 else
21489 return "#";
21492 else if (TARGET_ALTIVEC && src_vmx_p
21493 && altivec_indexed_or_indirect_operand (src, mode))
21494 return "stvx %1,%y0";
21496 else if (TARGET_VSX && src_vsx_p)
21498 if (mode_supports_vsx_dform_quad (mode)
21499 && quad_address_p (XEXP (dest, 0), mode, true))
21500 return "stxv %x1,%0";
21502 else if (TARGET_P9_VECTOR)
21503 return "stxvx %x1,%y0";
21505 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21506 return "stxvw4x %x1,%y0";
21508 else
21509 return "stxvd2x %x1,%y0";
21512 else if (TARGET_ALTIVEC && src_vmx_p)
21513 return "stvx %1,%y0";
21515 else if (src_fp_p)
21516 return "#";
21519 /* Constants. */
21520 else if (dest_regno >= 0
21521 && (GET_CODE (src) == CONST_INT
21522 || GET_CODE (src) == CONST_WIDE_INT
21523 || GET_CODE (src) == CONST_DOUBLE
21524 || GET_CODE (src) == CONST_VECTOR))
21526 if (dest_gpr_p)
21527 return "#";
21529 else if ((dest_vmx_p && TARGET_ALTIVEC)
21530 || (dest_vsx_p && TARGET_VSX))
21531 return output_vec_const_move (operands);
21534 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
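/* As a rough illustration (register numbers hypothetical): with ISA 3.0
   direct moves (TARGET_DIRECT_MOVE_128), a big-endian TImode move from
   VSX register 34 into the GPR pair r3:r4 returns the sequence

	mfvsrd 3,34
	mfvsrld 4,34

   while a GPR-to-GPR 128-bit copy returns "#" so that the move is split
   into two doubleword moves after reload.  */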
21537 /* Validate a 128-bit move. */
21538 bool
21539 rs6000_move_128bit_ok_p (rtx operands[])
21541 machine_mode mode = GET_MODE (operands[0]);
21542 return (gpc_reg_operand (operands[0], mode)
21543 || gpc_reg_operand (operands[1], mode));
21546 /* Return true if a 128-bit move needs to be split. */
21547 bool
21548 rs6000_split_128bit_ok_p (rtx operands[])
21550 if (!reload_completed)
21551 return false;
21553 if (!gpr_or_gpr_p (operands[0], operands[1]))
21554 return false;
21556 if (quad_load_store_p (operands[0], operands[1]))
21557 return false;
21559 return true;
21563 /* Given a comparison operation, return the bit number in CCR to test. We
21564 know this is a valid comparison.
21566 SCC_P is 1 if this is for an scc. That means that %D will have been
21567 used instead of %C, so the bits will be in different places.
21569 Return -1 if OP isn't a valid comparison for some reason. */
21571 int
21572 ccr_bit (rtx op, int scc_p)
21574 enum rtx_code code = GET_CODE (op);
21575 machine_mode cc_mode;
21576 int cc_regnum;
21577 int base_bit;
21578 rtx reg;
21580 if (!COMPARISON_P (op))
21581 return -1;
21583 reg = XEXP (op, 0);
21585 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21587 cc_mode = GET_MODE (reg);
21588 cc_regnum = REGNO (reg);
21589 base_bit = 4 * (cc_regnum - CR0_REGNO);
21591 validate_condition_mode (code, cc_mode);
21593 /* When generating a sCOND operation, only positive conditions are
21594 allowed. */
21595 gcc_assert (!scc_p
21596 || code == EQ || code == GT || code == LT || code == UNORDERED
21597 || code == GTU || code == LTU);
21599 switch (code)
21601 case NE:
21602 return scc_p ? base_bit + 3 : base_bit + 2;
21603 case EQ:
21604 return base_bit + 2;
21605 case GT: case GTU: case UNLE:
21606 return base_bit + 1;
21607 case LT: case LTU: case UNGE:
21608 return base_bit;
21609 case ORDERED: case UNORDERED:
21610 return base_bit + 3;
21612 case GE: case GEU:
21613 /* If scc, we will have done a cror to put the bit in the
21614 unordered position. So test that bit. For integer, this is ! LT
21615 unless this is an scc insn. */
21616 return scc_p ? base_bit + 3 : base_bit;
21618 case LE: case LEU:
21619 return scc_p ? base_bit + 3 : base_bit + 1;
21621 default:
21622 gcc_unreachable ();
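/* For example, a comparison whose result lives in cr6 has
   base_bit = 4 * (cr6 - cr0) = 24, so EQ tests CR bit 26 (base_bit + 2),
   GT tests bit 25, LT tests bit 24, and for an scc sequence GE is tested
   through the unordered position, bit 27.  */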
21626 /* Return the GOT register. */
21628 rtx
21629 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21631 /* The second flow pass currently (June 1999) can't update
21632 regs_ever_live without disturbing other parts of the compiler, so
21633 update it here to make the prolog/epilogue code happy. */
21634 if (!can_create_pseudo_p ()
21635 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21636 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21638 crtl->uses_pic_offset_table = 1;
21640 return pic_offset_table_rtx;
21643 static rs6000_stack_t stack_info;
21645 /* Function to init struct machine_function.
21646 This will be called, via a pointer variable,
21647 from push_function_context. */
21649 static struct machine_function *
21650 rs6000_init_machine_status (void)
21652 stack_info.reload_completed = 0;
21653 return ggc_cleared_alloc<machine_function> ();
21656 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21658 /* Write out a function code label. */
21660 void
21661 rs6000_output_function_entry (FILE *file, const char *fname)
21663 if (fname[0] != '.')
21665 switch (DEFAULT_ABI)
21667 default:
21668 gcc_unreachable ();
21670 case ABI_AIX:
21671 if (DOT_SYMBOLS)
21672 putc ('.', file);
21673 else
21674 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21675 break;
21677 case ABI_ELFv2:
21678 case ABI_V4:
21679 case ABI_DARWIN:
21680 break;
21684 RS6000_OUTPUT_BASENAME (file, fname);
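/* For instance, given FNAME "foo" this emits ".foo" for ABI_AIX with dot
   symbols (the traditional function-descriptor convention), an
   internal-label form such as "L.foo" when dot symbols are disabled, and
   plain "foo" for the ELFv2, V4 and Darwin ABIs.  */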
21687 /* Print an operand. Recognize special options, documented below. */
21689 #if TARGET_ELF
21690 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21691 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21692 #else
21693 #define SMALL_DATA_RELOC "sda21"
21694 #define SMALL_DATA_REG 0
21695 #endif
21697 void
21698 print_operand (FILE *file, rtx x, int code)
21700 int i;
21701 unsigned HOST_WIDE_INT uval;
21703 switch (code)
21705 /* %a is output_address. */
21707 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21708 output_operand. */
21710 case 'D':
21711 /* Like 'J' but get to the GT bit only. */
21712 gcc_assert (REG_P (x));
21714 /* Bit 1 is GT bit. */
21715 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21717 /* Add one for shift count in rlinm for scc. */
21718 fprintf (file, "%d", i + 1);
21719 return;
21721 case 'e':
21722 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21723 if (! INT_P (x))
21725 output_operand_lossage ("invalid %%e value");
21726 return;
21729 uval = INTVAL (x);
21730 if ((uval & 0xffff) == 0 && uval != 0)
21731 putc ('s', file);
21732 return;
21734 case 'E':
21735 /* X is a CR register. Print the number of the EQ bit of the CR */
21736 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21737 output_operand_lossage ("invalid %%E value");
21738 else
21739 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21740 return;
21742 case 'f':
21743 /* X is a CR register. Print the shift count needed to move it
21744 to the high-order four bits. */
21745 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21746 output_operand_lossage ("invalid %%f value");
21747 else
21748 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21749 return;
21751 case 'F':
21752 /* Similar, but print the count for the rotate in the opposite
21753 direction. */
21754 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21755 output_operand_lossage ("invalid %%F value");
21756 else
21757 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21758 return;
21760 case 'G':
21761 /* X is a constant integer. If it is negative, print "m",
21762 otherwise print "z". This is to make an aze or ame insn. */
21763 if (GET_CODE (x) != CONST_INT)
21764 output_operand_lossage ("invalid %%G value");
21765 else if (INTVAL (x) >= 0)
21766 putc ('z', file);
21767 else
21768 putc ('m', file);
21769 return;
21771 case 'h':
21772 /* If constant, output low-order five bits. Otherwise, write
21773 normally. */
21774 if (INT_P (x))
21775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21776 else
21777 print_operand (file, x, 0);
21778 return;
21780 case 'H':
21781 /* If constant, output low-order six bits. Otherwise, write
21782 normally. */
21783 if (INT_P (x))
21784 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21785 else
21786 print_operand (file, x, 0);
21787 return;
21789 case 'I':
21790 /* Print `i' if this is a constant, else nothing. */
21791 if (INT_P (x))
21792 putc ('i', file);
21793 return;
21795 case 'j':
21796 /* Write the bit number in CCR for jump. */
21797 i = ccr_bit (x, 0);
21798 if (i == -1)
21799 output_operand_lossage ("invalid %%j code");
21800 else
21801 fprintf (file, "%d", i);
21802 return;
21804 case 'J':
21805 /* Similar, but add one for shift count in rlinm for scc and pass
21806 scc flag to `ccr_bit'. */
21807 i = ccr_bit (x, 1);
21808 if (i == -1)
21809 output_operand_lossage ("invalid %%J code");
21810 else
21811 /* If we want bit 31, write a shift count of zero, not 32. */
21812 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21813 return;
21815 case 'k':
21816 /* X must be a constant. Write the 1's complement of the
21817 constant. */
21818 if (! INT_P (x))
21819 output_operand_lossage ("invalid %%k value");
21820 else
21821 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21822 return;
21824 case 'K':
21825 /* X must be a symbolic constant on ELF. Write an
21826 expression suitable for an 'addi' that adds in the low 16
21827 bits of the MEM. */
21828 if (GET_CODE (x) == CONST)
21830 if (GET_CODE (XEXP (x, 0)) != PLUS
21831 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21832 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21833 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21834 output_operand_lossage ("invalid %%K value");
21836 print_operand_address (file, x);
21837 fputs ("@l", file);
21838 return;
21840 /* %l is output_asm_label. */
21842 case 'L':
21843 /* Write second word of DImode or DFmode reference. Works on register
21844 or non-indexed memory only. */
21845 if (REG_P (x))
21846 fputs (reg_names[REGNO (x) + 1], file);
21847 else if (MEM_P (x))
21849 machine_mode mode = GET_MODE (x);
21850 /* Handle possible auto-increment. Since it is pre-increment and
21851 we have already done it, we can just use an offset of word. */
21852 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21853 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21854 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21855 UNITS_PER_WORD));
21856 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21857 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21858 UNITS_PER_WORD));
21859 else
21860 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21861 UNITS_PER_WORD),
21862 0));
21864 if (small_data_operand (x, GET_MODE (x)))
21865 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21866 reg_names[SMALL_DATA_REG]);
21868 return;
21870 case 'N':
21871 /* Write the number of elements in the vector times 4. */
21872 if (GET_CODE (x) != PARALLEL)
21873 output_operand_lossage ("invalid %%N value");
21874 else
21875 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21876 return;
21878 case 'O':
21879 /* Similar, but subtract 1 first. */
21880 if (GET_CODE (x) != PARALLEL)
21881 output_operand_lossage ("invalid %%O value");
21882 else
21883 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21884 return;
21886 case 'p':
21887 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21888 if (! INT_P (x)
21889 || INTVAL (x) < 0
21890 || (i = exact_log2 (INTVAL (x))) < 0)
21891 output_operand_lossage ("invalid %%p value");
21892 else
21893 fprintf (file, "%d", i);
21894 return;
21896 case 'P':
21897 /* The operand must be an indirect memory reference. The result
21898 is the register name. */
21899 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21900 || REGNO (XEXP (x, 0)) >= 32)
21901 output_operand_lossage ("invalid %%P value");
21902 else
21903 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21904 return;
21906 case 'q':
21907 /* This outputs the logical code corresponding to a boolean
21908 expression. The expression may have one or both operands
21909 negated (if one, only the first one). For condition register
21910 logical operations, it will also treat the negated
21911 CR codes as NOTs, but not handle NOTs of them. */
21913 const char *const *t = 0;
21914 const char *s;
21915 enum rtx_code code = GET_CODE (x);
21916 static const char * const tbl[3][3] = {
21917 { "and", "andc", "nor" },
21918 { "or", "orc", "nand" },
21919 { "xor", "eqv", "xor" } };
21921 if (code == AND)
21922 t = tbl[0];
21923 else if (code == IOR)
21924 t = tbl[1];
21925 else if (code == XOR)
21926 t = tbl[2];
21927 else
21928 output_operand_lossage ("invalid %%q value");
21930 if (GET_CODE (XEXP (x, 0)) != NOT)
21931 s = t[0];
21932 else
21934 if (GET_CODE (XEXP (x, 1)) == NOT)
21935 s = t[2];
21936 else
21937 s = t[1];
21940 fputs (s, file);
21942 return;
21944 case 'Q':
21945 if (! TARGET_MFCRF)
21946 return;
21947 fputc (',', file);
21948 /* FALLTHRU */
21950 case 'R':
21951 /* X is a CR register. Print the mask for `mtcrf'. */
21952 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21953 output_operand_lossage ("invalid %%R value");
21954 else
21955 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21956 return;
21958 case 's':
21959 /* Low 5 bits of 32 - value */
21960 if (! INT_P (x))
21961 output_operand_lossage ("invalid %%s value");
21962 else
21963 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21964 return;
21966 case 't':
21967 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21968 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21970 /* Bit 3 is OV bit. */
21971 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21973 /* If we want bit 31, write a shift count of zero, not 32. */
21974 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21975 return;
21977 case 'T':
21978 /* Print the symbolic name of a branch target register. */
21979 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21980 && REGNO (x) != CTR_REGNO))
21981 output_operand_lossage ("invalid %%T value");
21982 else if (REGNO (x) == LR_REGNO)
21983 fputs ("lr", file);
21984 else
21985 fputs ("ctr", file);
21986 return;
21988 case 'u':
21989 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21990 for use in unsigned operand. */
21991 if (! INT_P (x))
21993 output_operand_lossage ("invalid %%u value");
21994 return;
21997 uval = INTVAL (x);
21998 if ((uval & 0xffff) == 0)
21999 uval >>= 16;
22001 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
22002 return;
22004 case 'v':
22005 /* High-order 16 bits of constant for use in signed operand. */
22006 if (! INT_P (x))
22007 output_operand_lossage ("invalid %%v value");
22008 else
22009 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
22010 (INTVAL (x) >> 16) & 0xffff);
22011 return;
22013 case 'U':
22014 /* Print `u' if this has an auto-increment or auto-decrement. */
22015 if (MEM_P (x)
22016 && (GET_CODE (XEXP (x, 0)) == PRE_INC
22017 || GET_CODE (XEXP (x, 0)) == PRE_DEC
22018 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
22019 putc ('u', file);
22020 return;
22022 case 'V':
22023 /* Print the trap code for this operand. */
22024 switch (GET_CODE (x))
22026 case EQ:
22027 fputs ("eq", file); /* 4 */
22028 break;
22029 case NE:
22030 fputs ("ne", file); /* 24 */
22031 break;
22032 case LT:
22033 fputs ("lt", file); /* 16 */
22034 break;
22035 case LE:
22036 fputs ("le", file); /* 20 */
22037 break;
22038 case GT:
22039 fputs ("gt", file); /* 8 */
22040 break;
22041 case GE:
22042 fputs ("ge", file); /* 12 */
22043 break;
22044 case LTU:
22045 fputs ("llt", file); /* 2 */
22046 break;
22047 case LEU:
22048 fputs ("lle", file); /* 6 */
22049 break;
22050 case GTU:
22051 fputs ("lgt", file); /* 1 */
22052 break;
22053 case GEU:
22054 fputs ("lge", file); /* 5 */
22055 break;
22056 default:
22057 gcc_unreachable ();
22059 break;
22061 case 'w':
22062 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
22063 normally. */
22064 if (INT_P (x))
22065 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
22066 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
22067 else
22068 print_operand (file, x, 0);
22069 return;
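/* The mask/xor/subtract idiom above sign-extends the low halfword: for a
   (hypothetical) constant 0x12349876, the masked value 0x9876 xor 0x8000
   gives 0x1876, and subtracting 0x8000 yields -26506, i.e. the low 16
   bits interpreted as a signed short.  */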
22071 case 'x':
22072 /* X is a FPR or Altivec register used in a VSX context. */
22073 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
22074 output_operand_lossage ("invalid %%x value");
22075 else
22077 int reg = REGNO (x);
22078 int vsx_reg = (FP_REGNO_P (reg)
22079 ? reg - 32
22080 : reg - FIRST_ALTIVEC_REGNO + 32);
22082 #ifdef TARGET_REGNAMES
22083 if (TARGET_REGNAMES)
22084 fprintf (file, "%%vs%d", vsx_reg);
22085 else
22086 #endif
22087 fprintf (file, "%d", vsx_reg);
22089 return;
22091 case 'X':
22092 if (MEM_P (x)
22093 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
22094 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
22095 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
22096 putc ('x', file);
22097 return;
22099 case 'Y':
22100 /* Like 'L', for third word of TImode/PTImode */
22101 if (REG_P (x))
22102 fputs (reg_names[REGNO (x) + 2], file);
22103 else if (MEM_P (x))
22105 machine_mode mode = GET_MODE (x);
22106 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22107 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22108 output_address (mode, plus_constant (Pmode,
22109 XEXP (XEXP (x, 0), 0), 8));
22110 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22111 output_address (mode, plus_constant (Pmode,
22112 XEXP (XEXP (x, 0), 0), 8));
22113 else
22114 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
22115 if (small_data_operand (x, GET_MODE (x)))
22116 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22117 reg_names[SMALL_DATA_REG]);
22119 return;
22121 case 'z':
22122 /* X is a SYMBOL_REF. Write out the name preceded by a
22123 period and without any trailing data in brackets. Used for function
22124 names. If we are configured for System V (or the embedded ABI) on
22125 the PowerPC, do not emit the period, since those systems do not use
22126 TOCs and the like. */
22127 gcc_assert (GET_CODE (x) == SYMBOL_REF);
22129 /* For macho, check to see if we need a stub. */
22130 if (TARGET_MACHO)
22132 const char *name = XSTR (x, 0);
22133 #if TARGET_MACHO
22134 if (darwin_emit_branch_islands
22135 && MACHOPIC_INDIRECT
22136 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
22137 name = machopic_indirection_name (x, /*stub_p=*/true);
22138 #endif
22139 assemble_name (file, name);
22141 else if (!DOT_SYMBOLS)
22142 assemble_name (file, XSTR (x, 0));
22143 else
22144 rs6000_output_function_entry (file, XSTR (x, 0));
22145 return;
22147 case 'Z':
22148 /* Like 'L', for last word of TImode/PTImode. */
22149 if (REG_P (x))
22150 fputs (reg_names[REGNO (x) + 3], file);
22151 else if (MEM_P (x))
22153 machine_mode mode = GET_MODE (x);
22154 if (GET_CODE (XEXP (x, 0)) == PRE_INC
22155 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
22156 output_address (mode, plus_constant (Pmode,
22157 XEXP (XEXP (x, 0), 0), 12));
22158 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22159 output_address (mode, plus_constant (Pmode,
22160 XEXP (XEXP (x, 0), 0), 12));
22161 else
22162 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
22163 if (small_data_operand (x, GET_MODE (x)))
22164 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22165 reg_names[SMALL_DATA_REG]);
22167 return;
22169 /* Print AltiVec or SPE memory operand. */
22170 case 'y':
22172 rtx tmp;
22174 gcc_assert (MEM_P (x));
22176 tmp = XEXP (x, 0);
22178 /* Ugly hack because %y is overloaded. */
22179 if ((TARGET_SPE || TARGET_E500_DOUBLE)
22180 && (GET_MODE_SIZE (GET_MODE (x)) == 8
22181 || FLOAT128_2REG_P (GET_MODE (x))
22182 || GET_MODE (x) == TImode
22183 || GET_MODE (x) == PTImode))
22185 /* Handle [reg]. */
22186 if (REG_P (tmp))
22188 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
22189 break;
22191 /* Handle [reg+UIMM]. */
22192 else if (GET_CODE (tmp) == PLUS &&
22193 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
22195 int x;
22197 gcc_assert (REG_P (XEXP (tmp, 0)));
22199 x = INTVAL (XEXP (tmp, 1));
22200 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
22201 break;
22204 /* Fall through. Must be [reg+reg]. */
22206 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
22207 && GET_CODE (tmp) == AND
22208 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
22209 && INTVAL (XEXP (tmp, 1)) == -16)
22210 tmp = XEXP (tmp, 0);
22211 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
22212 && GET_CODE (tmp) == PRE_MODIFY)
22213 tmp = XEXP (tmp, 1);
22214 if (REG_P (tmp))
22215 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
22216 else
22218 if (GET_CODE (tmp) != PLUS
22219 || !REG_P (XEXP (tmp, 0))
22220 || !REG_P (XEXP (tmp, 1)))
22222 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
22223 break;
22226 if (REGNO (XEXP (tmp, 0)) == 0)
22227 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
22228 reg_names[ REGNO (XEXP (tmp, 0)) ]);
22229 else
22230 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
22231 reg_names[ REGNO (XEXP (tmp, 1)) ]);
22233 break;
22236 case 0:
22237 if (REG_P (x))
22238 fprintf (file, "%s", reg_names[REGNO (x)]);
22239 else if (MEM_P (x))
22241 /* We need to handle PRE_INC and PRE_DEC here, since we need to
22242 know the width from the mode. */
22243 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
22244 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
22245 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22246 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
22247 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
22248 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
22249 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
22250 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
22251 else
22252 output_address (GET_MODE (x), XEXP (x, 0));
22254 else
22256 if (toc_relative_expr_p (x, false))
22257 /* This hack along with a corresponding hack in
22258 rs6000_output_addr_const_extra arranges to output addends
22259 where the assembler expects to find them. eg.
22260 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
22261 without this hack would be output as "x@toc+4". We
22262 want "x+4@toc". */
22263 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22264 else
22265 output_addr_const (file, x);
22267 return;
22269 case '&':
22270 if (const char *name = get_some_local_dynamic_name ())
22271 assemble_name (file, name);
22272 else
22273 output_operand_lossage ("'%%&' used without any "
22274 "local dynamic TLS references");
22275 return;
22277 default:
22278 output_operand_lossage ("invalid %%xn code");
22282 /* Print the address of an operand. */
22284 void
22285 print_operand_address (FILE *file, rtx x)
22287 if (REG_P (x))
22288 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
22289 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
22290 || GET_CODE (x) == LABEL_REF)
22292 output_addr_const (file, x);
22293 if (small_data_operand (x, GET_MODE (x)))
22294 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
22295 reg_names[SMALL_DATA_REG]);
22296 else
22297 gcc_assert (!TARGET_TOC);
22299 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22300 && REG_P (XEXP (x, 1)))
22302 if (REGNO (XEXP (x, 0)) == 0)
22303 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
22304 reg_names[ REGNO (XEXP (x, 0)) ]);
22305 else
22306 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
22307 reg_names[ REGNO (XEXP (x, 1)) ]);
22309 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
22310 && GET_CODE (XEXP (x, 1)) == CONST_INT)
22311 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
22312 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
22313 #if TARGET_MACHO
22314 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22315 && CONSTANT_P (XEXP (x, 1)))
22317 fprintf (file, "lo16(");
22318 output_addr_const (file, XEXP (x, 1));
22319 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22321 #endif
22322 #if TARGET_ELF
22323 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
22324 && CONSTANT_P (XEXP (x, 1)))
22326 output_addr_const (file, XEXP (x, 1));
22327 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
22329 #endif
22330 else if (toc_relative_expr_p (x, false))
22332 /* This hack along with a corresponding hack in
22333 rs6000_output_addr_const_extra arranges to output addends
22334 where the assembler expects to find them. eg.
22335 (lo_sum (reg 9)
22336 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
22337 without this hack would be output as "x@toc+8@l(9)". We
22338 want "x+8@toc@l(9)". */
22339 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
22340 if (GET_CODE (x) == LO_SUM)
22341 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
22342 else
22343 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
22345 else
22346 gcc_unreachable ();
22349 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
22351 static bool
22352 rs6000_output_addr_const_extra (FILE *file, rtx x)
22354 if (GET_CODE (x) == UNSPEC)
22355 switch (XINT (x, 1))
22357 case UNSPEC_TOCREL:
22358 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
22359 && REG_P (XVECEXP (x, 0, 1))
22360 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
22361 output_addr_const (file, XVECEXP (x, 0, 0));
22362 if (x == tocrel_base && tocrel_offset != const0_rtx)
22364 if (INTVAL (tocrel_offset) >= 0)
22365 fprintf (file, "+");
22366 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
22368 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
22370 putc ('-', file);
22371 assemble_name (file, toc_label_name);
22372 need_toc_init = 1;
22374 else if (TARGET_ELF)
22375 fputs ("@toc", file);
22376 return true;
22378 #if TARGET_MACHO
22379 case UNSPEC_MACHOPIC_OFFSET:
22380 output_addr_const (file, XVECEXP (x, 0, 0));
22381 putc ('-', file);
22382 machopic_output_function_base_name (file);
22383 return true;
22384 #endif
22386 return false;
22389 /* Target hook for assembling integer objects. The PowerPC version has
22390 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
22391 is defined. It also needs to handle DI-mode objects on 64-bit
22392 targets. */
22394 static bool
22395 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
22397 #ifdef RELOCATABLE_NEEDS_FIXUP
22398 /* Special handling for SI values. */
22399 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
22401 static int recurse = 0;
22403 /* For -mrelocatable, we mark all addresses that need to be fixed up in
22404 the .fixup section. Since the TOC section is already relocated, we
22405 don't need to mark it here. We used to skip the text section, but it
22406 should never be valid for relocated addresses to be placed in the text
22407 section. */
22408 if (DEFAULT_ABI == ABI_V4
22409 && (TARGET_RELOCATABLE || flag_pic > 1)
22410 && in_section != toc_section
22411 && !recurse
22412 && !CONST_SCALAR_INT_P (x)
22413 && CONSTANT_P (x))
22415 char buf[256];
22417 recurse = 1;
22418 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
22419 fixuplabelno++;
22420 ASM_OUTPUT_LABEL (asm_out_file, buf);
22421 fprintf (asm_out_file, "\t.long\t(");
22422 output_addr_const (asm_out_file, x);
22423 fprintf (asm_out_file, ")@fixup\n");
22424 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
22425 ASM_OUTPUT_ALIGN (asm_out_file, 2);
22426 fprintf (asm_out_file, "\t.long\t");
22427 assemble_name (asm_out_file, buf);
22428 fprintf (asm_out_file, "\n\t.previous\n");
22429 recurse = 0;
22430 return true;
22432 /* Remove initial .'s to turn a -mcall-aixdesc function
22433 address into the address of the descriptor, not the function
22434 itself. */
22435 else if (GET_CODE (x) == SYMBOL_REF
22436 && XSTR (x, 0)[0] == '.'
22437 && DEFAULT_ABI == ABI_AIX)
22439 const char *name = XSTR (x, 0);
22440 while (*name == '.')
22441 name++;
22443 fprintf (asm_out_file, "\t.long\t%s\n", name);
22444 return true;
22447 #endif /* RELOCATABLE_NEEDS_FIXUP */
22448 return default_assemble_integer (x, size, aligned_p);
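/* For -mrelocatable, the fixup path above produces assembly along these
   lines (label number and symbol hypothetical):

	.LCP5:
		.long	(some_symbol)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP5
		.previous
*/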
22451 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22452 /* Emit an assembler directive to set symbol visibility for DECL to
22453 VISIBILITY_TYPE. */
22455 static void
22456 rs6000_assemble_visibility (tree decl, int vis)
22458 if (TARGET_XCOFF)
22459 return;
22461 /* Functions need to have their entry point symbol visibility set as
22462 well as their descriptor symbol visibility. */
22463 if (DEFAULT_ABI == ABI_AIX
22464 && DOT_SYMBOLS
22465 && TREE_CODE (decl) == FUNCTION_DECL)
22467 static const char * const visibility_types[] = {
22468 NULL, "internal", "hidden", "protected"
22471 const char *name, *type;
22473 name = ((* targetm.strip_name_encoding)
22474 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22475 type = visibility_types[vis];
22477 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22478 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22480 else
22481 default_assemble_visibility (decl, vis);
22483 #endif
22485 enum rtx_code
22486 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22488 /* Reversal of FP compares takes care -- an ordered compare
22489 becomes an unordered compare and vice versa. */
22490 if (mode == CCFPmode
22491 && (!flag_finite_math_only
22492 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22493 || code == UNEQ || code == LTGT))
22494 return reverse_condition_maybe_unordered (code);
22495 else
22496 return reverse_condition (code);
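/* For example, reversing GE on a CCFPmode comparison without
   -ffinite-math-only yields UNLT rather than LT, so the reversed branch
   is still taken when the operands compare unordered (NaN).  */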
22499 /* Generate a compare for CODE. Return a brand-new rtx that
22500 represents the result of the compare. */
22502 static rtx
22503 rs6000_generate_compare (rtx cmp, machine_mode mode)
22505 machine_mode comp_mode;
22506 rtx compare_result;
22507 enum rtx_code code = GET_CODE (cmp);
22508 rtx op0 = XEXP (cmp, 0);
22509 rtx op1 = XEXP (cmp, 1);
22511 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22512 comp_mode = CCmode;
22513 else if (FLOAT_MODE_P (mode))
22514 comp_mode = CCFPmode;
22515 else if (code == GTU || code == LTU
22516 || code == GEU || code == LEU)
22517 comp_mode = CCUNSmode;
22518 else if ((code == EQ || code == NE)
22519 && unsigned_reg_p (op0)
22520 && (unsigned_reg_p (op1)
22521 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22522 /* These are unsigned values, perhaps there will be a later
22523 ordering compare that can be shared with this one. */
22524 comp_mode = CCUNSmode;
22525 else
22526 comp_mode = CCmode;
22528 /* If we have an unsigned compare, make sure we don't have a signed value as
22529 an immediate. */
22530 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22531 && INTVAL (op1) < 0)
22533 op0 = copy_rtx_if_shared (op0);
22534 op1 = force_reg (GET_MODE (op0), op1);
22535 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22538 /* First, the compare. */
22539 compare_result = gen_reg_rtx (comp_mode);
22541 /* E500 FP compare instructions on the GPRs. Yuck! */
22542 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
22543 && FLOAT_MODE_P (mode))
22545 rtx cmp, or_result, compare_result2;
22546 machine_mode op_mode = GET_MODE (op0);
22547 bool reverse_p;
22549 if (op_mode == VOIDmode)
22550 op_mode = GET_MODE (op1);
22552 /* First reverse the condition codes that aren't directly supported. */
22553 switch (code)
22555 case NE:
22556 case UNLT:
22557 case UNLE:
22558 case UNGT:
22559 case UNGE:
22560 code = reverse_condition_maybe_unordered (code);
22561 reverse_p = true;
22562 break;
22564 case EQ:
22565 case LT:
22566 case LE:
22567 case GT:
22568 case GE:
22569 reverse_p = false;
22570 break;
22572 default:
22573 gcc_unreachable ();
22576 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
22577 This explains the following mess. */
22579 switch (code)
22581 case EQ:
22582 switch (op_mode)
22584 case SFmode:
22585 cmp = (flag_finite_math_only && !flag_trapping_math)
22586 ? gen_tstsfeq_gpr (compare_result, op0, op1)
22587 : gen_cmpsfeq_gpr (compare_result, op0, op1);
22588 break;
22590 case DFmode:
22591 cmp = (flag_finite_math_only && !flag_trapping_math)
22592 ? gen_tstdfeq_gpr (compare_result, op0, op1)
22593 : gen_cmpdfeq_gpr (compare_result, op0, op1);
22594 break;
22596 case TFmode:
22597 case IFmode:
22598 case KFmode:
22599 cmp = (flag_finite_math_only && !flag_trapping_math)
22600 ? gen_tsttfeq_gpr (compare_result, op0, op1)
22601 : gen_cmptfeq_gpr (compare_result, op0, op1);
22602 break;
22604 default:
22605 gcc_unreachable ();
22607 break;
22609 case GT:
22610 case GE:
22611 switch (op_mode)
22613 case SFmode:
22614 cmp = (flag_finite_math_only && !flag_trapping_math)
22615 ? gen_tstsfgt_gpr (compare_result, op0, op1)
22616 : gen_cmpsfgt_gpr (compare_result, op0, op1);
22617 break;
22619 case DFmode:
22620 cmp = (flag_finite_math_only && !flag_trapping_math)
22621 ? gen_tstdfgt_gpr (compare_result, op0, op1)
22622 : gen_cmpdfgt_gpr (compare_result, op0, op1);
22623 break;
22625 case TFmode:
22626 case IFmode:
22627 case KFmode:
22628 cmp = (flag_finite_math_only && !flag_trapping_math)
22629 ? gen_tsttfgt_gpr (compare_result, op0, op1)
22630 : gen_cmptfgt_gpr (compare_result, op0, op1);
22631 break;
22633 default:
22634 gcc_unreachable ();
22636 break;
22638 case LT:
22639 case LE:
22640 switch (op_mode)
22642 case SFmode:
22643 cmp = (flag_finite_math_only && !flag_trapping_math)
22644 ? gen_tstsflt_gpr (compare_result, op0, op1)
22645 : gen_cmpsflt_gpr (compare_result, op0, op1);
22646 break;
22648 case DFmode:
22649 cmp = (flag_finite_math_only && !flag_trapping_math)
22650 ? gen_tstdflt_gpr (compare_result, op0, op1)
22651 : gen_cmpdflt_gpr (compare_result, op0, op1);
22652 break;
22654 case TFmode:
22655 case IFmode:
22656 case KFmode:
22657 cmp = (flag_finite_math_only && !flag_trapping_math)
22658 ? gen_tsttflt_gpr (compare_result, op0, op1)
22659 : gen_cmptflt_gpr (compare_result, op0, op1);
22660 break;
22662 default:
22663 gcc_unreachable ();
22665 break;
22667 default:
22668 gcc_unreachable ();
22671 /* Synthesize LE and GE from LT/GT || EQ. */
22672 if (code == LE || code == GE)
22674 emit_insn (cmp);
22676 compare_result2 = gen_reg_rtx (CCFPmode);
22678 /* Do the EQ. */
22679 switch (op_mode)
22681 case SFmode:
22682 cmp = (flag_finite_math_only && !flag_trapping_math)
22683 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
22684 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
22685 break;
22687 case DFmode:
22688 cmp = (flag_finite_math_only && !flag_trapping_math)
22689 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
22690 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
22691 break;
22693 case TFmode:
22694 case IFmode:
22695 case KFmode:
22696 cmp = (flag_finite_math_only && !flag_trapping_math)
22697 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
22698 : gen_cmptfeq_gpr (compare_result2, op0, op1);
22699 break;
22701 default:
22702 gcc_unreachable ();
22705 emit_insn (cmp);
22707 /* OR them together. */
22708 or_result = gen_reg_rtx (CCFPmode);
22709 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
22710 compare_result2);
22711 compare_result = or_result;
22714 code = reverse_p ? NE : EQ;
22716 emit_insn (cmp);
22719 /* IEEE 128-bit support in VSX registers when we do not have hardware
22720 support. */
22721 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22723 rtx libfunc = NULL_RTX;
22724 bool check_nan = false;
22725 rtx dest;
22727 switch (code)
22729 case EQ:
22730 case NE:
22731 libfunc = optab_libfunc (eq_optab, mode);
22732 break;
22734 case GT:
22735 case GE:
22736 libfunc = optab_libfunc (ge_optab, mode);
22737 break;
22739 case LT:
22740 case LE:
22741 libfunc = optab_libfunc (le_optab, mode);
22742 break;
22744 case UNORDERED:
22745 case ORDERED:
22746 libfunc = optab_libfunc (unord_optab, mode);
22747 code = (code == UNORDERED) ? NE : EQ;
22748 break;
22750 case UNGE:
22751 case UNGT:
22752 check_nan = true;
22753 libfunc = optab_libfunc (ge_optab, mode);
22754 code = (code == UNGE) ? GE : GT;
22755 break;
22757 case UNLE:
22758 case UNLT:
22759 check_nan = true;
22760 libfunc = optab_libfunc (le_optab, mode);
22761 code = (code == UNLE) ? LE : LT;
22762 break;
22764 case UNEQ:
22765 case LTGT:
22766 check_nan = true;
22767 libfunc = optab_libfunc (eq_optab, mode);
22768 code = (code == UNEQ) ? EQ : NE;
22769 break;
22771 default:
22772 gcc_unreachable ();
22775 gcc_assert (libfunc);
22777 if (!check_nan)
22778 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22779 SImode, 2, op0, mode, op1, mode);
22781 /* The library signals an exception for signalling NaNs, so we need to
22782 handle isgreater, etc. by first checking isordered. */
22783 else
22785 rtx ne_rtx, normal_dest, unord_dest;
22786 rtx unord_func = optab_libfunc (unord_optab, mode);
22787 rtx join_label = gen_label_rtx ();
22788 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22789 rtx unord_cmp = gen_reg_rtx (comp_mode);
22792 /* Test for either value being a NaN. */
22793 gcc_assert (unord_func);
22794 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22795 SImode, 2, op0, mode, op1,
22796 mode);
22798 /* Set the result to 1 if either value is a NaN, and jump to the join
22799 label, so that the final NE test against zero yields true. */
22800 dest = gen_reg_rtx (SImode);
22801 emit_move_insn (dest, const1_rtx);
22802 emit_insn (gen_rtx_SET (unord_cmp,
22803 gen_rtx_COMPARE (comp_mode, unord_dest,
22804 const0_rtx)));
22806 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22807 emit_jump_insn (gen_rtx_SET (pc_rtx,
22808 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22809 join_ref,
22810 pc_rtx)));
22812 /* Do the normal comparison, knowing that the values are not
22813 NaNs. */
22814 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22815 SImode, 2, op0, mode, op1,
22816 mode);
22818 emit_insn (gen_cstoresi4 (dest,
22819 gen_rtx_fmt_ee (code, SImode, normal_dest,
22820 const0_rtx),
22821 normal_dest, const0_rtx));
22823 /* Join the NaN and non-NaN paths. Compare dest against 0. */
22824 emit_label (join_label);
22825 code = NE;
22828 emit_insn (gen_rtx_SET (compare_result,
22829 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22832 else
22834 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22835 CLOBBERs to match cmptf_internal2 pattern. */
22836 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22837 && FLOAT128_IBM_P (GET_MODE (op0))
22838 && TARGET_HARD_FLOAT && TARGET_FPRS)
22839 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22840 gen_rtvec (10,
22841 gen_rtx_SET (compare_result,
22842 gen_rtx_COMPARE (comp_mode, op0, op1)),
22843 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22844 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22845 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22846 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22847 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22848 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22849 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22850 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22851 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22852 else if (GET_CODE (op1) == UNSPEC
22853 && XINT (op1, 1) == UNSPEC_SP_TEST)
22855 rtx op1b = XVECEXP (op1, 0, 0);
22856 comp_mode = CCEQmode;
22857 compare_result = gen_reg_rtx (CCEQmode);
22858 if (TARGET_64BIT)
22859 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22860 else
22861 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22863 else
22864 emit_insn (gen_rtx_SET (compare_result,
22865 gen_rtx_COMPARE (comp_mode, op0, op1)));
22868 /* Some kinds of FP comparisons need an OR operation;
22869 under flag_finite_math_only we don't bother. */
22870 if (FLOAT_MODE_P (mode)
22871 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22872 && !flag_finite_math_only
22873 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
22874 && (code == LE || code == GE
22875 || code == UNEQ || code == LTGT
22876 || code == UNGT || code == UNLT))
22878 enum rtx_code or1, or2;
22879 rtx or1_rtx, or2_rtx, compare2_rtx;
22880 rtx or_result = gen_reg_rtx (CCEQmode);
22882 switch (code)
22884 case LE: or1 = LT; or2 = EQ; break;
22885 case GE: or1 = GT; or2 = EQ; break;
22886 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22887 case LTGT: or1 = LT; or2 = GT; break;
22888 case UNGT: or1 = UNORDERED; or2 = GT; break;
22889 case UNLT: or1 = UNORDERED; or2 = LT; break;
22890 default: gcc_unreachable ();
22892 validate_condition_mode (or1, comp_mode);
22893 validate_condition_mode (or2, comp_mode);
22894 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22895 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22896 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22897 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22898 const_true_rtx);
22899 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22901 compare_result = or_result;
22902 code = EQ;
22905 validate_condition_mode (code, GET_MODE (compare_result));
22907 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
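/* As a sketch of the result (register numbers hypothetical): for an
   unsigned SImode test such as (gtu (reg 3) (reg 4)), the code above
   allocates a CCUNSmode pseudo for the cmplw-style compare and returns
   (gtu (reg:CCUNS 100) (const_int 0)) for the caller to wrap in a branch
   or scc pattern.  */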
22911 /* Return the diagnostic message string if the binary operation OP is
22912 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22914 static const char*
22915 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22916 const_tree type1,
22917 const_tree type2)
22919 enum machine_mode mode1 = TYPE_MODE (type1);
22920 enum machine_mode mode2 = TYPE_MODE (type2);
22922 /* For complex modes, use the inner type. */
22923 if (COMPLEX_MODE_P (mode1))
22924 mode1 = GET_MODE_INNER (mode1);
22926 if (COMPLEX_MODE_P (mode2))
22927 mode2 = GET_MODE_INNER (mode2);
22929 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22930 double to intermix unless -mfloat128-convert. */
22931 if (mode1 == mode2)
22932 return NULL;
22934 if (!TARGET_FLOAT128_CVT)
22936 if ((mode1 == KFmode && mode2 == IFmode)
22937 || (mode1 == IFmode && mode2 == KFmode))
22938 return N_("__float128 and __ibm128 cannot be used in the same "
22939 "expression");
22941 if (TARGET_IEEEQUAD
22942 && ((mode1 == IFmode && mode2 == TFmode)
22943 || (mode1 == TFmode && mode2 == IFmode)))
22944 return N_("__ibm128 and long double cannot be used in the same "
22945 "expression");
22947 if (!TARGET_IEEEQUAD
22948 && ((mode1 == KFmode && mode2 == TFmode)
22949 || (mode1 == TFmode && mode2 == KFmode)))
22950 return N_("__float128 and long double cannot be used in the same "
22951 "expression");
22954 return NULL;
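/* A minimal source-level illustration of the first diagnostic, assuming
   a target where -mfloat128-convert is not in effect:

	__float128 a;
	__ibm128 b;
	a + b;	// error: __float128 and __ibm128 cannot be used in the
		// same expression
*/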
22958 /* Expand floating point conversion to/from __float128 and __ibm128. */
22960 void
22961 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22963 machine_mode dest_mode = GET_MODE (dest);
22964 machine_mode src_mode = GET_MODE (src);
22965 convert_optab cvt = unknown_optab;
22966 bool do_move = false;
22967 rtx libfunc = NULL_RTX;
22968 rtx dest2;
22969 typedef rtx (*rtx_2func_t) (rtx, rtx);
22970 rtx_2func_t hw_convert = (rtx_2func_t)0;
22971 size_t kf_or_tf;
22973 struct hw_conv_t {
22974 rtx_2func_t from_df;
22975 rtx_2func_t from_sf;
22976 rtx_2func_t from_si_sign;
22977 rtx_2func_t from_si_uns;
22978 rtx_2func_t from_di_sign;
22979 rtx_2func_t from_di_uns;
22980 rtx_2func_t to_df;
22981 rtx_2func_t to_sf;
22982 rtx_2func_t to_si_sign;
22983 rtx_2func_t to_si_uns;
22984 rtx_2func_t to_di_sign;
22985 rtx_2func_t to_di_uns;
22986 } hw_conversions[2] = {
22987 /* conversions to/from KFmode */
22989 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22990 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22991 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22992 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22993 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22994 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22995 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22996 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22997 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22998 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22999 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
23000 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
23003 /* conversions to/from TFmode */
23005 gen_extenddftf2_hw, /* TFmode <- DFmode. */
23006 gen_extendsftf2_hw, /* TFmode <- SFmode. */
23007 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
23008 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
23009 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
23010 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
23011 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
23012 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
23013 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
23014 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
23015 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
23016 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
23020 if (dest_mode == src_mode)
23021 gcc_unreachable ();
23023 /* Eliminate memory operations. */
23024 if (MEM_P (src))
23025 src = force_reg (src_mode, src);
23027 if (MEM_P (dest))
23029 rtx tmp = gen_reg_rtx (dest_mode);
23030 rs6000_expand_float128_convert (tmp, src, unsigned_p);
23031 rs6000_emit_move (dest, tmp, dest_mode);
23032 return;
23035 /* Convert to IEEE 128-bit floating point. */
23036 if (FLOAT128_IEEE_P (dest_mode))
23038 if (dest_mode == KFmode)
23039 kf_or_tf = 0;
23040 else if (dest_mode == TFmode)
23041 kf_or_tf = 1;
23042 else
23043 gcc_unreachable ();
23045 switch (src_mode)
23047 case DFmode:
23048 cvt = sext_optab;
23049 hw_convert = hw_conversions[kf_or_tf].from_df;
23050 break;
23052 case SFmode:
23053 cvt = sext_optab;
23054 hw_convert = hw_conversions[kf_or_tf].from_sf;
23055 break;
23057 case KFmode:
23058 case IFmode:
23059 case TFmode:
23060 if (FLOAT128_IBM_P (src_mode))
23061 cvt = sext_optab;
23062 else
23063 do_move = true;
23064 break;
23066 case SImode:
23067 if (unsigned_p)
23069 cvt = ufloat_optab;
23070 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
23072 else
23074 cvt = sfloat_optab;
23075 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
23077 break;
23079 case DImode:
23080 if (unsigned_p)
23082 cvt = ufloat_optab;
23083 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
23085 else
23087 cvt = sfloat_optab;
23088 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
23090 break;
23092 default:
23093 gcc_unreachable ();
23097 /* Convert from IEEE 128-bit floating point. */
23098 else if (FLOAT128_IEEE_P (src_mode))
23100 if (src_mode == KFmode)
23101 kf_or_tf = 0;
23102 else if (src_mode == TFmode)
23103 kf_or_tf = 1;
23104 else
23105 gcc_unreachable ();
23107 switch (dest_mode)
23109 case DFmode:
23110 cvt = trunc_optab;
23111 hw_convert = hw_conversions[kf_or_tf].to_df;
23112 break;
23114 case SFmode:
23115 cvt = trunc_optab;
23116 hw_convert = hw_conversions[kf_or_tf].to_sf;
23117 break;
23119 case KFmode:
23120 case IFmode:
23121 case TFmode:
23122 if (FLOAT128_IBM_P (dest_mode))
23123 cvt = trunc_optab;
23124 else
23125 do_move = true;
23126 break;
23128 case SImode:
23129 if (unsigned_p)
23131 cvt = ufix_optab;
23132 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
23134 else
23136 cvt = sfix_optab;
23137 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
23139 break;
23141 case DImode:
23142 if (unsigned_p)
23144 cvt = ufix_optab;
23145 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
23147 else
23149 cvt = sfix_optab;
23150 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
23152 break;
23154 default:
23155 gcc_unreachable ();
23159 /* Both IBM format. */
23160 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
23161 do_move = true;
23163 else
23164 gcc_unreachable ();
23166 /* Handle conversion between TFmode/KFmode. */
23167 if (do_move)
23168 emit_move_insn (dest, gen_lowpart (dest_mode, src));
23170 /* Handle conversion if we have hardware support. */
23171 else if (TARGET_FLOAT128_HW && hw_convert)
23172 emit_insn ((hw_convert) (dest, src));
23174 /* Call an external function to do the conversion. */
23175 else if (cvt != unknown_optab)
23177 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
23178 gcc_assert (libfunc != NULL_RTX);
23180 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
23181 src_mode);
23183 gcc_assert (dest2 != NULL_RTX);
23184 if (!rtx_equal_p (dest, dest2))
23185 emit_move_insn (dest, dest2);
23188 else
23189 gcc_unreachable ();
23191 return;
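/* As an illustration: a signed DImode to KFmode conversion uses
   gen_float_kfdi2_hw when IEEE 128-bit hardware support is enabled;
   otherwise it falls back to the libcall registered for sfloat_optab
   (typically the __floatdikf soft-float routine, name per the float128
   libfunc setup).  */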
23194 /* Split a conversion from __float128 to an integer type into separate insns.
23195 OPERANDS points to the destination, source, and V2DI temporary
23196 register. CODE is either FIX or UNSIGNED_FIX. */
23198 void
23199 convert_float128_to_int (rtx *operands, enum rtx_code code)
23201 rtx dest = operands[0];
23202 rtx src = operands[1];
23203 rtx tmp = operands[2];
23204 rtx cvt;
23205 rtvec cvt_vec;
23206 rtx cvt_unspec;
23207 rtvec move_vec;
23208 rtx move_unspec;
23210 if (GET_CODE (tmp) == SCRATCH)
23211 tmp = gen_reg_rtx (V2DImode);
23213 if (MEM_P (dest))
23214 dest = rs6000_address_for_fpconvert (dest);
23216 /* Generate the actual convert insn of the form:
23217 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
23218 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
23219 cvt_vec = gen_rtvec (1, cvt);
23220 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23221 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
23223 /* Generate the move insn of the form:
23224 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)). */
23225 move_vec = gen_rtvec (1, tmp);
23226 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
23227 emit_insn (gen_rtx_SET (dest, move_unspec));
23230 /* Split a conversion from an integer type to __float128 into separate insns.
23231 OPERANDS points to the destination, source, and V2DI temporary
23232 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
23234 void
23235 convert_int_to_float128 (rtx *operands, enum rtx_code code)
23237 rtx dest = operands[0];
23238 rtx src = operands[1];
23239 rtx tmp = operands[2];
23240 rtx cvt;
23241 rtvec cvt_vec;
23242 rtx cvt_unspec;
23243 rtvec move_vec;
23244 rtx move_unspec;
23245 rtx unsigned_flag;
23247 if (GET_CODE (tmp) == SCRATCH)
23248 tmp = gen_reg_rtx (V2DImode);
23250 if (MEM_P (src))
23251 src = rs6000_address_for_fpconvert (src);
23253 /* Generate the move of the integer into the Altivec register of the form:
23254 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
23255 (const_int 0)] UNSPEC_IEEE128_MOVE)).
23257 or:
23258 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
23260 if (GET_MODE (src) == SImode)
23262 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
23263 move_vec = gen_rtvec (2, src, unsigned_flag);
23265 else
23266 move_vec = gen_rtvec (1, src);
23268 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
23269 emit_insn (gen_rtx_SET (tmp, move_unspec));
23271 /* Generate the actual convert insn of the form:
23272 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
23273 UNSPEC_IEEE128_CONVERT))). */
23274 cvt_vec = gen_rtvec (1, tmp);
23275 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
23276 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
23277 emit_insn (gen_rtx_SET (dest, cvt));
23281 /* Emit the RTL for an sISEL pattern. */
23283 void
23284 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
23286 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
23289 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
23290 can be used as that dest register. Return the dest register. */
23292 rtx
23293 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
23295 if (op2 == const0_rtx)
23296 return op1;
23298 if (GET_CODE (scratch) == SCRATCH)
23299 scratch = gen_reg_rtx (mode);
23301 if (logical_operand (op2, mode))
23302 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
23303 else
23304 emit_insn (gen_rtx_SET (scratch,
23305 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
23307 return scratch;
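/* A sketch of typical use (names hypothetical): an seq-style expander
   might emit

	rtx tmp = rs6000_emit_eqne (DImode, op1, op2, scratch);

   and then count leading zeros of TMP or compare it against zero, since
   TMP is zero if and only if OP1 equals OP2.  */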
23310 void
23311 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
23313 rtx condition_rtx;
23314 machine_mode op_mode;
23315 enum rtx_code cond_code;
23316 rtx result = operands[0];
23318 condition_rtx = rs6000_generate_compare (operands[1], mode);
23319 cond_code = GET_CODE (condition_rtx);
23321 if (FLOAT_MODE_P (mode)
23322 && !TARGET_FPRS && TARGET_HARD_FLOAT)
23324 rtx t;
23326 PUT_MODE (condition_rtx, SImode);
23327 t = XEXP (condition_rtx, 0);
23329 gcc_assert (cond_code == NE || cond_code == EQ);
23331 if (cond_code == NE)
23332 emit_insn (gen_e500_flip_gt_bit (t, t));
23334 emit_insn (gen_move_from_CR_gt_bit (result, t));
23335 return;
23338 if (cond_code == NE
23339 || cond_code == GE || cond_code == LE
23340 || cond_code == GEU || cond_code == LEU
23341 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
23343 rtx not_result = gen_reg_rtx (CCEQmode);
23344 rtx not_op, rev_cond_rtx;
23345 machine_mode cc_mode;
23347 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
23349 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
23350 SImode, XEXP (condition_rtx, 0), const0_rtx);
23351 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
23352 emit_insn (gen_rtx_SET (not_result, not_op));
23353 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
23356 op_mode = GET_MODE (XEXP (operands[1], 0));
23357 if (op_mode == VOIDmode)
23358 op_mode = GET_MODE (XEXP (operands[1], 1));
23360 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
23362 PUT_MODE (condition_rtx, DImode);
23363 convert_move (result, condition_rtx, 0);
23365 else
23367 PUT_MODE (condition_rtx, SImode);
23368 emit_insn (gen_rtx_SET (result, condition_rtx));
23372 /* Emit a conditional branch: OPERANDS[0] holds the comparison and OPERANDS[3] the target label. */
23374 void
23375 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
23377 rtx condition_rtx, loc_ref;
23379 condition_rtx = rs6000_generate_compare (operands[0], mode);
23380 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
23381 emit_jump_insn (gen_rtx_SET (pc_rtx,
23382 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
23383 loc_ref, pc_rtx)));
23386 /* Return the string to output a conditional branch to LABEL, which is
23387 the operand template of the label, or NULL if the branch is really a
23388 conditional return.
23390 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
23391 condition code register and its mode specifies what kind of
23392 comparison we made.
23394 REVERSED is nonzero if we should reverse the sense of the comparison.
23396 INSN is the insn. */
23398 char *
23399 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
23401 static char string[64];
23402 enum rtx_code code = GET_CODE (op);
23403 rtx cc_reg = XEXP (op, 0);
23404 machine_mode mode = GET_MODE (cc_reg);
23405 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
23406 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
23407 int really_reversed = reversed ^ need_longbranch;
23408 char *s = string;
23409 const char *ccode;
23410 const char *pred;
23411 rtx note;
23413 validate_condition_mode (code, mode);
23415 /* Work out which way this really branches. We could use
23416 reverse_condition_maybe_unordered here always but this
23417 makes the resulting assembler clearer. */
23418 if (really_reversed)
23420 /* Reversal of FP compares takes care -- an ordered compare
23421 becomes an unordered compare and vice versa. */
23422 if (mode == CCFPmode)
23423 code = reverse_condition_maybe_unordered (code);
23424 else
23425 code = reverse_condition (code);
23428 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
23430 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
23431 to the GT bit. */
23432 switch (code)
23434 case EQ:
23435 /* Opposite of GT. */
23436 code = GT;
23437 break;
23439 case NE:
23440 code = UNLE;
23441 break;
23443 default:
23444 gcc_unreachable ();
23448 switch (code)
23450 /* Not all of these are actually distinct opcodes, but
23451 we distinguish them for clarity of the resulting assembler. */
23452 case NE: case LTGT:
23453 ccode = "ne"; break;
23454 case EQ: case UNEQ:
23455 ccode = "eq"; break;
23456 case GE: case GEU:
23457 ccode = "ge"; break;
23458 case GT: case GTU: case UNGT:
23459 ccode = "gt"; break;
23460 case LE: case LEU:
23461 ccode = "le"; break;
23462 case LT: case LTU: case UNLT:
23463 ccode = "lt"; break;
23464 case UNORDERED: ccode = "un"; break;
23465 case ORDERED: ccode = "nu"; break;
23466 case UNGE: ccode = "nl"; break;
23467 case UNLE: ccode = "ng"; break;
23468 default:
23469 gcc_unreachable ();
23472 /* Maybe we have a guess as to how likely the branch is. */
23473 pred = "";
23474 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
23475 if (note != NULL_RTX)
23477 /* PROB is the difference from 50%. */
23478 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
23480 /* Only hint for highly probable/improbable branches on newer cpus when
23481 we have real profile data, as static prediction overrides processor
23482 dynamic prediction. For older cpus we may as well always hint, but
23483 assume not taken for branches that are very close to 50% as a
23484 mispredicted taken branch is more expensive than a
23485 mispredicted not-taken branch. */
23486 if (rs6000_always_hint
23487 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
23488 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
23489 && br_prob_note_reliable_p (note)))
23491 if (abs (prob) > REG_BR_PROB_BASE / 20
23492 && ((prob > 0) ^ need_longbranch))
23493 pred = "+";
23494 else
23495 pred = "-";
23499 if (label == NULL)
23500 s += sprintf (s, "b%slr%s ", ccode, pred);
23501 else
23502 s += sprintf (s, "b%s%s ", ccode, pred);
23504 /* We need to escape any '%' characters in the reg_names string.
23505 Assume they'd only be the first character.... */
23506 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
23507 *s++ = '%';
23508 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
23510 if (label != NULL)
23512 /* If the branch distance was too far, we may have to use an
23513 unconditional branch to go the distance. */
23514 if (need_longbranch)
23515 s += sprintf (s, ",$+8\n\tb %s", label);
23516 else
23517 s += sprintf (s, ",%s", label);
23520 return string;
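/* For example, an EQ test in cr6 with a reliably-taken profile note comes
   back as roughly "beq+ 6,.L42", while a too-distant target instead gets
   the inverted short branch around an unconditional one, e.g.
   "bne 6,$+8\n\tb .L42" (label and CR field hypothetical).  */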
23523 /* Return the string to flip the GT bit on a CR. */
23524 char *
23525 output_e500_flip_gt_bit (rtx dst, rtx src)
23527 static char string[64];
23528 int a, b;
23530 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
23531 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
23533 /* GT bit. */
23534 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
23535 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
23537 sprintf (string, "crnot %d,%d", a, b);
23538 return string;
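/* Worked example (illustrative): with dst = src = cr6, the GT bit is
   CR field 6, bit 1, i.e. 4*6 + 1 = 25, so this returns "crnot 25,25".  */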
23541 /* Return insn for VSX or Altivec comparisons. */
23543 static rtx
23544 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
23546 rtx mask;
23547 machine_mode mode = GET_MODE (op0);
23549 switch (code)
23551 default:
23552 break;
23554 case GE:
23555 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23556 return NULL_RTX;
23557 /* FALLTHRU */
23559 case EQ:
23560 case GT:
23561 case GTU:
23562 case ORDERED:
23563 case UNORDERED:
23564 case UNEQ:
23565 case LTGT:
23566 mask = gen_reg_rtx (mode);
23567 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
23568 return mask;
23571 return NULL_RTX;
23574 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
23575 DMODE is expected destination mode. This is a recursive function. */
23577 static rtx
23578 rs6000_emit_vector_compare (enum rtx_code rcode,
23579 rtx op0, rtx op1,
23580 machine_mode dmode)
23582 rtx mask;
23583 bool swap_operands = false;
23584 bool try_again = false;
23586 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
23587 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
23589 /* See if the comparison works as is. */
23590 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23591 if (mask)
23592 return mask;
23594 switch (rcode)
23596 case LT:
23597 rcode = GT;
23598 swap_operands = true;
23599 try_again = true;
23600 break;
23601 case LTU:
23602 rcode = GTU;
23603 swap_operands = true;
23604 try_again = true;
23605 break;
23606 case NE:
23607 case UNLE:
23608 case UNLT:
23609 case UNGE:
23610 case UNGT:
23611 /* Invert condition and try again.
23612 e.g., A != B becomes ~(A==B). */
23614 enum rtx_code rev_code;
23615 enum insn_code nor_code;
23616 rtx mask2;
23618 rev_code = reverse_condition_maybe_unordered (rcode);
23619 if (rev_code == UNKNOWN)
23620 return NULL_RTX;
23622 nor_code = optab_handler (one_cmpl_optab, dmode);
23623 if (nor_code == CODE_FOR_nothing)
23624 return NULL_RTX;
23626 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
23627 if (!mask2)
23628 return NULL_RTX;
23630 mask = gen_reg_rtx (dmode);
23631 emit_insn (GEN_FCN (nor_code) (mask, mask2));
23632 return mask;
23634 break;
23635 case GE:
23636 case GEU:
23637 case LE:
23638 case LEU:
23639 /* Try GT/GTU/LT/LTU OR EQ */
23641 rtx c_rtx, eq_rtx;
23642 enum insn_code ior_code;
23643 enum rtx_code new_code;
23645 switch (rcode)
23647 case GE:
23648 new_code = GT;
23649 break;
23651 case GEU:
23652 new_code = GTU;
23653 break;
23655 case LE:
23656 new_code = LT;
23657 break;
23659 case LEU:
23660 new_code = LTU;
23661 break;
23663 default:
23664 gcc_unreachable ();
23667 ior_code = optab_handler (ior_optab, dmode);
23668 if (ior_code == CODE_FOR_nothing)
23669 return NULL_RTX;
23671 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
23672 if (!c_rtx)
23673 return NULL_RTX;
23675 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
23676 if (!eq_rtx)
23677 return NULL_RTX;
23679 mask = gen_reg_rtx (dmode);
23680 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
23681 return mask;
23683 break;
23684 default:
23685 return NULL_RTX;
23688 if (try_again)
23690 if (swap_operands)
23691 std::swap (op0, op1);
23693 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
23694 if (mask)
23695 return mask;
23698 /* You only get two chances. */
23699 return NULL_RTX;
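/* Example of the recursion above (illustrative): integer LE is not a
   native AltiVec/VSX comparison, so A LE B is built as
   (A LT B) IOR (A EQ B), where LT itself recurses into GT with swapped
   operands; NE is built as NOT (A EQ B) via the one_cmpl optab.  */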
23702 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
23703 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
23704 operands for the relation operation COND. */
23707 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
23708 rtx cond, rtx cc_op0, rtx cc_op1)
23710 machine_mode dest_mode = GET_MODE (dest);
23711 machine_mode mask_mode = GET_MODE (cc_op0);
23712 enum rtx_code rcode = GET_CODE (cond);
23713 machine_mode cc_mode = CCmode;
23714 rtx mask;
23715 rtx cond2;
23716 rtx tmp;
23717 bool invert_move = false;
23719 if (VECTOR_UNIT_NONE_P (dest_mode))
23720 return 0;
23722 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
23723 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
23725 switch (rcode)
23727 /* Swap operands if we can, and fall back to doing the operation as
23728 specified, and doing a NOR to invert the test. */
23729 case NE:
23730 case UNLE:
23731 case UNLT:
23732 case UNGE:
23733 case UNGT:
23734 /* Invert condition and try again.
23735 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
23736 invert_move = true;
23737 rcode = reverse_condition_maybe_unordered (rcode);
23738 if (rcode == UNKNOWN)
23739 return 0;
23740 break;
23742 /* Mark unsigned tests with CCUNSmode. */
23743 case GTU:
23744 case GEU:
23745 case LTU:
23746 case LEU:
23747 cc_mode = CCUNSmode;
23748 break;
23750 default:
23751 break;
23754 /* Get the vector mask for the given relational operations. */
23755 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
23757 if (!mask)
23758 return 0;
23760 if (invert_move)
23762 tmp = op_true;
23763 op_true = op_false;
23764 op_false = tmp;
23767 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
23768 CONST0_RTX (dest_mode));
23769 emit_insn (gen_rtx_SET (dest,
23770 gen_rtx_IF_THEN_ELSE (dest_mode,
23771 cond2,
23772 op_true,
23773 op_false)));
23774 return 1;
23777 /* ISA 3.0 (power9) min/max subcase to emit an XSMAXCDP or XSMINCDP instruction
23778 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
23779 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
23780 0 if the hardware has no such operation. */
23782 static int
23783 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23785 enum rtx_code code = GET_CODE (op);
23786 rtx op0 = XEXP (op, 0);
23787 rtx op1 = XEXP (op, 1);
23788 machine_mode compare_mode = GET_MODE (op0);
23789 machine_mode result_mode = GET_MODE (dest);
23790 bool max_p = false;
23792 if (result_mode != compare_mode)
23793 return 0;
23795 if (code == GE || code == GT)
23796 max_p = true;
23797 else if (code == LE || code == LT)
23798 max_p = false;
23799 else
23800 return 0;
23802 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
23805 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
23806 max_p = !max_p;
23808 else
23809 return 0;
23811 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
23812 return 1;
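/* Illustrative mapping: (a >= b ? a : b) matches the first arm above and
   becomes SMAX (xsmaxcdp); (a >= b ? b : a) matches the second arm, so
   max_p is inverted and SMIN (xsmincdp) is emitted instead.  */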
23815 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23816 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
23817 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
23818 zero/false. Return 0 if the hardware has no such operation. */
23820 static int
23821 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23823 enum rtx_code code = GET_CODE (op);
23824 rtx op0 = XEXP (op, 0);
23825 rtx op1 = XEXP (op, 1);
23826 machine_mode result_mode = GET_MODE (dest);
23827 rtx compare_rtx;
23828 rtx cmove_rtx;
23829 rtx clobber_rtx;
23831 if (!can_create_pseudo_p ())
23832 return 0;
23834 switch (code)
23836 case EQ:
23837 case GE:
23838 case GT:
23839 break;
23841 case NE:
23842 case LT:
23843 case LE:
23844 code = swap_condition (code);
23845 std::swap (op0, op1);
23846 break;
23848 default:
23849 return 0;
23852 /* Generate: [(parallel [(set (dest)
23853 (if_then_else (op (cmp1) (cmp2))
23854 (true)
23855 (false)))
23856 (clobber (scratch))])]. */
23858 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23859 cmove_rtx = gen_rtx_SET (dest,
23860 gen_rtx_IF_THEN_ELSE (result_mode,
23861 compare_rtx,
23862 true_cond,
23863 false_cond));
23865 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23866 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23867 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23869 return 1;
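/* Sketch of the expected assembly (illustrative; register numbers are
   placeholders, assuming a DFmode GT comparison):
       xscmpgtdp vs0,vs1,vs2    # mask = (op0 > op1) ? all-ones : 0
       xxsel     vs3,vsF,vsT,vs0
   where vsT/vsF hold TRUE_COND/FALSE_COND and the clobbered V2DImode
   scratch above holds the mask.  */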
23872 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
23873 operands of the last comparison is nonzero/true, FALSE_COND if it
23874 is zero/false. Return 0 if the hardware has no such operation. */
23877 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23879 enum rtx_code code = GET_CODE (op);
23880 rtx op0 = XEXP (op, 0);
23881 rtx op1 = XEXP (op, 1);
23882 machine_mode compare_mode = GET_MODE (op0);
23883 machine_mode result_mode = GET_MODE (dest);
23884 rtx temp;
23885 bool is_against_zero;
23887 /* These modes should always match. */
23888 if (GET_MODE (op1) != compare_mode
23889 /* In the isel case however, we can use a compare immediate, so
23890 op1 may be a small constant. */
23891 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23892 return 0;
23893 if (GET_MODE (true_cond) != result_mode)
23894 return 0;
23895 if (GET_MODE (false_cond) != result_mode)
23896 return 0;
23898 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23899 if (TARGET_P9_MINMAX
23900 && (compare_mode == SFmode || compare_mode == DFmode)
23901 && (result_mode == SFmode || result_mode == DFmode))
23903 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23904 return 1;
23906 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23907 return 1;
23910 /* Don't allow using floating point comparisons for integer results for
23911 now. */
23912 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23913 return 0;
23915 /* First, work out if the hardware can do this at all, or
23916 if it's too slow.... */
23917 if (!FLOAT_MODE_P (compare_mode))
23919 if (TARGET_ISEL)
23920 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23921 return 0;
23923 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
23924 && SCALAR_FLOAT_MODE_P (compare_mode))
23925 return 0;
23927 is_against_zero = op1 == CONST0_RTX (compare_mode);
23929 /* A floating-point subtract might overflow, underflow, or produce
23930 an inexact result, thus changing the floating-point flags, so it
23931 can't be generated if we care about that. It's safe if one side
23932 of the construct is zero, since then no subtract will be
23933 generated. */
23934 if (SCALAR_FLOAT_MODE_P (compare_mode)
23935 && flag_trapping_math && ! is_against_zero)
23936 return 0;
23938 /* Eliminate half of the comparisons by switching operands, this
23939 makes the remaining code simpler. */
23940 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23941 || code == LTGT || code == LT || code == UNLE)
23943 code = reverse_condition_maybe_unordered (code);
23944 temp = true_cond;
23945 true_cond = false_cond;
23946 false_cond = temp;
23949 /* UNEQ and LTGT take four instructions for a comparison with zero,
23950 so it'll probably be faster to use a branch here too. */
23951 if (code == UNEQ && HONOR_NANS (compare_mode))
23952 return 0;
23954 /* We're going to try to implement comparisons by performing
23955 a subtract, then comparing against zero. Unfortunately,
23956 Inf - Inf is NaN which is not zero, and so if we don't
23957 know that the operand is finite and the comparison
23958 would treat EQ differently from UNORDERED, we can't do it. */
23959 if (HONOR_INFINITIES (compare_mode)
23960 && code != GT && code != UNGE
23961 && (GET_CODE (op1) != CONST_DOUBLE
23962 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23963 /* Constructs of the form (a OP b ? a : b) are safe. */
23964 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23965 || (! rtx_equal_p (op0, true_cond)
23966 && ! rtx_equal_p (op1, true_cond))))
23967 return 0;
23969 /* At this point we know we can use fsel. */
23971 /* Reduce the comparison to a comparison against zero. */
23972 if (! is_against_zero)
23974 temp = gen_reg_rtx (compare_mode);
23975 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23976 op0 = temp;
23977 op1 = CONST0_RTX (compare_mode);
23980 /* If we don't care about NaNs we can reduce some of the comparisons
23981 down to faster ones. */
23982 if (! HONOR_NANS (compare_mode))
23983 switch (code)
23985 case GT:
23986 code = LE;
23987 temp = true_cond;
23988 true_cond = false_cond;
23989 false_cond = temp;
23990 break;
23991 case UNGE:
23992 code = GE;
23993 break;
23994 case UNEQ:
23995 code = EQ;
23996 break;
23997 default:
23998 break;
24001 /* Now, reduce everything down to a GE. */
24002 switch (code)
24004 case GE:
24005 break;
24007 case LE:
24008 temp = gen_reg_rtx (compare_mode);
24009 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24010 op0 = temp;
24011 break;
24013 case ORDERED:
24014 temp = gen_reg_rtx (compare_mode);
24015 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
24016 op0 = temp;
24017 break;
24019 case EQ:
24020 temp = gen_reg_rtx (compare_mode);
24021 emit_insn (gen_rtx_SET (temp,
24022 gen_rtx_NEG (compare_mode,
24023 gen_rtx_ABS (compare_mode, op0))));
24024 op0 = temp;
24025 break;
24027 case UNGE:
24028 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
24029 temp = gen_reg_rtx (result_mode);
24030 emit_insn (gen_rtx_SET (temp,
24031 gen_rtx_IF_THEN_ELSE (result_mode,
24032 gen_rtx_GE (VOIDmode,
24033 op0, op1),
24034 true_cond, false_cond)));
24035 false_cond = true_cond;
24036 true_cond = temp;
24038 temp = gen_reg_rtx (compare_mode);
24039 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24040 op0 = temp;
24041 break;
24043 case GT:
24044 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
24045 temp = gen_reg_rtx (result_mode);
24046 emit_insn (gen_rtx_SET (temp,
24047 gen_rtx_IF_THEN_ELSE (result_mode,
24048 gen_rtx_GE (VOIDmode,
24049 op0, op1),
24050 true_cond, false_cond)));
24051 true_cond = false_cond;
24052 false_cond = temp;
24054 temp = gen_reg_rtx (compare_mode);
24055 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
24056 op0 = temp;
24057 break;
24059 default:
24060 gcc_unreachable ();
24063 emit_insn (gen_rtx_SET (dest,
24064 gen_rtx_IF_THEN_ELSE (result_mode,
24065 gen_rtx_GE (VOIDmode,
24066 op0, op1),
24067 true_cond, false_cond)));
24068 return 1;
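/* Worked example of the reduction above (illustrative): for
   (a <= b ? x : y) with finite operands, the subtract gives a-b, the LE
   arm negates it, and the final fsel selects on >= 0, roughly:
       fsub f0,f_a,f_b
       fneg f0,f0
       fsel f_dst,f0,f_x,f_y    # x if b-a >= 0, else y
   Register names here are placeholders.  */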
24071 /* Same as above, but for ints (isel). */
24073 static int
24074 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
24076 rtx condition_rtx, cr;
24077 machine_mode mode = GET_MODE (dest);
24078 enum rtx_code cond_code;
24079 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
24080 bool signedp;
24082 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
24083 return 0;
24085 /* We still have to do the compare, because isel doesn't do a
24086 compare, it just looks at the CRx bits set by a previous compare
24087 instruction. */
24088 condition_rtx = rs6000_generate_compare (op, mode);
24089 cond_code = GET_CODE (condition_rtx);
24090 cr = XEXP (condition_rtx, 0);
24091 signedp = GET_MODE (cr) == CCmode;
24093 isel_func = (mode == SImode
24094 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
24095 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
24097 switch (cond_code)
24099 case LT: case GT: case LTU: case GTU: case EQ:
24100 /* isel handles these directly. */
24101 break;
24103 default:
24104 /* We need to swap the sense of the comparison. */
24106 std::swap (false_cond, true_cond);
24107 PUT_CODE (condition_rtx, reverse_condition (cond_code));
24109 break;
24112 false_cond = force_reg (mode, false_cond);
24113 if (true_cond != const0_rtx)
24114 true_cond = force_reg (mode, true_cond);
24116 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
24118 return 1;
24121 const char *
24122 output_isel (rtx *operands)
24124 enum rtx_code code;
24126 code = GET_CODE (operands[1]);
24128 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
24130 gcc_assert (GET_CODE (operands[2]) == REG
24131 && GET_CODE (operands[3]) == REG);
24132 PUT_CODE (operands[1], reverse_condition (code));
24133 return "isel %0,%3,%2,%j1";
24136 return "isel %0,%2,%3,%j1";
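/* Illustrative example: a GE comparison has no direct isel encoding, so
   the template above rewrites it as LT and swaps the two value operands,
   e.g. emitting something like "isel r3,r5,r4,0" to pick the original
   false arm when the LT bit of cr0 is set (operand rendering assumed).  */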
24139 void
24140 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
24142 machine_mode mode = GET_MODE (op0);
24143 enum rtx_code c;
24144 rtx target;
24146 /* VSX/altivec have direct min/max insns. */
24147 if ((code == SMAX || code == SMIN)
24148 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
24149 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
24151 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
24152 return;
24155 if (code == SMAX || code == SMIN)
24156 c = GE;
24157 else
24158 c = GEU;
24160 if (code == SMAX || code == UMAX)
24161 target = emit_conditional_move (dest, c, op0, op1, mode,
24162 op0, op1, mode, 0);
24163 else
24164 target = emit_conditional_move (dest, c, op0, op1, mode,
24165 op1, op0, mode, 0);
24166 gcc_assert (target);
24167 if (target != dest)
24168 emit_move_insn (dest, target);
24171 /* Split a signbit operation on 64-bit machines with direct move. Also allow
24172 the value to come from memory or to be already loaded into a GPR. */
24174 void
24175 rs6000_split_signbit (rtx dest, rtx src)
24177 machine_mode d_mode = GET_MODE (dest);
24178 machine_mode s_mode = GET_MODE (src);
24179 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
24180 rtx shift_reg = dest_di;
24182 gcc_assert (REG_P (dest));
24183 gcc_assert (REG_P (src) || MEM_P (src));
24184 gcc_assert (s_mode == KFmode || s_mode == TFmode);
24186 if (MEM_P (src))
24188 rtx mem = (WORDS_BIG_ENDIAN
24189 ? adjust_address (src, DImode, 0)
24190 : adjust_address (src, DImode, 8));
24191 emit_insn (gen_rtx_SET (dest_di, mem));
24194 else
24196 unsigned int r = REGNO (src);
24198 /* If this is a VSX register, generate the special mfvsrd instruction
24199 to get it in a GPR. Until we support SF and DF modes, that will
24200 always be true. */
24201 gcc_assert (VSX_REGNO_P (r));
24203 if (s_mode == KFmode)
24204 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
24205 else
24206 emit_insn (gen_signbittf2_dm2 (dest_di, src));
24209 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
24210 return;
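/* Illustrative expansion (assumption: the signbit*_dm2 patterns generate
   a direct-move form such as mfvsrd): for a KFmode value already in a
   VSX register this becomes roughly
       mfvsrd rD,vsS            # move the high 64 bits to a GPR
       srdi   rD,rD,63          # leave just the sign bit
   with rD/vsS as placeholder register names.  */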
24213 /* A subroutine of the atomic operation splitters. Jump to LABEL if
24214 COND is true. Mark the jump as unlikely to be taken. */
24216 static void
24217 emit_unlikely_jump (rtx cond, rtx label)
24219 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
24220 rtx x;
24222 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
24223 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
24224 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
24227 /* A subroutine of the atomic operation splitters. Emit a load-locked
24228 instruction in MODE. For QI/HImode, possibly use a pattern that includes
24229 the zero_extend operation. */
24231 static void
24232 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
24234 rtx (*fn) (rtx, rtx) = NULL;
24236 switch (mode)
24238 case QImode:
24239 fn = gen_load_lockedqi;
24240 break;
24241 case HImode:
24242 fn = gen_load_lockedhi;
24243 break;
24244 case SImode:
24245 if (GET_MODE (mem) == QImode)
24246 fn = gen_load_lockedqi_si;
24247 else if (GET_MODE (mem) == HImode)
24248 fn = gen_load_lockedhi_si;
24249 else
24250 fn = gen_load_lockedsi;
24251 break;
24252 case DImode:
24253 fn = gen_load_lockeddi;
24254 break;
24255 case TImode:
24256 fn = gen_load_lockedti;
24257 break;
24258 default:
24259 gcc_unreachable ();
24261 emit_insn (fn (reg, mem));
24264 /* A subroutine of the atomic operation splitters. Emit a store-conditional
24265 instruction in MODE. */
24267 static void
24268 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
24270 rtx (*fn) (rtx, rtx, rtx) = NULL;
24272 switch (mode)
24274 case QImode:
24275 fn = gen_store_conditionalqi;
24276 break;
24277 case HImode:
24278 fn = gen_store_conditionalhi;
24279 break;
24280 case SImode:
24281 fn = gen_store_conditionalsi;
24282 break;
24283 case DImode:
24284 fn = gen_store_conditionaldi;
24285 break;
24286 case TImode:
24287 fn = gen_store_conditionalti;
24288 break;
24289 default:
24290 gcc_unreachable ();
24293 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
24294 if (PPC405_ERRATUM77)
24295 emit_insn (gen_hwsync ());
24297 emit_insn (fn (res, mem, val));
24300 /* Expand barriers before and after a load_locked/store_cond sequence. */
24302 static rtx
24303 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
24305 rtx addr = XEXP (mem, 0);
24306 int strict_p = (reload_in_progress || reload_completed);
24308 if (!legitimate_indirect_address_p (addr, strict_p)
24309 && !legitimate_indexed_address_p (addr, strict_p))
24311 addr = force_reg (Pmode, addr);
24312 mem = replace_equiv_address_nv (mem, addr);
24315 switch (model)
24317 case MEMMODEL_RELAXED:
24318 case MEMMODEL_CONSUME:
24319 case MEMMODEL_ACQUIRE:
24320 break;
24321 case MEMMODEL_RELEASE:
24322 case MEMMODEL_ACQ_REL:
24323 emit_insn (gen_lwsync ());
24324 break;
24325 case MEMMODEL_SEQ_CST:
24326 emit_insn (gen_hwsync ());
24327 break;
24328 default:
24329 gcc_unreachable ();
24331 return mem;
24334 static void
24335 rs6000_post_atomic_barrier (enum memmodel model)
24337 switch (model)
24339 case MEMMODEL_RELAXED:
24340 case MEMMODEL_CONSUME:
24341 case MEMMODEL_RELEASE:
24342 break;
24343 case MEMMODEL_ACQUIRE:
24344 case MEMMODEL_ACQ_REL:
24345 case MEMMODEL_SEQ_CST:
24346 emit_insn (gen_isync ());
24347 break;
24348 default:
24349 gcc_unreachable ();
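/* Summary of the barrier placement above (illustrative): SEQ_CST gets
   hwsync before the load-locked loop and isync after it; ACQ_REL gets
   lwsync before and isync after; ACQUIRE only isync after; RELEASE only
   lwsync before; RELAXED and CONSUME get neither.  */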
24353 /* A subroutine of the various atomic expanders. For sub-word operations,
24354 we must adjust things to operate on SImode. Given the original MEM,
24355 return a new aligned memory. Also build and return the quantities by
24356 which to shift and mask. */
24358 static rtx
24359 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
24361 rtx addr, align, shift, mask, mem;
24362 HOST_WIDE_INT shift_mask;
24363 machine_mode mode = GET_MODE (orig_mem);
24365 /* For smaller modes, we have to implement this via SImode. */
24366 shift_mask = (mode == QImode ? 0x18 : 0x10);
24368 addr = XEXP (orig_mem, 0);
24369 addr = force_reg (GET_MODE (addr), addr);
24371 /* Aligned memory containing subword. Generate a new memory. We
24372 do not want any of the existing MEM_ATTR data, as we're now
24373 accessing memory outside the original object. */
24374 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
24375 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24376 mem = gen_rtx_MEM (SImode, align);
24377 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
24378 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
24379 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
24381 /* Shift amount for subword relative to aligned word. */
24382 shift = gen_reg_rtx (SImode);
24383 addr = gen_lowpart (SImode, addr);
24384 rtx tmp = gen_reg_rtx (SImode);
24385 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
24386 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
24387 if (BYTES_BIG_ENDIAN)
24388 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
24389 shift, 1, OPTAB_LIB_WIDEN);
24390 *pshift = shift;
24392 /* Mask for insertion. */
24393 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
24394 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
24395 *pmask = mask;
24397 return mem;
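/* Worked example (illustrative): for a QImode access at address 0x1003,
   the aligned word is at 0x1000 and shift = (0x1003 << 3) & 0x18 = 24.
   On a little-endian target the byte sits in bits 24..31, so *pshift
   stays 24; on big-endian it is XORed with 0x18 to give 0.  In either
   case *pmask = 0xff << shift.  */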
24400 /* A subroutine of the various atomic expanders. For sub-word operands,
24401 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
24403 static rtx
24404 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
24406 rtx x;
24408 x = gen_reg_rtx (SImode);
24409 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
24410 gen_rtx_NOT (SImode, mask),
24411 oldval)));
24413 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
24415 return x;
24418 /* A subroutine of the various atomic expanders. For sub-word operands,
24419 extract WIDE to NARROW via SHIFT. */
24421 static void
24422 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
24424 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
24425 wide, 1, OPTAB_LIB_WIDEN);
24426 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
24429 /* Expand an atomic compare and swap operation. */
24431 void
24432 rs6000_expand_atomic_compare_and_swap (rtx operands[])
24434 rtx boolval, retval, mem, oldval, newval, cond;
24435 rtx label1, label2, x, mask, shift;
24436 machine_mode mode, orig_mode;
24437 enum memmodel mod_s, mod_f;
24438 bool is_weak;
24440 boolval = operands[0];
24441 retval = operands[1];
24442 mem = operands[2];
24443 oldval = operands[3];
24444 newval = operands[4];
24445 is_weak = (INTVAL (operands[5]) != 0);
24446 mod_s = memmodel_base (INTVAL (operands[6]));
24447 mod_f = memmodel_base (INTVAL (operands[7]));
24448 orig_mode = mode = GET_MODE (mem);
24450 mask = shift = NULL_RTX;
24451 if (mode == QImode || mode == HImode)
24453 /* Before power8, we didn't have access to lbarx/lharx, so generate a
24454 lwarx and use shift/mask operations. With power8, we need to do the
24455 comparison in SImode, but the store is still done in QI/HImode. */
24456 oldval = convert_modes (SImode, mode, oldval, 1);
24458 if (!TARGET_SYNC_HI_QI)
24460 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24462 /* Shift and mask OLDVAL into position within the word. */
24463 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
24464 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24466 /* Shift and mask NEWVAL into position within the word. */
24467 newval = convert_modes (SImode, mode, newval, 1);
24468 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
24469 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24472 /* Prepare to adjust the return value. */
24473 retval = gen_reg_rtx (SImode);
24474 mode = SImode;
24476 else if (reg_overlap_mentioned_p (retval, oldval))
24477 oldval = copy_to_reg (oldval);
24479 if (mode != TImode && !reg_or_short_operand (oldval, mode))
24480 oldval = copy_to_mode_reg (mode, oldval);
24482 if (reg_overlap_mentioned_p (retval, newval))
24483 newval = copy_to_reg (newval);
24485 mem = rs6000_pre_atomic_barrier (mem, mod_s);
24487 label1 = NULL_RTX;
24488 if (!is_weak)
24490 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24491 emit_label (XEXP (label1, 0));
24493 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24495 emit_load_locked (mode, retval, mem);
24497 x = retval;
24498 if (mask)
24499 x = expand_simple_binop (SImode, AND, retval, mask,
24500 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24502 cond = gen_reg_rtx (CCmode);
24503 /* If we have TImode, synthesize a comparison. */
24504 if (mode != TImode)
24505 x = gen_rtx_COMPARE (CCmode, x, oldval);
24506 else
24508 rtx xor1_result = gen_reg_rtx (DImode);
24509 rtx xor2_result = gen_reg_rtx (DImode);
24510 rtx or_result = gen_reg_rtx (DImode);
24511 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
24512 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
24513 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
24514 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
24516 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
24517 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
24518 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
24519 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
24522 emit_insn (gen_rtx_SET (cond, x));
24524 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24525 emit_unlikely_jump (x, label2);
24527 x = newval;
24528 if (mask)
24529 x = rs6000_mask_atomic_subword (retval, newval, mask);
24531 emit_store_conditional (orig_mode, cond, mem, x);
24533 if (!is_weak)
24535 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24536 emit_unlikely_jump (x, label1);
24539 if (!is_mm_relaxed (mod_f))
24540 emit_label (XEXP (label2, 0));
24542 rs6000_post_atomic_barrier (mod_s);
24544 if (is_mm_relaxed (mod_f))
24545 emit_label (XEXP (label2, 0));
24547 if (shift)
24548 rs6000_finish_atomic_subword (operands[1], retval, shift);
24549 else if (mode != GET_MODE (operands[1]))
24550 convert_move (operands[1], retval, 1);
24552 /* In all cases, CR0 contains EQ on success, and NE on failure. */
24553 x = gen_rtx_EQ (SImode, cond, const0_rtx);
24554 emit_insn (gen_rtx_SET (boolval, x));
24557 /* Expand an atomic exchange operation. */
24559 void
24560 rs6000_expand_atomic_exchange (rtx operands[])
24562 rtx retval, mem, val, cond;
24563 machine_mode mode;
24564 enum memmodel model;
24565 rtx label, x, mask, shift;
24567 retval = operands[0];
24568 mem = operands[1];
24569 val = operands[2];
24570 model = memmodel_base (INTVAL (operands[3]));
24571 mode = GET_MODE (mem);
24573 mask = shift = NULL_RTX;
24574 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
24576 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24578 /* Shift and mask VAL into position within the word. */
24579 val = convert_modes (SImode, mode, val, 1);
24580 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24581 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24583 /* Prepare to adjust the return value. */
24584 retval = gen_reg_rtx (SImode);
24585 mode = SImode;
24588 mem = rs6000_pre_atomic_barrier (mem, model);
24590 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
24591 emit_label (XEXP (label, 0));
24593 emit_load_locked (mode, retval, mem);
24595 x = val;
24596 if (mask)
24597 x = rs6000_mask_atomic_subword (retval, val, mask);
24599 cond = gen_reg_rtx (CCmode);
24600 emit_store_conditional (mode, cond, mem, x);
24602 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24603 emit_unlikely_jump (x, label);
24605 rs6000_post_atomic_barrier (model);
24607 if (shift)
24608 rs6000_finish_atomic_subword (operands[0], retval, shift);
24611 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
24612 to perform. MEM is the memory on which to operate. VAL is the second
24613 operand of the binary operator. BEFORE and AFTER are optional locations to
24614 return the value of MEM either before or after the operation. MODEL_RTX
24615 is a CONST_INT containing the memory model to use. */
24617 void
24618 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
24619 rtx orig_before, rtx orig_after, rtx model_rtx)
24621 enum memmodel model = memmodel_base (INTVAL (model_rtx));
24622 machine_mode mode = GET_MODE (mem);
24623 machine_mode store_mode = mode;
24624 rtx label, x, cond, mask, shift;
24625 rtx before = orig_before, after = orig_after;
24627 mask = shift = NULL_RTX;
24628 /* On power8, we want to use SImode for the operation. On previous systems,
24629 do the operation on a subword and shift/mask to get the proper byte or
24630 halfword. */
24631 if (mode == QImode || mode == HImode)
24633 if (TARGET_SYNC_HI_QI)
24635 val = convert_modes (SImode, mode, val, 1);
24637 /* Prepare to adjust the return value. */
24638 before = gen_reg_rtx (SImode);
24639 if (after)
24640 after = gen_reg_rtx (SImode);
24641 mode = SImode;
24643 else
24645 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
24647 /* Shift and mask VAL into position within the word. */
24648 val = convert_modes (SImode, mode, val, 1);
24649 val = expand_simple_binop (SImode, ASHIFT, val, shift,
24650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24652 switch (code)
24654 case IOR:
24655 case XOR:
24656 /* We've already zero-extended VAL. That is sufficient to
24657 make certain that it does not affect other bits. */
24658 mask = NULL;
24659 break;
24661 case AND:
24662 /* If we make certain that all of the other bits in VAL are
24663 set, that will be sufficient to not affect other bits. */
24664 x = gen_rtx_NOT (SImode, mask);
24665 x = gen_rtx_IOR (SImode, x, val);
24666 emit_insn (gen_rtx_SET (val, x));
24667 mask = NULL;
24668 break;
24670 case NOT:
24671 case PLUS:
24672 case MINUS:
24673 /* These will all affect bits outside the field and need
24674 adjustment via MASK within the loop. */
24675 break;
24677 default:
24678 gcc_unreachable ();
24681 /* Prepare to adjust the return value. */
24682 before = gen_reg_rtx (SImode);
24683 if (after)
24684 after = gen_reg_rtx (SImode);
24685 store_mode = mode = SImode;
24689 mem = rs6000_pre_atomic_barrier (mem, model);
24691 label = gen_label_rtx ();
24692 emit_label (label);
24693 label = gen_rtx_LABEL_REF (VOIDmode, label);
24695 if (before == NULL_RTX)
24696 before = gen_reg_rtx (mode);
24698 emit_load_locked (mode, before, mem);
24700 if (code == NOT)
24702 x = expand_simple_binop (mode, AND, before, val,
24703 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24704 after = expand_simple_unop (mode, NOT, x, after, 1);
24706 else
24708 after = expand_simple_binop (mode, code, before, val,
24709 after, 1, OPTAB_LIB_WIDEN);
24712 x = after;
24713 if (mask)
24715 x = expand_simple_binop (SImode, AND, after, mask,
24716 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24717 x = rs6000_mask_atomic_subword (before, x, mask);
24719 else if (store_mode != mode)
24720 x = convert_modes (store_mode, mode, x, 1);
24722 cond = gen_reg_rtx (CCmode);
24723 emit_store_conditional (store_mode, cond, mem, x);
24725 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24726 emit_unlikely_jump (x, label);
24728 rs6000_post_atomic_barrier (model);
24730 if (shift)
24732 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
24733 then do the calculations in an SImode register. */
24734 if (orig_before)
24735 rs6000_finish_atomic_subword (orig_before, before, shift);
24736 if (orig_after)
24737 rs6000_finish_atomic_subword (orig_after, after, shift);
24739 else if (store_mode != mode)
24741 /* QImode/HImode on machines with lbarx/lharx where we do the native
24742 operation and then do the calculations in an SImode register. */
24743 if (orig_before)
24744 convert_move (orig_before, before, 1);
24745 if (orig_after)
24746 convert_move (orig_after, after, 1);
24748 else if (orig_after && after != orig_after)
24749 emit_move_insn (orig_after, after);
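/* Sketch for an SImode fetch-and-add (illustrative; placeholders):
       1: lwarx  r9,0,rMEM      # BEFORE value
          add    r10,r9,rVAL    # AFTER value
          stwcx. r10,0,rMEM
          bne-   cr0,1b
   For NOT the loop computes ~(before & val) instead, matching the
   expand_simple_* calls above.  */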
24752 /* Emit instructions to move SRC to DST. Called by splitters for
24753 multi-register moves. It will emit at most one instruction for
24754 each register that is accessed; that is, it won't emit li/lis pairs
24755 (or equivalent for 64-bit code). One of SRC or DST must be a hard
24756 register. */
24758 void
24759 rs6000_split_multireg_move (rtx dst, rtx src)
24761 /* The register number of the first register being moved. */
24762 int reg;
24763 /* The mode that is to be moved. */
24764 machine_mode mode;
24765 /* The mode that the move is being done in, and its size. */
24766 machine_mode reg_mode;
24767 int reg_mode_size;
24768 /* The number of registers that will be moved. */
24769 int nregs;
24771 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
24772 mode = GET_MODE (dst);
24773 nregs = hard_regno_nregs[reg][mode];
24774 if (FP_REGNO_P (reg))
24775 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
24776 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
24777 else if (ALTIVEC_REGNO_P (reg))
24778 reg_mode = V16QImode;
24779 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
24780 reg_mode = DFmode;
24781 else
24782 reg_mode = word_mode;
24783 reg_mode_size = GET_MODE_SIZE (reg_mode);
24785 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
24787 /* TDmode residing in FP registers is special, since the ISA requires that
24788 the lower-numbered word of a register pair is always the most significant
24789 word, even in little-endian mode. This does not match the usual subreg
24790 semantics, so we cannot use simplify_gen_subreg in those cases. Access
24791 the appropriate constituent registers "by hand" in little-endian mode.
24793 Note we do not need to check for destructive overlap here since TDmode
24794 can only reside in even/odd register pairs. */
24795 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
24797 rtx p_src, p_dst;
24798 int i;
24800 for (i = 0; i < nregs; i++)
24802 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
24803 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
24804 else
24805 p_src = simplify_gen_subreg (reg_mode, src, mode,
24806 i * reg_mode_size);
24808 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
24809 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
24810 else
24811 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
24812 i * reg_mode_size);
24814 emit_insn (gen_rtx_SET (p_dst, p_src));
24817 return;
24820 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
24822 /* Move register range backwards, if we might have destructive
24823 overlap. */
24824 int i;
24825 for (i = nregs - 1; i >= 0; i--)
24826 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24827 i * reg_mode_size),
24828 simplify_gen_subreg (reg_mode, src, mode,
24829 i * reg_mode_size)));
24831 else
24833 int i;
24834 int j = -1;
24835 bool used_update = false;
24836 rtx restore_basereg = NULL_RTX;
24838 if (MEM_P (src) && INT_REGNO_P (reg))
24840 rtx breg;
24842 if (GET_CODE (XEXP (src, 0)) == PRE_INC
24843 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
24845 rtx delta_rtx;
24846 breg = XEXP (XEXP (src, 0), 0);
24847 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
24848 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
24849 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
24850 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24851 src = replace_equiv_address (src, breg);
24853 else if (! rs6000_offsettable_memref_p (src, reg_mode))
24855 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
24857 rtx basereg = XEXP (XEXP (src, 0), 0);
24858 if (TARGET_UPDATE)
24860 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
24861 emit_insn (gen_rtx_SET (ndst,
24862 gen_rtx_MEM (reg_mode,
24863 XEXP (src, 0))));
24864 used_update = true;
24866 else
24867 emit_insn (gen_rtx_SET (basereg,
24868 XEXP (XEXP (src, 0), 1)));
24869 src = replace_equiv_address (src, basereg);
24871 else
24873 rtx basereg = gen_rtx_REG (Pmode, reg);
24874 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24875 src = replace_equiv_address (src, basereg);
24879 breg = XEXP (src, 0);
24880 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24881 breg = XEXP (breg, 0);
24883 /* If the base register we are using to address memory is
24884 also a destination reg, then change that register last. */
24885 if (REG_P (breg)
24886 && REGNO (breg) >= REGNO (dst)
24887 && REGNO (breg) < REGNO (dst) + nregs)
24888 j = REGNO (breg) - REGNO (dst);
24890 else if (MEM_P (dst) && INT_REGNO_P (reg))
24892 rtx breg;
24894 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24895 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24897 rtx delta_rtx;
24898 breg = XEXP (XEXP (dst, 0), 0);
24899 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24900 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24901 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24903 /* We have to update the breg before doing the store.
24904 Use store with update, if available. */
24906 if (TARGET_UPDATE)
24908 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24909 emit_insn (TARGET_32BIT
24910 ? (TARGET_POWERPC64
24911 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24912 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24913 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24914 used_update = true;
24916 else
24917 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24918 dst = replace_equiv_address (dst, breg);
24920 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
24921 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24923 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24925 rtx basereg = XEXP (XEXP (dst, 0), 0);
24926 if (TARGET_UPDATE)
24928 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24929 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24930 XEXP (dst, 0)),
24931 nsrc));
24932 used_update = true;
24934 else
24935 emit_insn (gen_rtx_SET (basereg,
24936 XEXP (XEXP (dst, 0), 1)));
24937 dst = replace_equiv_address (dst, basereg);
24939 else
24941 rtx basereg = XEXP (XEXP (dst, 0), 0);
24942 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24943 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24944 && REG_P (basereg)
24945 && REG_P (offsetreg)
24946 && REGNO (basereg) != REGNO (offsetreg));
24947 if (REGNO (basereg) == 0)
24949 rtx tmp = offsetreg;
24950 offsetreg = basereg;
24951 basereg = tmp;
24953 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24954 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24955 dst = replace_equiv_address (dst, basereg);
24958 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24959 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
24962 for (i = 0; i < nregs; i++)
24964 /* Calculate index to next subword. */
24965 ++j;
24966 if (j == nregs)
24967 j = 0;
24969 /* If compiler already emitted move of first word by
24970 store with update, no need to do anything. */
24971 if (j == 0 && used_update)
24972 continue;
24974 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24975 j * reg_mode_size),
24976 simplify_gen_subreg (reg_mode, src, mode,
24977 j * reg_mode_size)));
24979 if (restore_basereg != NULL_RTX)
24980 emit_insn (restore_basereg);
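/* Overlap example (illustrative): for a TImode GPR move with src = r10
   (pair r10/r11) and dst = r11 (pair r11/r12), REGNO (src) < REGNO (dst),
   so the backwards loop above copies r11 into r12 before r10 into r11;
   a forward copy would clobber r11 before it was read.  */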
24985 /* This page contains routines that are used to determine what the
24986 function prologue and epilogue code will do and write them out. */
24988 static inline bool
24989 save_reg_p (int r)
24991 return !call_used_regs[r] && df_regs_ever_live_p (r);
24994 /* Determine whether the gp REG is really used. */
24996 static bool
24997 rs6000_reg_live_or_pic_offset_p (int reg)
24999 /* We need to mark the PIC offset register live for the same conditions
25000 as it is set up, or otherwise it won't be saved before we clobber it. */
25002 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
25004 if (TARGET_TOC && TARGET_MINIMAL_TOC
25005 && (crtl->calls_eh_return
25006 || df_regs_ever_live_p (reg)
25007 || get_pool_size ()))
25008 return true;
25010 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
25011 && flag_pic)
25012 return true;
25015 /* If the function calls eh_return, claim used all the registers that would
25016 be checked for liveness otherwise. */
25018 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
25019 && !call_used_regs[reg]);
25022 /* Return the first fixed-point register that is required to be
25023 saved. 32 if none. */
25026 first_reg_to_save (void)
25028 int first_reg;
25030 /* Find lowest numbered live register. */
25031 for (first_reg = 13; first_reg <= 31; first_reg++)
25032 if (save_reg_p (first_reg))
25033 break;
25035 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
25036 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
25037 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25038 || (TARGET_TOC && TARGET_MINIMAL_TOC))
25039 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
25040 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
25042 #if TARGET_MACHO
25043 if (flag_pic
25044 && crtl->uses_pic_offset_table
25045 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
25046 return RS6000_PIC_OFFSET_TABLE_REGNUM;
25047 #endif
25049 return first_reg;
25052 /* Similar, for FP regs. */
25055 first_fp_reg_to_save (void)
25057 int first_reg;
25059 /* Find lowest numbered live register. */
25060 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
25061 if (save_reg_p (first_reg))
25062 break;
25064 return first_reg;
25067 /* Similar, for AltiVec regs. */
25069 static int
25070 first_altivec_reg_to_save (void)
25072 int i;
25074 /* Stack frame remains as is unless we are in AltiVec ABI. */
25075 if (! TARGET_ALTIVEC_ABI)
25076 return LAST_ALTIVEC_REGNO + 1;
25078 /* On Darwin, the unwind routines are compiled without
25079 TARGET_ALTIVEC, and use save_world to save/restore the
25080 altivec registers when necessary. */
25081 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25082 && ! TARGET_ALTIVEC)
25083 return FIRST_ALTIVEC_REGNO + 20;
25085 /* Find lowest numbered live register. */
25086 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
25087 if (save_reg_p (i))
25088 break;
25090 return i;
25093 /* Return a 32-bit mask of the AltiVec registers we need to set in
25094 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
25095 the 32-bit word is 0. */
25097 static unsigned int
25098 compute_vrsave_mask (void)
25100 unsigned int i, mask = 0;
25102 /* On Darwin, the unwind routines are compiled without
25103 TARGET_ALTIVEC, and use save_world to save/restore the
25104 call-saved altivec registers when necessary. */
25105 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
25106 && ! TARGET_ALTIVEC)
25107 mask |= 0xFFF;
25109 /* First, find out if we use _any_ altivec registers. */
25110 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25111 if (df_regs_ever_live_p (i))
25112 mask |= ALTIVEC_REG_BIT (i);
25114 if (mask == 0)
25115 return mask;
25117 /* Next, remove the argument registers from the set. These must
25118 be in the VRSAVE mask set by the caller, so we don't need to add
25119 them in again. More importantly, the mask we compute here is
25120 used to generate CLOBBERs in the set_vrsave insn, and we do not
25121 wish the argument registers to die. */
25122 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
25123 mask &= ~ALTIVEC_REG_BIT (i);
25125 /* Similarly, remove the return value from the set. */
25127 bool yes = false;
25128 diddle_return_value (is_altivec_return_reg, &yes);
25129 if (yes)
25130 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
25133 return mask;
25136 /* For a very restricted set of circumstances, we can cut down the
25137 size of prologues/epilogues by calling our own save/restore-the-world
25138 routines. */
25140 static void
25141 compute_save_world_info (rs6000_stack_t *info)
25143 info->world_save_p = 1;
25144 info->world_save_p
25145 = (WORLD_SAVE_P (info)
25146 && DEFAULT_ABI == ABI_DARWIN
25147 && !cfun->has_nonlocal_label
25148 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
25149 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
25150 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
25151 && info->cr_save_p);
25153 /* This will not work in conjunction with sibcalls. Make sure there
25154 are none. (This check is expensive, but seldom executed.) */
25155 if (WORLD_SAVE_P (info))
25157 rtx_insn *insn;
25158 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
25159 if (CALL_P (insn) && SIBLING_CALL_P (insn))
25161 info->world_save_p = 0;
25162 break;
25166 if (WORLD_SAVE_P (info))
25168 /* Even if we're not touching VRsave, make sure there's room on the
25169 stack for it, if it looks like we're calling SAVE_WORLD, which
25170 will attempt to save it. */
25171 info->vrsave_size = 4;
25173 /* If we are going to save the world, we need to save the link register too. */
25174 info->lr_save_p = 1;
25176 /* "Save" the VRsave register too if we're saving the world. */
25177 if (info->vrsave_mask == 0)
25178 info->vrsave_mask = compute_vrsave_mask ();
25180 /* Because the Darwin register save/restore routines only handle
25181 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
25182 check. */
25183 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
25184 && (info->first_altivec_reg_save
25185 >= FIRST_SAVED_ALTIVEC_REGNO));
25188 return;
25192 static void
25193 is_altivec_return_reg (rtx reg, void *xyes)
25195 bool *yes = (bool *) xyes;
25196 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
25197 *yes = true;
25201 /* Return whether REG is a global user reg or has been specified by
25202 -ffixed-REG. We should not restore these, and so cannot use
25203 lmw or out-of-line restore functions if there are any. We also
25204 can't save them (well, emit frame notes for them), because frame
25205 unwinding during exception handling will restore saved registers. */
25207 static bool
25208 fixed_reg_p (int reg)
25210 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
25211 backend sets it, overriding anything the user might have given. */
25212 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
25213 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
25214 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
25215 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
25216 return false;
25218 return fixed_regs[reg];
25221 /* Determine the strategy for savings/restoring registers. */
25223 enum {
25224 SAVE_MULTIPLE = 0x1,
25225 SAVE_INLINE_GPRS = 0x2,
25226 SAVE_INLINE_FPRS = 0x4,
25227 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
25228 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
25229 SAVE_INLINE_VRS = 0x20,
25230 REST_MULTIPLE = 0x100,
25231 REST_INLINE_GPRS = 0x200,
25232 REST_INLINE_FPRS = 0x400,
25233 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
25234 REST_INLINE_VRS = 0x1000
25237 static int
25238 rs6000_savres_strategy (rs6000_stack_t *info,
25239 bool using_static_chain_p)
25241 int strategy = 0;
25243 /* Select between in-line and out-of-line save and restore of regs.
25244 First, all the obvious cases where we don't use out-of-line. */
25245 if (crtl->calls_eh_return
25246 || cfun->machine->ra_need_lr)
25247 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
25248 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
25249 | SAVE_INLINE_VRS | REST_INLINE_VRS);
25251 if (info->first_gp_reg_save == 32)
25252 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25254 if (info->first_fp_reg_save == 64
25255 /* The out-of-line FP routines use double-precision stores;
25256 we can't use those routines if we don't have such stores. */
25257 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
25258 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25260 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
25261 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25263 /* Define cutoff for using out-of-line functions to save registers. */
25264 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
25266 if (!optimize_size)
25268 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25269 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25270 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25272 else
25274 /* Prefer out-of-line restore if it will exit. */
25275 if (info->first_fp_reg_save > 61)
25276 strategy |= SAVE_INLINE_FPRS;
25277 if (info->first_gp_reg_save > 29)
25279 if (info->first_fp_reg_save == 64)
25280 strategy |= SAVE_INLINE_GPRS;
25281 else
25282 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25284 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
25285 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25288 else if (DEFAULT_ABI == ABI_DARWIN)
25290 if (info->first_fp_reg_save > 60)
25291 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25292 if (info->first_gp_reg_save > 29)
25293 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25294 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25296 else
25298 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25299 if (info->first_fp_reg_save > 61)
25300 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
25301 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25302 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
25305 /* Don't bother to try to save things out-of-line if r11 is occupied
25306 by the static chain. It would require too much fiddling and the
25307 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
25308 pointer on Darwin, and AIX uses r1 or r12. */
25309 if (using_static_chain_p
25310 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25311 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
25312 | SAVE_INLINE_GPRS
25313 | SAVE_INLINE_VRS);
25315 /* Saving CR interferes with the exit routines used on the SPE, so
25316 just punt here. */
25317 if (TARGET_SPE_ABI
25318 && info->spe_64bit_regs_used
25319 && info->cr_save_p)
25320 strategy |= REST_INLINE_GPRS;
25322 /* We can only use the out-of-line routines to restore fprs if we've
25323 saved all the registers from first_fp_reg_save in the prologue.
25324 Otherwise, we risk loading garbage. Of course, if we have saved
25325 out-of-line then we know we haven't skipped any fprs. */
25326 if ((strategy & SAVE_INLINE_FPRS)
25327 && !(strategy & REST_INLINE_FPRS))
25329 int i;
25331 for (i = info->first_fp_reg_save; i < 64; i++)
25332 if (fixed_regs[i] || !save_reg_p (i))
25334 strategy |= REST_INLINE_FPRS;
25335 break;
25339 /* Similarly, for altivec regs. */
25340 if ((strategy & SAVE_INLINE_VRS)
25341 && !(strategy & REST_INLINE_VRS))
25343 int i;
25345 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
25346 if (fixed_regs[i] || !save_reg_p (i))
25348 strategy |= REST_INLINE_VRS;
25349 break;
25353 /* info->lr_save_p isn't yet set if the only reason lr needs to be
25354 saved is an out-of-line save or restore. Set up the value for
25355 the next test (excluding out-of-line gprs). */
25356 bool lr_save_p = (info->lr_save_p
25357 || !(strategy & SAVE_INLINE_FPRS)
25358 || !(strategy & SAVE_INLINE_VRS)
25359 || !(strategy & REST_INLINE_FPRS)
25360 || !(strategy & REST_INLINE_VRS));
25362 if (TARGET_MULTIPLE
25363 && !TARGET_POWERPC64
25364 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
25365 && info->first_gp_reg_save < 31)
25367 /* Prefer store multiple for saves over out-of-line routines,
25368 since the store-multiple instruction will always be smaller. */
25369 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
25371 /* The situation is more complicated with load multiple. We'd
25372 prefer to use the out-of-line routines for restores, since the
25373 "exit" out-of-line routines can handle the restore of LR and the
25374 frame teardown. However it doesn't make sense to use the
25375 out-of-line routine if that is the only reason we'd need to save
25376 LR, and we can't use the "exit" out-of-line gpr restore if we
25377 have saved some fprs; in those cases it is advantageous to use
25378 load multiple when available. */
25379 if (info->first_fp_reg_save != 64 || !lr_save_p)
25380 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
25383 /* Using the "exit" out-of-line routine does not improve code size
25384 if using it would require lr to be saved and if only saving one
25385 or two gprs. */
25386 else if (!lr_save_p && info->first_gp_reg_save > 29)
25387 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
25389 /* We can only use load multiple or the out-of-line routines to
25390 restore gprs if we've saved all the registers from
25391 first_gp_reg_save. Otherwise, we risk loading garbage.
25392 Of course, if we have saved out-of-line or used stmw then we know
25393 we haven't skipped any gprs. */
25394 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
25395 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
25397 int i;
25399 for (i = info->first_gp_reg_save; i < 32; i++)
25400 if (fixed_reg_p (i) || !save_reg_p (i))
25402 strategy |= REST_INLINE_GPRS;
25403 strategy &= ~REST_MULTIPLE;
25404 break;
25408 if (TARGET_ELF && TARGET_64BIT)
25410 if (!(strategy & SAVE_INLINE_FPRS))
25411 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25412 else if (!(strategy & SAVE_INLINE_GPRS)
25413 && info->first_fp_reg_save == 64)
25414 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
25416 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
25417 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
25419 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
25420 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
25422 return strategy;
25425 /* Calculate the stack information for the current function. This is
25426 complicated by having two separate calling sequences, the AIX calling
25427 sequence and the V.4 calling sequence.
25429 AIX (and Darwin/Mac OS X) stack frames look like:
25430 32-bit 64-bit
25431 SP----> +---------------------------------------+
25432 | back chain to caller | 0 0
25433 +---------------------------------------+
25434 | saved CR | 4 8 (8-11)
25435 +---------------------------------------+
25436 | saved LR | 8 16
25437 +---------------------------------------+
25438 | reserved for compilers | 12 24
25439 +---------------------------------------+
25440 | reserved for binders | 16 32
25441 +---------------------------------------+
25442 | saved TOC pointer | 20 40
25443 +---------------------------------------+
25444 | Parameter save area (P) | 24 48
25445 +---------------------------------------+
25446 | Alloca space (A) | 24+P etc.
25447 +---------------------------------------+
25448 | Local variable space (L) | 24+P+A
25449 +---------------------------------------+
25450 | Float/int conversion temporary (X) | 24+P+A+L
25451 +---------------------------------------+
25452 | Save area for AltiVec registers (W) | 24+P+A+L+X
25453 +---------------------------------------+
25454 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
25455 +---------------------------------------+
25456 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
25457 +---------------------------------------+
25458 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
25459 +---------------------------------------+
25460 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
25461 +---------------------------------------+
25462 old SP->| back chain to caller's caller |
25463 +---------------------------------------+
25465 The required alignment for AIX configurations is two words (i.e., 8
25466 or 16 bytes).
25468 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
25470 SP----> +---------------------------------------+
25471 | Back chain to caller | 0
25472 +---------------------------------------+
25473 | Save area for CR | 8
25474 +---------------------------------------+
25475 | Saved LR | 16
25476 +---------------------------------------+
25477 | Saved TOC pointer | 24
25478 +---------------------------------------+
25479 | Parameter save area (P) | 32
25480 +---------------------------------------+
25481 | Alloca space (A) | 32+P
25482 +---------------------------------------+
25483 | Local variable space (L) | 32+P+A
25484 +---------------------------------------+
25485 | Save area for AltiVec registers (W) | 32+P+A+L
25486 +---------------------------------------+
25487 | AltiVec alignment padding (Y) | 32+P+A+L+W
25488 +---------------------------------------+
25489 | Save area for GP registers (G) | 32+P+A+L+W+Y
25490 +---------------------------------------+
25491 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
25492 +---------------------------------------+
25493 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
25494 +---------------------------------------+
25497 V.4 stack frames look like:
25499 SP----> +---------------------------------------+
25500 | back chain to caller | 0
25501 +---------------------------------------+
25502 | caller's saved LR | 4
25503 +---------------------------------------+
25504 | Parameter save area (P) | 8
25505 +---------------------------------------+
25506 | Alloca space (A) | 8+P
25507 +---------------------------------------+
25508 | Varargs save area (V) | 8+P+A
25509 +---------------------------------------+
25510 | Local variable space (L) | 8+P+A+V
25511 +---------------------------------------+
25512 | Float/int conversion temporary (X) | 8+P+A+V+L
25513 +---------------------------------------+
25514 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
25515 +---------------------------------------+
25516 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
25517 +---------------------------------------+
25518 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
25519 +---------------------------------------+
25520 | SPE: area for 64-bit GP registers |
25521 +---------------------------------------+
25522 | SPE alignment padding |
25523 +---------------------------------------+
25524 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
25525 +---------------------------------------+
25526 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
25527 +---------------------------------------+
25528 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
25529 +---------------------------------------+
25530 old SP->| back chain to caller's caller |
25531 +---------------------------------------+
25533 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
25534 given. (But note below and in sysv4.h that we require only 8 and
25535 may round up the size of our stack frame anyway. The historical
25536 reason is early versions of powerpc-linux which didn't properly
25537 align the stack at program startup. A happy side-effect is that
25538 -mno-eabi libraries can be used with -meabi programs.)
25540 The EABI configuration defaults to the V.4 layout. However,
25541 the stack alignment requirements may differ. If -mno-eabi is not
25542 given, the required stack alignment is 8 bytes; if -mno-eabi is
25543 given, the required alignment is 16 bytes. (But see V.4 comment
25544 above.) */
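/* A rough worked example (illustrative, not part of the original
   comment): under the 64-bit ELFv2 layout above, a function with
   P = A = 0, 64 bytes of locals (L) and two GPRs saved places G at
   offset 32+0+0+64 = 96 from SP, has no F area, and the old SP ends
   up at 96 + 16 = 112, a 16-byte multiple. */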
25546 #ifndef ABI_STACK_BOUNDARY
25547 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
25548 #endif
25550 static rs6000_stack_t *
25551 rs6000_stack_info (void)
25553 /* We should never be called for thunks; we are not set up for that. */
25554 gcc_assert (!cfun->is_thunk);
25556 rs6000_stack_t *info = &stack_info;
25557 int reg_size = TARGET_32BIT ? 4 : 8;
25558 int ehrd_size;
25559 int ehcr_size;
25560 int save_align;
25561 int first_gp;
25562 HOST_WIDE_INT non_fixed_size;
25563 bool using_static_chain_p;
25565 if (reload_completed && info->reload_completed)
25566 return info;
25568 memset (info, 0, sizeof (*info));
25569 info->reload_completed = reload_completed;
25571 if (TARGET_SPE)
25573 /* Cache value so we don't rescan instruction chain over and over. */
25574 if (cfun->machine->spe_insn_chain_scanned_p == 0)
25575 cfun->machine->spe_insn_chain_scanned_p
25576 = spe_func_has_64bit_regs_p () + 1;
25577 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
25580 /* Select which calling sequence. */
25581 info->abi = DEFAULT_ABI;
25583 /* Calculate which registers need to be saved & save area size. */
25584 info->first_gp_reg_save = first_reg_to_save ();
25585 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
25586 even if it currently looks like we won't. Reload may need it to
25587 get at a constant; if so, it will have already created a constant
25588 pool entry for it. */
25589 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
25590 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25591 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25592 && crtl->uses_const_pool
25593 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
25594 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
25595 else
25596 first_gp = info->first_gp_reg_save;
25598 info->gp_size = reg_size * (32 - first_gp);
25600 /* For the SPE, we have an additional upper 32-bits on each GPR.
25601 Ideally we should save the entire 64-bits only when the upper
25602 half is used in SIMD instructions. Since we only record
25603 registers live (not the size they are used in), this proves
25604 difficult because we'd have to traverse the instruction chain at
25605 the right time, taking reload into account. This is a real pain,
25606 so we opt to always save the GPRs in 64-bits if even one register
25607 gets used in 64-bits. Otherwise, all the registers in the frame
25608 get saved in 32-bits.
25610 So, when we save all GPRs (except the SP) in 64-bits, the
25611 traditional GP save area will be empty. */
25612 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25613 info->gp_size = 0;
25615 info->first_fp_reg_save = first_fp_reg_to_save ();
25616 info->fp_size = 8 * (64 - info->first_fp_reg_save);
25618 info->first_altivec_reg_save = first_altivec_reg_to_save ();
25619 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
25620 - info->first_altivec_reg_save);
25622 /* Does this function call anything? */
25623 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
25625 /* Determine if we need to save the condition code registers. */
25626 if (save_reg_p (CR2_REGNO)
25627 || save_reg_p (CR3_REGNO)
25628 || save_reg_p (CR4_REGNO))
25630 info->cr_save_p = 1;
25631 if (DEFAULT_ABI == ABI_V4)
25632 info->cr_size = reg_size;
25635 /* If the current function calls __builtin_eh_return, then we need
25636 to allocate stack space for registers that will hold data for
25637 the exception handler. */
25638 if (crtl->calls_eh_return)
25640 unsigned int i;
25641 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
25642 continue;
25644 /* SPE saves EH registers in 64-bits. */
25645 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
25646 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
25648 else
25649 ehrd_size = 0;
25651 /* In the ELFv2 ABI, we also need to allocate space for separate
25652 CR field save areas if the function calls __builtin_eh_return. */
25653 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25655 /* This hard-codes that we have three call-saved CR fields. */
25656 ehcr_size = 3 * reg_size;
25657 /* We do *not* use the regular CR save mechanism. */
25658 info->cr_save_p = 0;
25660 else
25661 ehcr_size = 0;
25663 /* Determine various sizes. */
25664 info->reg_size = reg_size;
25665 info->fixed_size = RS6000_SAVE_AREA;
25666 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
25667 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
25668 TARGET_ALTIVEC ? 16 : 8);
25669 if (FRAME_GROWS_DOWNWARD)
25670 info->vars_size
25671 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
25672 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
25673 - (info->fixed_size + info->vars_size + info->parm_size);
25675 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25676 info->spe_gp_size = 8 * (32 - first_gp);
25678 if (TARGET_ALTIVEC_ABI)
25679 info->vrsave_mask = compute_vrsave_mask ();
25681 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
25682 info->vrsave_size = 4;
25684 compute_save_world_info (info);
25686 /* Calculate the offsets. */
25687 switch (DEFAULT_ABI)
25689 case ABI_NONE:
25690 default:
25691 gcc_unreachable ();
25693 case ABI_AIX:
25694 case ABI_ELFv2:
25695 case ABI_DARWIN:
25696 info->fp_save_offset = -info->fp_size;
25697 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25699 if (TARGET_ALTIVEC_ABI)
25701 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
25703 /* Align stack so vector save area is on a quadword boundary.
25704 The padding goes above the vectors. */
25705 if (info->altivec_size != 0)
25706 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
25708 info->altivec_save_offset = info->vrsave_save_offset
25709 - info->altivec_padding_size
25710 - info->altivec_size;
25711 gcc_assert (info->altivec_size == 0
25712 || info->altivec_save_offset % 16 == 0);
25714 /* Adjust for AltiVec case. */
25715 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
25717 else
25718 info->ehrd_offset = info->gp_save_offset - ehrd_size;
25720 info->ehcr_offset = info->ehrd_offset - ehcr_size;
25721 info->cr_save_offset = reg_size; /* first word when 64-bit. */
25722 info->lr_save_offset = 2*reg_size;
25723 break;
25725 case ABI_V4:
25726 info->fp_save_offset = -info->fp_size;
25727 info->gp_save_offset = info->fp_save_offset - info->gp_size;
25728 info->cr_save_offset = info->gp_save_offset - info->cr_size;
25730 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25732 /* Align stack so SPE GPR save area is aligned on a
25733 double-word boundary. */
25734 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
25735 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
25736 else
25737 info->spe_padding_size = 0;
25739 info->spe_gp_save_offset = info->cr_save_offset
25740 - info->spe_padding_size
25741 - info->spe_gp_size;
25743 /* Adjust for SPE case. */
25744 info->ehrd_offset = info->spe_gp_save_offset;
25746 else if (TARGET_ALTIVEC_ABI)
25748 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
25750 /* Align stack so vector save area is on a quadword boundary. */
25751 if (info->altivec_size != 0)
25752 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
25754 info->altivec_save_offset = info->vrsave_save_offset
25755 - info->altivec_padding_size
25756 - info->altivec_size;
25758 /* Adjust for AltiVec case. */
25759 info->ehrd_offset = info->altivec_save_offset;
25761 else
25762 info->ehrd_offset = info->cr_save_offset;
25764 info->ehrd_offset -= ehrd_size;
25765 info->lr_save_offset = reg_size;
25768 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
25769 info->save_size = RS6000_ALIGN (info->fp_size
25770 + info->gp_size
25771 + info->altivec_size
25772 + info->altivec_padding_size
25773 + info->spe_gp_size
25774 + info->spe_padding_size
25775 + ehrd_size
25776 + ehcr_size
25777 + info->cr_size
25778 + info->vrsave_size,
25779 save_align);
25781 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
25783 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
25784 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
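/* Worked example (illustrative): on 64-bit ELFv2 with two GPRs saved
   (gp_size = 16), no FPR/AltiVec saves, 64 bytes of locals and no
   outgoing arguments, save_size is RS6000_ALIGN (16, 16) = 16,
   non_fixed_size is 64 + 0 + 16 = 80, and total_size becomes
   RS6000_ALIGN (80 + 32, 16) = 112. */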
25786 /* Determine if we need to save the link register. */
25787 if (info->calls_p
25788 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25789 && crtl->profile
25790 && !TARGET_PROFILE_KERNEL)
25791 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
25792 #ifdef TARGET_RELOCATABLE
25793 || (DEFAULT_ABI == ABI_V4
25794 && (TARGET_RELOCATABLE || flag_pic > 1)
25795 && get_pool_size () != 0)
25796 #endif
25797 || rs6000_ra_ever_killed ())
25798 info->lr_save_p = 1;
25800 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25801 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25802 && call_used_regs[STATIC_CHAIN_REGNUM]);
25803 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
25805 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
25806 || !(info->savres_strategy & SAVE_INLINE_FPRS)
25807 || !(info->savres_strategy & SAVE_INLINE_VRS)
25808 || !(info->savres_strategy & REST_INLINE_GPRS)
25809 || !(info->savres_strategy & REST_INLINE_FPRS)
25810 || !(info->savres_strategy & REST_INLINE_VRS))
25811 info->lr_save_p = 1;
25813 if (info->lr_save_p)
25814 df_set_regs_ever_live (LR_REGNO, true);
25816 /* Determine if we need to allocate any stack frame:
25818 For AIX we need to push the stack if a frame pointer is needed
25819 (because the stack might be dynamically adjusted), if we are
25820 debugging, if we make calls, or if the sum of fp_save, gp_save,
25821 and local variables are more than the space needed to save all
25822 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
25823 + 18*8 = 288 (GPR13 reserved).
25825 For V.4 we don't have the stack cushion that AIX uses, but assume
25826 that the debugger can handle stackless frames. */
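/* For example (illustrative): a 64-bit leaf function that makes no
   calls and needs only 200 bytes of non-fixed stack stays under the
   288-byte cushion tested below, so push_p remains 0; under ABI_V4
   the same function would push, since any nonzero non_fixed_size
   forces a frame there. */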
25828 if (info->calls_p)
25829 info->push_p = 1;
25831 else if (DEFAULT_ABI == ABI_V4)
25832 info->push_p = non_fixed_size != 0;
25834 else if (frame_pointer_needed)
25835 info->push_p = 1;
25837 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
25838 info->push_p = 1;
25840 else
25841 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
25843 return info;
25846 /* Return true if the current function uses any GPRs in 64-bit SIMD
25847 mode. */
25849 static bool
25850 spe_func_has_64bit_regs_p (void)
25852 rtx_insn *insns, *insn;
25854 /* Functions that save and restore all the call-saved registers will
25855 need to save/restore the registers in 64-bits. */
25856 if (crtl->calls_eh_return
25857 || cfun->calls_setjmp
25858 || crtl->has_nonlocal_goto)
25859 return true;
25861 insns = get_insns ();
25863 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
25865 if (INSN_P (insn))
25867 rtx i;
25869 /* FIXME: This should be implemented with attributes...
25871 (set_attr "spe64" "true")....then,
25872 if (get_spe64(insn)) return true;
25874 It's the only reliable way to do the stuff below. */
25876 i = PATTERN (insn);
25877 if (GET_CODE (i) == SET)
25879 machine_mode mode = GET_MODE (SET_SRC (i));
25881 if (SPE_VECTOR_MODE (mode))
25882 return true;
25883 if (TARGET_E500_DOUBLE
25884 && (mode == DFmode || FLOAT128_2REG_P (mode)))
25885 return true;
25890 return false;
25893 static void
25894 debug_stack_info (rs6000_stack_t *info)
25896 const char *abi_string;
25898 if (! info)
25899 info = rs6000_stack_info ();
25901 fprintf (stderr, "\nStack information for function %s:\n",
25902 ((current_function_decl && DECL_NAME (current_function_decl))
25903 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
25904 : "<unknown>"));
25906 switch (info->abi)
25908 default: abi_string = "Unknown"; break;
25909 case ABI_NONE: abi_string = "NONE"; break;
25910 case ABI_AIX: abi_string = "AIX"; break;
25911 case ABI_ELFv2: abi_string = "ELFv2"; break;
25912 case ABI_DARWIN: abi_string = "Darwin"; break;
25913 case ABI_V4: abi_string = "V.4"; break;
25916 fprintf (stderr, "\tABI = %5s\n", abi_string);
25918 if (TARGET_ALTIVEC_ABI)
25919 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
25921 if (TARGET_SPE_ABI)
25922 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
25924 if (info->first_gp_reg_save != 32)
25925 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
25927 if (info->first_fp_reg_save != 64)
25928 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
25930 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
25931 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
25932 info->first_altivec_reg_save);
25934 if (info->lr_save_p)
25935 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
25937 if (info->cr_save_p)
25938 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
25940 if (info->vrsave_mask)
25941 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25943 if (info->push_p)
25944 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25946 if (info->calls_p)
25947 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25949 if (info->gp_size)
25950 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25952 if (info->fp_size)
25953 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25955 if (info->altivec_size)
25956 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25957 info->altivec_save_offset);
25959 if (info->spe_gp_size)
25960 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
25961 info->spe_gp_save_offset);
25963 if (info->vrsave_size)
25964 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25965 info->vrsave_save_offset);
25967 if (info->lr_save_p)
25968 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25970 if (info->cr_save_p)
25971 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25973 if (info->varargs_save_offset)
25974 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25976 if (info->total_size)
25977 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25978 info->total_size);
25980 if (info->vars_size)
25981 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25982 info->vars_size);
25984 if (info->parm_size)
25985 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25987 if (info->fixed_size)
25988 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25990 if (info->gp_size)
25991 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25993 if (info->spe_gp_size)
25994 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
25996 if (info->fp_size)
25997 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25999 if (info->altivec_size)
26000 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
26002 if (info->vrsave_size)
26003 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
26005 if (info->altivec_padding_size)
26006 fprintf (stderr, "\taltivec_padding_size= %5d\n",
26007 info->altivec_padding_size);
26009 if (info->spe_padding_size)
26010 fprintf (stderr, "\tspe_padding_size = %5d\n",
26011 info->spe_padding_size);
26013 if (info->cr_size)
26014 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
26016 if (info->save_size)
26017 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
26019 if (info->reg_size != 4)
26020 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
26022 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
26024 fprintf (stderr, "\n");
26028 rs6000_return_addr (int count, rtx frame)
26030 /* Currently we don't optimize very well between prologue and body
26031 code, and for PIC code the result can actually be quite bad, so
26032 don't try to be too clever here. */
26033 if (count != 0
26034 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
26036 cfun->machine->ra_needs_full_frame = 1;
26038 return
26039 gen_rtx_MEM
26040 (Pmode,
26041 memory_address
26042 (Pmode,
26043 plus_constant (Pmode,
26044 copy_to_reg
26045 (gen_rtx_MEM (Pmode,
26046 memory_address (Pmode, frame))),
26047 RETURN_ADDRESS_OFFSET)));
26050 cfun->machine->ra_need_lr = 1;
26051 return get_hard_reg_initial_val (Pmode, LR_REGNO);
26054 /* Say whether a function is a candidate for sibcall handling or not. */
26056 static bool
26057 rs6000_function_ok_for_sibcall (tree decl, tree exp)
26059 tree fntype;
26061 if (decl)
26062 fntype = TREE_TYPE (decl);
26063 else
26064 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
26066 /* We can't do it if the called function has more vector parameters
26067 than the current function; there's nowhere to put the VRsave code. */
26068 if (TARGET_ALTIVEC_ABI
26069 && TARGET_ALTIVEC_VRSAVE
26070 && !(decl && decl == current_function_decl))
26072 function_args_iterator args_iter;
26073 tree type;
26074 int nvreg = 0;
26076 /* Functions with vector parameters are required to have a
26077 prototype, so the argument type info must be available
26078 here. */
26079 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
26080 if (TREE_CODE (type) == VECTOR_TYPE
26081 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26082 nvreg++;
26084 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
26085 if (TREE_CODE (type) == VECTOR_TYPE
26086 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
26087 nvreg--;
26089 if (nvreg > 0)
26090 return false;
26093 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
26094 functions, because the callee may have a TOC pointer different
26095 from the caller's, and there's no way to ensure we restore the TOC when
26096 we return. With the secure-plt SYSV ABI we can't make non-local
26097 calls when -fpic/PIC because the plt call stubs use r30. */
26098 if (DEFAULT_ABI == ABI_DARWIN
26099 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26100 && decl
26101 && !DECL_EXTERNAL (decl)
26102 && !DECL_WEAK (decl)
26103 && (*targetm.binds_local_p) (decl))
26104 || (DEFAULT_ABI == ABI_V4
26105 && (!TARGET_SECURE_PLT
26106 || !flag_pic
26107 || (decl
26108 && (*targetm.binds_local_p) (decl)))))
26110 tree attr_list = TYPE_ATTRIBUTES (fntype);
26112 if (!lookup_attribute ("longcall", attr_list)
26113 || lookup_attribute ("shortcall", attr_list))
26114 return true;
26117 return false;
26120 static int
26121 rs6000_ra_ever_killed (void)
26123 rtx_insn *top;
26124 rtx reg;
26125 rtx_insn *insn;
26127 if (cfun->is_thunk)
26128 return 0;
26130 if (cfun->machine->lr_save_state)
26131 return cfun->machine->lr_save_state - 1;
26133 /* regs_ever_live has LR marked as used if any sibcalls are present,
26134 but this should not force saving and restoring in the
26135 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
26136 clobbers LR, so that is inappropriate. */
26138 /* Also, the prologue can generate a store into LR that
26139 doesn't really count, like this:
26141 move LR->R0
26142 bcl to set PIC register
26143 move LR->R31
26144 move R0->LR
26146 When we're called from the epilogue, we need to avoid counting
26147 this as a store. */
26149 push_topmost_sequence ();
26150 top = get_insns ();
26151 pop_topmost_sequence ();
26152 reg = gen_rtx_REG (Pmode, LR_REGNO);
26154 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
26156 if (INSN_P (insn))
26158 if (CALL_P (insn))
26160 if (!SIBLING_CALL_P (insn))
26161 return 1;
26163 else if (find_regno_note (insn, REG_INC, LR_REGNO))
26164 return 1;
26165 else if (set_of (reg, insn) != NULL_RTX
26166 && !prologue_epilogue_contains (insn))
26167 return 1;
26170 return 0;
26173 /* Emit instructions needed to load the TOC register.
26174 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
26175 a constant pool; or for SVR4 -fpic. */
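/* Illustrative sketch of the secure-plt -fpic sequence emitted by the
   first case below; labels and the use of r30 are examples only:

   bcl 20,31,.LCF0
   .LCF0: mflr 30
   addis 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@ha
   addi 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@l */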
26177 void
26178 rs6000_emit_load_toc_table (int fromprolog)
26180 rtx dest;
26181 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26183 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
26185 char buf[30];
26186 rtx lab, tmp1, tmp2, got;
26188 lab = gen_label_rtx ();
26189 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
26190 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26191 if (flag_pic == 2)
26193 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26194 need_toc_init = 1;
26196 else
26197 got = rs6000_got_sym ();
26198 tmp1 = tmp2 = dest;
26199 if (!fromprolog)
26201 tmp1 = gen_reg_rtx (Pmode);
26202 tmp2 = gen_reg_rtx (Pmode);
26204 emit_insn (gen_load_toc_v4_PIC_1 (lab));
26205 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
26206 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
26207 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
26209 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
26211 emit_insn (gen_load_toc_v4_pic_si ());
26212 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26214 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
26216 char buf[30];
26217 rtx temp0 = (fromprolog
26218 ? gen_rtx_REG (Pmode, 0)
26219 : gen_reg_rtx (Pmode));
26221 if (fromprolog)
26223 rtx symF, symL;
26225 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26226 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26228 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26229 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
26231 emit_insn (gen_load_toc_v4_PIC_1 (symF));
26232 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26233 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
26235 else
26237 rtx tocsym, lab;
26239 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26240 need_toc_init = 1;
26241 lab = gen_label_rtx ();
26242 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
26243 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
26244 if (TARGET_LINK_STACK)
26245 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
26246 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
26248 emit_insn (gen_addsi3 (dest, temp0, dest));
26250 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
26252 /* This is for AIX code running in non-PIC ELF32. */
26253 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
26255 need_toc_init = 1;
26256 emit_insn (gen_elf_high (dest, realsym));
26257 emit_insn (gen_elf_low (dest, dest, realsym));
26259 else
26261 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26263 if (TARGET_32BIT)
26264 emit_insn (gen_load_toc_aix_si (dest));
26265 else
26266 emit_insn (gen_load_toc_aix_di (dest));
26270 /* Emit instructions to restore the link register after determining where
26271 its value has been stored. */
26273 void
26274 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
26276 rs6000_stack_t *info = rs6000_stack_info ();
26277 rtx operands[2];
26279 operands[0] = source;
26280 operands[1] = scratch;
26282 if (info->lr_save_p)
26284 rtx frame_rtx = stack_pointer_rtx;
26285 HOST_WIDE_INT sp_offset = 0;
26286 rtx tmp;
26288 if (frame_pointer_needed
26289 || cfun->calls_alloca
26290 || info->total_size > 32767)
26292 tmp = gen_frame_mem (Pmode, frame_rtx);
26293 emit_move_insn (operands[1], tmp);
26294 frame_rtx = operands[1];
26296 else if (info->push_p)
26297 sp_offset = info->total_size;
26299 tmp = plus_constant (Pmode, frame_rtx,
26300 info->lr_save_offset + sp_offset);
26301 tmp = gen_frame_mem (Pmode, tmp);
26302 emit_move_insn (tmp, operands[0]);
26304 else
26305 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
26307 /* Freeze lr_save_p. We've just emitted rtl that depends on the
26308 state of lr_save_p so any change from here on would be a bug. In
26309 particular, stop rs6000_ra_ever_killed from considering the SET
26310 of lr we may have added just above. */
26311 cfun->machine->lr_save_state = info->lr_save_p + 1;
26314 static GTY(()) alias_set_type set = -1;
26316 alias_set_type
26317 get_TOC_alias_set (void)
26319 if (set == -1)
26320 set = new_alias_set ();
26321 return set;
26324 /* This returns nonzero if the current function uses the TOC. This is
26325 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
26326 is generated by the ABI_V4 load_toc_* patterns. */
26327 #if TARGET_ELF
26328 static int
26329 uses_TOC (void)
26331 rtx_insn *insn;
26333 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26334 if (INSN_P (insn))
26336 rtx pat = PATTERN (insn);
26337 int i;
26339 if (GET_CODE (pat) == PARALLEL)
26340 for (i = 0; i < XVECLEN (pat, 0); i++)
26342 rtx sub = XVECEXP (pat, 0, i);
26343 if (GET_CODE (sub) == USE)
26345 sub = XEXP (sub, 0);
26346 if (GET_CODE (sub) == UNSPEC
26347 && XINT (sub, 1) == UNSPEC_TOC)
26348 return 1;
26352 return 0;
26354 #endif
26357 create_TOC_reference (rtx symbol, rtx largetoc_reg)
26359 rtx tocrel, tocreg, hi;
26361 if (TARGET_DEBUG_ADDR)
26363 if (GET_CODE (symbol) == SYMBOL_REF)
26364 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
26365 XSTR (symbol, 0));
26366 else
26368 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
26369 GET_RTX_NAME (GET_CODE (symbol)));
26370 debug_rtx (symbol);
26374 if (!can_create_pseudo_p ())
26375 df_set_regs_ever_live (TOC_REGISTER, true);
26377 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
26378 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
26379 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
26380 return tocrel;
26382 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
26383 if (largetoc_reg != NULL)
26385 emit_move_insn (largetoc_reg, hi);
26386 hi = largetoc_reg;
26388 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
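/* Illustrative examples (not in the original source): with a small
   TOC model the UNSPEC_TOCREL address typically ends up as a single
   "ld 9,sym@toc(2)" style access, while for larger code models the
   HIGH/LO_SUM pair built above becomes roughly

   addis 9,2,sym@toc@ha
   ld 9,sym@toc@l(9)

   with LARGETOC_REG, when supplied, holding the addis result. */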
26391 /* Issue assembly directives that create a reference to the given DWARF
26392 FRAME_TABLE_LABEL from the current function section. */
26393 void
26394 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
26396 fprintf (asm_out_file, "\t.ref %s\n",
26397 (* targetm.strip_name_encoding) (frame_table_label));
26400 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
26401 and the change to the stack pointer. */
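/* The tie is a single insn whose PARALLEL sets BLKmode frame memory at
   each base register to zero, along the lines of (illustrative):

   (parallel [(set (mem:BLK (reg 1)) (const_int 0))
   (set (mem:BLK (reg 31)) (const_int 0))])

   which makes the scheduler see a dependency between the stack pointer
   change and the frame saves/restores. */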
26403 static void
26404 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
26406 rtvec p;
26407 int i;
26408 rtx regs[3];
26410 i = 0;
26411 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26412 if (hard_frame_needed)
26413 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
26414 if (!(REGNO (fp) == STACK_POINTER_REGNUM
26415 || (hard_frame_needed
26416 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
26417 regs[i++] = fp;
26419 p = rtvec_alloc (i);
26420 while (--i >= 0)
26422 rtx mem = gen_frame_mem (BLKmode, regs[i]);
26423 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
26426 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
26429 /* Emit the correct code for allocating stack space, as insns.
26430 If COPY_REG, leave a copy of the old stack pointer there, offset
26431 by COPY_OFF. The generated code may use hard register 0 as a temporary. */
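/* Sketch of typical output (illustrative): a small frame becomes a
   single store-with-update such as "stwu 1,-112(1)" (32-bit) or
   "stdu 1,-112(1)" (64-bit); for sizes above 32767 the negated size
   is first materialized in r0 and an indexed update form is used. */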
26433 static rtx_insn *
26434 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
26436 rtx_insn *insn;
26437 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26438 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
26439 rtx todec = gen_int_mode (-size, Pmode);
26440 rtx par, set, mem;
26442 if (INTVAL (todec) != -size)
26444 warning (0, "stack frame too large");
26445 emit_insn (gen_trap ());
26446 return 0;
26449 if (crtl->limit_stack)
26451 if (REG_P (stack_limit_rtx)
26452 && REGNO (stack_limit_rtx) > 1
26453 && REGNO (stack_limit_rtx) <= 31)
26455 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
26456 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26457 const0_rtx));
26459 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
26460 && TARGET_32BIT
26461 && DEFAULT_ABI == ABI_V4)
26463 rtx toload = gen_rtx_CONST (VOIDmode,
26464 gen_rtx_PLUS (Pmode,
26465 stack_limit_rtx,
26466 GEN_INT (size)));
26468 emit_insn (gen_elf_high (tmp_reg, toload));
26469 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
26470 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
26471 const0_rtx));
26473 else
26474 warning (0, "stack limit expression is not supported");
26477 if (copy_reg)
26479 if (copy_off != 0)
26480 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
26481 else
26482 emit_move_insn (copy_reg, stack_reg);
26485 if (size > 32767)
26487 /* Need a note here so that try_split doesn't get confused. */
26488 if (get_last_insn () == NULL_RTX)
26489 emit_note (NOTE_INSN_DELETED);
26490 insn = emit_move_insn (tmp_reg, todec);
26491 try_split (PATTERN (insn), insn, 0);
26492 todec = tmp_reg;
26495 insn = emit_insn (TARGET_32BIT
26496 ? gen_movsi_update_stack (stack_reg, stack_reg,
26497 todec, stack_reg)
26498 : gen_movdi_di_update_stack (stack_reg, stack_reg,
26499 todec, stack_reg));
26500 /* Since we didn't use gen_frame_mem to generate the MEM, grab
26501 it now and set the alias set/attributes. The above gen_*_update
26502 calls will generate a PARALLEL with the MEM set being the first
26503 operation. */
26504 par = PATTERN (insn);
26505 gcc_assert (GET_CODE (par) == PARALLEL);
26506 set = XVECEXP (par, 0, 0);
26507 gcc_assert (GET_CODE (set) == SET);
26508 mem = SET_DEST (set);
26509 gcc_assert (MEM_P (mem));
26510 MEM_NOTRAP_P (mem) = 1;
26511 set_mem_alias_set (mem, get_frame_alias_set ());
26513 RTX_FRAME_RELATED_P (insn) = 1;
26514 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26515 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
26516 GEN_INT (-size))));
26517 return insn;
26520 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
26522 #if PROBE_INTERVAL > 32768
26523 #error Cannot use indexed addressing mode for stack probing
26524 #endif
26526 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
26527 inclusive. These are offsets from the current stack pointer. */
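/* Worked example (illustrative): with PROBE_INTERVAL == 4096,
   FIRST == 16384 and SIZE == 8192, the constant-count path below
   emits probes at sp-20480 and sp-24576. */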
26529 static void
26530 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
26532 /* See if we have a constant small number of probes to generate. If so,
26533 that's the easy case. */
26534 if (first + size <= 32768)
26536 HOST_WIDE_INT i;
26538 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
26539 it exceeds SIZE. If only one probe is needed, this will not
26540 generate any code. Then probe at FIRST + SIZE. */
26541 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
26542 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26543 -(first + i)));
26545 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
26546 -(first + size)));
26549 /* Otherwise, do the same as above, but in a loop. Note that we must be
26550 extra careful with variables wrapping around because we might be at
26551 the very top (or the very bottom) of the address space and we have
26552 to be able to handle this case properly; in particular, we use an
26553 equality test for the loop condition. */
26554 else
26556 HOST_WIDE_INT rounded_size;
26557 rtx r12 = gen_rtx_REG (Pmode, 12);
26558 rtx r0 = gen_rtx_REG (Pmode, 0);
26560 /* Sanity check for the addressing mode we're going to use. */
26561 gcc_assert (first <= 32768);
26563 /* Step 1: round SIZE to the previous multiple of the interval. */
26565 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
26568 /* Step 2: compute initial and final value of the loop counter. */
26570 /* TEST_ADDR = SP + FIRST. */
26571 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
26572 -first)));
26574 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
26575 if (rounded_size > 32768)
26577 emit_move_insn (r0, GEN_INT (-rounded_size));
26578 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
26580 else
26581 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
26582 -rounded_size)));
26585 /* Step 3: the loop
26589 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
26590 probe at TEST_ADDR
26592 while (TEST_ADDR != LAST_ADDR)
26594 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
26595 until it is equal to ROUNDED_SIZE. */
26597 if (TARGET_64BIT)
26598 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
26599 else
26600 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
26603 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
26604 that SIZE is equal to ROUNDED_SIZE. */
26606 if (size != rounded_size)
26607 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
26611 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
26612 absolute addresses. */
26614 const char *
26615 output_probe_stack_range (rtx reg1, rtx reg2)
26617 static int labelno = 0;
26618 char loop_lab[32];
26619 rtx xops[2];
26621 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
26623 /* Loop. */
26624 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
26626 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
26627 xops[0] = reg1;
26628 xops[1] = GEN_INT (-PROBE_INTERVAL);
26629 output_asm_insn ("addi %0,%0,%1", xops);
26631 /* Probe at TEST_ADDR. */
26632 xops[1] = gen_rtx_REG (Pmode, 0);
26633 output_asm_insn ("stw %1,0(%0)", xops);
26635 /* Test if TEST_ADDR == LAST_ADDR. */
26636 xops[1] = reg2;
26637 if (TARGET_64BIT)
26638 output_asm_insn ("cmpd 0,%0,%1", xops);
26639 else
26640 output_asm_insn ("cmpw 0,%0,%1", xops);
26642 /* Branch. */
26643 fputs ("\tbne 0,", asm_out_file);
26644 assemble_name_raw (asm_out_file, loop_lab);
26645 fputc ('\n', asm_out_file);
26647 return "";
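/* The emitted loop looks roughly like this for the 32-bit case, with
   PROBE_INTERVAL == 4096 (illustrative; label and register numbers
   are examples):

   .LPSRL0:
   addi 12,12,-4096
   stw 0,0(12)
   cmpw 0,12,0
   bne 0,.LPSRL0 */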
26650 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26651 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26652 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26653 deduce these equivalences by itself so it wasn't necessary to hold
26654 its hand so much. Don't be tempted to always supply d2_f_d_e with
26655 the actual cfa register, i.e. r31 when we are using a hard frame
26656 pointer. That fails when saving regs off r1, and sched moves the
26657 r31 setup past the reg saves. */
26659 static rtx
26660 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
26661 rtx reg2, rtx repl2)
26663 rtx repl;
26665 if (REGNO (reg) == STACK_POINTER_REGNUM)
26667 gcc_checking_assert (val == 0);
26668 repl = NULL_RTX;
26670 else
26671 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26672 GEN_INT (val));
26674 rtx pat = PATTERN (insn);
26675 if (!repl && !reg2)
26677 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
26678 if (GET_CODE (pat) == PARALLEL)
26679 for (int i = 0; i < XVECLEN (pat, 0); i++)
26680 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26682 rtx set = XVECEXP (pat, 0, i);
26684 /* If this PARALLEL has been emitted for out-of-line
26685 register save functions, or store multiple, then omit
26686 eh_frame info for any user-defined global regs. If
26687 eh_frame info is supplied, frame unwinding will
26688 restore a user reg. */
26689 if (!REG_P (SET_SRC (set))
26690 || !fixed_reg_p (REGNO (SET_SRC (set))))
26691 RTX_FRAME_RELATED_P (set) = 1;
26693 RTX_FRAME_RELATED_P (insn) = 1;
26694 return insn;
26697 /* We expect that 'pat' is either a SET or a PARALLEL containing
26698 SETs (and possibly other stuff). In a PARALLEL, all the SETs
26699 are important so they all have to be marked RTX_FRAME_RELATED_P.
26700 Call simplify_replace_rtx on the SETs rather than the whole insn
26701 so as to leave the other stuff alone (for example USE of r12). */
26703 if (GET_CODE (pat) == SET)
26705 if (repl)
26706 pat = simplify_replace_rtx (pat, reg, repl);
26707 if (reg2)
26708 pat = simplify_replace_rtx (pat, reg2, repl2);
26710 else if (GET_CODE (pat) == PARALLEL)
26712 pat = shallow_copy_rtx (pat);
26713 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
26715 for (int i = 0; i < XVECLEN (pat, 0); i++)
26716 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26718 rtx set = XVECEXP (pat, 0, i);
26720 if (repl)
26721 set = simplify_replace_rtx (set, reg, repl);
26722 if (reg2)
26723 set = simplify_replace_rtx (set, reg2, repl2);
26724 XVECEXP (pat, 0, i) = set;
26726 /* Omit eh_frame info for any user-defined global regs. */
26727 if (!REG_P (SET_SRC (set))
26728 || !fixed_reg_p (REGNO (SET_SRC (set))))
26729 RTX_FRAME_RELATED_P (set) = 1;
26732 else
26733 gcc_unreachable ();
26735 RTX_FRAME_RELATED_P (insn) = 1;
26736 if (repl || reg2)
26737 add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
26739 return insn;
26742 /* Returns an insn that has a vrsave set operation with the
26743 appropriate CLOBBERs. */
26745 static rtx
26746 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
26748 int nclobs, i;
26749 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
26750 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26752 clobs[0]
26753 = gen_rtx_SET (vrsave,
26754 gen_rtx_UNSPEC_VOLATILE (SImode,
26755 gen_rtvec (2, reg, vrsave),
26756 UNSPECV_SET_VRSAVE));
26758 nclobs = 1;
26760 /* We need to clobber the registers in the mask so the scheduler
26761 does not move sets to VRSAVE before sets of AltiVec registers.
26763 However, if the function receives nonlocal gotos, reload will set
26764 all call saved registers live. We will end up with:
26766 (set (reg 999) (mem))
26767 (parallel [ (set (reg vrsave) (unspec blah))
26768 (clobber (reg 999))])
26770 The clobber will cause the store into reg 999 to be dead, and
26771 flow will attempt to delete an epilogue insn. In this case, we
26772 need an unspec use/set of the register. */
26774 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26775 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26777 if (!epiloguep || call_used_regs [i])
26778 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
26779 gen_rtx_REG (V4SImode, i));
26780 else
26782 rtx reg = gen_rtx_REG (V4SImode, i);
26784 clobs[nclobs++]
26785 = gen_rtx_SET (reg,
26786 gen_rtx_UNSPEC (V4SImode,
26787 gen_rtvec (1, reg), 27));
26791 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26793 for (i = 0; i < nclobs; ++i)
26794 XVECEXP (insn, 0, i) = clobs[i];
26796 return insn;
26799 static rtx
26800 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
26802 rtx addr, mem;
26804 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26805 mem = gen_frame_mem (GET_MODE (reg), addr);
26806 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26809 static rtx
26810 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26812 return gen_frame_set (reg, frame_reg, offset, false);
26815 static rtx
26816 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26818 return gen_frame_set (reg, frame_reg, offset, true);
26821 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26822 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26824 static rtx
26825 emit_frame_save (rtx frame_reg, machine_mode mode,
26826 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26828 rtx reg, insn;
26830 /* Some cases that need register indexed addressing. */
26831 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26832 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
26833 || (TARGET_E500_DOUBLE && mode == DFmode)
26834 || (TARGET_SPE_ABI
26835 && SPE_VECTOR_MODE (mode)
26836 && !SPE_CONST_OFFSET_OK (offset))));
26838 reg = gen_rtx_REG (mode, regno);
26839 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
26840 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26841 NULL_RTX, NULL_RTX);
26844 /* Emit an offset memory reference suitable for a frame store, while
26845 converting to a valid addressing mode. */
26847 static rtx
26848 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26850 rtx int_rtx, offset_rtx;
26852 int_rtx = GEN_INT (offset);
26854 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
26855 || (TARGET_E500_DOUBLE && mode == DFmode))
26857 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
26858 emit_move_insn (offset_rtx, int_rtx);
26860 else
26861 offset_rtx = int_rtx;
26863 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
26866 #ifndef TARGET_FIX_AND_CONTINUE
26867 #define TARGET_FIX_AND_CONTINUE 0
26868 #endif
26870 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
26871 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26872 #define LAST_SAVRES_REGISTER 31
26873 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26875 enum {
26876 SAVRES_LR = 0x1,
26877 SAVRES_SAVE = 0x2,
26878 SAVRES_REG = 0x0c,
26879 SAVRES_GPR = 0,
26880 SAVRES_FPR = 4,
26881 SAVRES_VR = 8
26884 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26886 /* Temporary holding space for an out-of-line register save/restore
26887 routine name. */
26888 static char savres_routine_name[30];
26890 /* Return the name for an out-of-line register save/restore routine.
26891 SEL selects LR handling, save vs. restore, and the register class. */
26893 static char *
26894 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
26896 const char *prefix = "";
26897 const char *suffix = "";
26899 /* Different targets are supposed to define
26900 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26901 routine name could be defined with:
26903 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26905 This is a nice idea in theory, but in reality, things are
26906 complicated in several ways:
26908 - ELF targets have save/restore routines for GPRs.
26910 - SPE targets use different prefixes for 32/64-bit registers, and
26911 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
26913 - PPC64 ELF targets have routines for save/restore of GPRs that
26914 differ in what they do with the link register, so having a set
26915 prefix doesn't work. (We only use one of the save routines at
26916 the moment, though.)
26918 - PPC32 elf targets have "exit" versions of the restore routines
26919 that restore the link register and can save some extra space.
26920 These require an extra suffix. (There are also "tail" versions
26921 of the restore routines and "GOT" versions of the save routines,
26922 but we don't generate those at present. Same problems apply,
26923 though.)
26925 We deal with all this by synthesizing our own prefix/suffix and
26926 using that for the simple sprintf call shown above. */
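/* Some resulting names, for illustration: 32-bit SVR4 restoring GPRs
   from r29 with the "exit" variant yields "_restgpr_29_x", while
   64-bit ELF saving GPRs from r28 via the LR-saving entry point
   yields "_savegpr0_28". */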
26927 if (TARGET_SPE)
26929 /* No floating point saves on the SPE. */
26930 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
26932 if ((sel & SAVRES_SAVE))
26933 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
26934 else
26935 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
26937 if ((sel & SAVRES_LR))
26938 suffix = "_x";
26940 else if (DEFAULT_ABI == ABI_V4)
26942 if (TARGET_64BIT)
26943 goto aix_names;
26945 if ((sel & SAVRES_REG) == SAVRES_GPR)
26946 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26947 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26948 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26949 else if ((sel & SAVRES_REG) == SAVRES_VR)
26950 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26951 else
26952 abort ();
26954 if ((sel & SAVRES_LR))
26955 suffix = "_x";
26957 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26959 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26960 /* No out-of-line save/restore routines for GPRs on AIX. */
26961 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
26962 #endif
26964 aix_names:
26965 if ((sel & SAVRES_REG) == SAVRES_GPR)
26966 prefix = ((sel & SAVRES_SAVE)
26967 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26968 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26969 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26971 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26972 if ((sel & SAVRES_LR))
26973 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
26974 else
26975 #endif
26977 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26978 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26981 else if ((sel & SAVRES_REG) == SAVRES_VR)
26982 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26983 else
26984 abort ();
26987 if (DEFAULT_ABI == ABI_DARWIN)
26989 /* The Darwin approach is (slightly) different, in order to be
26990 compatible with code generated by the system toolchain. There is a
26991 single symbol for the start of the save sequence, and the code here
26992 embeds an offset into that code on the basis of the first register
26993 to be saved. */
26994 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26995 if ((sel & SAVRES_REG) == SAVRES_GPR)
26996 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26997 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26998 (regno - 13) * 4, prefix, regno);
26999 else if ((sel & SAVRES_REG) == SAVRES_FPR)
27000 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
27001 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
27002 else if ((sel & SAVRES_REG) == SAVRES_VR)
27003 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
27004 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
27005 else
27006 abort ();
27008 else
27009 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
27011 return savres_routine_name;
27014 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
27015 SEL selects LR handling, save vs. restore, and the register class. */
27017 static rtx
27018 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
27020 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
27021 ? info->first_gp_reg_save
27022 : (sel & SAVRES_REG) == SAVRES_FPR
27023 ? info->first_fp_reg_save - 32
27024 : (sel & SAVRES_REG) == SAVRES_VR
27025 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
27026 : -1);
27027 rtx sym;
27028 int select = sel;
27030 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
27031 versions of the gpr routines. */
27032 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
27033 && info->spe_64bit_regs_used)
27034 select ^= SAVRES_FPR ^ SAVRES_GPR;
27036 /* Don't generate bogus routine names. */
27037 gcc_assert (FIRST_SAVRES_REGISTER <= regno
27038 && regno <= LAST_SAVRES_REGISTER
27039 && select >= 0 && select <= 12);
27041 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
27043 if (sym == NULL)
27045 char *name;
27047 name = rs6000_savres_routine_name (info, regno, sel);
27049 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
27050 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
27051 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
27054 return sym;
27057 /* Emit a sequence of insns, including a stack tie if needed, for
27058 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
27059 reset the stack pointer, but move the base of the frame into
27060 reg UPDT_REGNO for use by out-of-line register restore routines. */
27062 static rtx
27063 rs6000_emit_stack_reset (rs6000_stack_t *info,
27064 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
27065 unsigned updt_regno)
27067 rtx updt_reg_rtx;
27069 /* This blockage is needed so that sched doesn't decide to move
27070 the sp change before the register restores. */
27071 if (DEFAULT_ABI == ABI_V4
27072 || (TARGET_SPE_ABI
27073 && info->spe_64bit_regs_used != 0
27074 && info->first_gp_reg_save != 32))
27075 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
27077 /* If we are restoring registers out-of-line, we will be using the
27078 "exit" variants of the restore routines, which will reset the
27079 stack for us. But we do need to point updt_reg into the
27080 right place for those routines. */
27081 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
27083 if (frame_off != 0)
27084 return emit_insn (gen_add3_insn (updt_reg_rtx,
27085 frame_reg_rtx, GEN_INT (frame_off)));
27086 else if (REGNO (frame_reg_rtx) != updt_regno)
27087 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
27089 return NULL_RTX;
27092 /* Return the register number used as a pointer by out-of-line
27093 save/restore functions. */
27095 static inline unsigned
27096 ptr_regno_for_savres (int sel)
27098 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27099 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
27100 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
27103 /* Construct a parallel rtx describing the effect of a call to an
27104 out-of-line register save/restore routine, and emit the insn
27105 or jump_insn as appropriate. */
27107 static rtx
27108 rs6000_emit_savres_rtx (rs6000_stack_t *info,
27109 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
27110 machine_mode reg_mode, int sel)
27112 int i;
27113 int offset, start_reg, end_reg, n_regs, use_reg;
27114 int reg_size = GET_MODE_SIZE (reg_mode);
27115 rtx sym;
27116 rtvec p;
27117 rtx par, insn;
27119 offset = 0;
27120 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27121 ? info->first_gp_reg_save
27122 : (sel & SAVRES_REG) == SAVRES_FPR
27123 ? info->first_fp_reg_save
27124 : (sel & SAVRES_REG) == SAVRES_VR
27125 ? info->first_altivec_reg_save
27126 : -1);
27127 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
27128 ? 32
27129 : (sel & SAVRES_REG) == SAVRES_FPR
27130 ? 64
27131 : (sel & SAVRES_REG) == SAVRES_VR
27132 ? LAST_ALTIVEC_REGNO + 1
27133 : -1);
27134 n_regs = end_reg - start_reg;
27135 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
27136 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
27137 + n_regs);
27139 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27140 RTVEC_ELT (p, offset++) = ret_rtx;
27142 RTVEC_ELT (p, offset++)
27143 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
27145 sym = rs6000_savres_routine_sym (info, sel);
27146 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
27148 use_reg = ptr_regno_for_savres (sel);
27149 if ((sel & SAVRES_REG) == SAVRES_VR)
27151 /* Vector regs are saved/restored using [reg+reg] addressing. */
27152 RTVEC_ELT (p, offset++)
27153 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27154 RTVEC_ELT (p, offset++)
27155 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
27157 else
27158 RTVEC_ELT (p, offset++)
27159 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
27161 for (i = 0; i < end_reg - start_reg; i++)
27162 RTVEC_ELT (p, i + offset)
27163 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
27164 frame_reg_rtx, save_area_offset + reg_size * i,
27165 (sel & SAVRES_SAVE) != 0);
27167 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27168 RTVEC_ELT (p, i + offset)
27169 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
27171 par = gen_rtx_PARALLEL (VOIDmode, p);
27173 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
27175 insn = emit_jump_insn (par);
27176 JUMP_LABEL (insn) = ret_rtx;
27178 else
27179 insn = emit_insn (par);
27180 return insn;
27183 /* Emit code to store CR fields that need to be saved into REG. */
27185 static void
27186 rs6000_emit_move_from_cr (rtx reg)
27188 /* Only the ELFv2 ABI allows storing only selected fields. */
27189 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
27191 int i, cr_reg[8], count = 0;
27193 /* Collect CR fields that must be saved. */
27194 for (i = 0; i < 8; i++)
27195 if (save_reg_p (CR0_REGNO + i))
27196 cr_reg[count++] = i;
27198 /* If it's just a single one, use mfcrf. */
27199 if (count == 1)
27201 rtvec p = rtvec_alloc (1);
27202 rtvec r = rtvec_alloc (2);
27203 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
27204 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
27205 RTVEC_ELT (p, 0)
27206 = gen_rtx_SET (reg,
27207 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
27209 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27210 return;
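/* Example (illustrative): if only CR2 must be saved, the single-field
   path above uses the mask 1 << (7 - 2) == 0x20, i.e. an
   "mfcrf reg,0x20" style instruction. */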
27213 /* ??? It might be better to handle count == 2 / 3 cases here
27214 as well, using logical operations to combine the values. */
27217 emit_insn (gen_movesi_from_cr (reg));
27220 /* Return whether the split-stack arg pointer (r12) is used. */
27222 static bool
27223 split_stack_arg_pointer_used_p (void)
27225 /* If the pseudo holding the arg pointer is no longer a pseudo,
27226 then the arg pointer is used. */
27227 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
27228 && (!REG_P (cfun->machine->split_stack_arg_pointer)
27229 || (REGNO (cfun->machine->split_stack_arg_pointer)
27230 < FIRST_PSEUDO_REGISTER)))
27231 return true;
27233 /* Unfortunately we also need to do some code scanning, since
27234 r12 may have been substituted for the pseudo. */
27235 rtx_insn *insn;
27236 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
27237 FOR_BB_INSNS (bb, insn)
27238 if (NONDEBUG_INSN_P (insn))
27240 /* A call destroys r12. */
27241 if (CALL_P (insn))
27242 return false;
27244 df_ref use;
27245 FOR_EACH_INSN_USE (use, insn)
27247 rtx x = DF_REF_REG (use);
27248 if (REG_P (x) && REGNO (x) == 12)
27249 return true;
27251 df_ref def;
27252 FOR_EACH_INSN_DEF (def, insn)
27254 rtx x = DF_REF_REG (def);
27255 if (REG_P (x) && REGNO (x) == 12)
27256 return false;
27259 return bitmap_bit_p (DF_LR_OUT (bb), 12);
27262 /* Return whether we need to emit an ELFv2 global entry point prologue. */
27264 static bool
27265 rs6000_global_entry_point_needed_p (void)
27267 /* Only needed for the ELFv2 ABI. */
27268 if (DEFAULT_ABI != ABI_ELFv2)
27269 return false;
27271 /* With -msingle-pic-base, we assume the whole program shares the same
27272 TOC, so no global entry point prologues are needed anywhere. */
27273 if (TARGET_SINGLE_PIC_BASE)
27274 return false;
27276 /* Ensure we have a global entry point for thunks. ??? We could
27277 avoid that if the target routine doesn't need a global entry point,
27278 but we do not know whether this is the case at this point. */
27279 if (cfun->is_thunk)
27280 return true;
27282 /* For regular functions, rs6000_emit_prologue sets this flag if the
27283 routine ever uses the TOC pointer. */
27284 return cfun->machine->r2_setup_needed;
27287 /* Emit function prologue as insns. */
27289 void
27290 rs6000_emit_prologue (void)
27292 rs6000_stack_t *info = rs6000_stack_info ();
27293 machine_mode reg_mode = Pmode;
27294 int reg_size = TARGET_32BIT ? 4 : 8;
27295 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27296 rtx frame_reg_rtx = sp_reg_rtx;
27297 unsigned int cr_save_regno;
27298 rtx cr_save_rtx = NULL_RTX;
27299 rtx insn;
27300 int strategy;
27301 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27302 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27303 && call_used_regs[STATIC_CHAIN_REGNUM]);
27304 int using_split_stack = (flag_split_stack
27305 && (lookup_attribute ("no_split_stack",
27306 DECL_ATTRIBUTES (cfun->decl))
27307 == NULL));
27309 /* Offset to top of frame for frame_reg and sp respectively. */
27310 HOST_WIDE_INT frame_off = 0;
27311 HOST_WIDE_INT sp_off = 0;
27312 /* sp_adjust is the stack adjusting instruction, tracked so that the
27313 insn setting up the split-stack arg pointer can be emitted just
27314 prior to it, when r12 is not used here for other purposes. */
27315 rtx_insn *sp_adjust = 0;
27317 #if CHECKING_P
27318 /* Track and check usage of r0, r11, r12. */
27319 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27320 #define START_USE(R) do \
27322 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27323 reg_inuse |= 1 << (R); \
27324 } while (0)
27325 #define END_USE(R) do \
27327 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27328 reg_inuse &= ~(1 << (R)); \
27329 } while (0)
27330 #define NOT_INUSE(R) do \
27332 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27333 } while (0)
27334 #else
27335 #define START_USE(R) do {} while (0)
27336 #define END_USE(R) do {} while (0)
27337 #define NOT_INUSE(R) do {} while (0)
27338 #endif
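/* For instance, a prologue sequence bracketed as
     START_USE (0); ... END_USE (0);
   asserts that r0 is not claimed twice in between, catching future
   changes that would silently reuse a scratch register.  */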
27340 if (DEFAULT_ABI == ABI_ELFv2
27341 && !TARGET_SINGLE_PIC_BASE)
27343 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27345 /* With -mminimal-toc we may generate an extra use of r2 below. */
27346 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
27347 cfun->machine->r2_setup_needed = true;
27351 if (flag_stack_usage_info)
27352 current_function_static_stack_size = info->total_size;
27354 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27356 HOST_WIDE_INT size = info->total_size;
27358 if (crtl->is_leaf && !cfun->calls_alloca)
27360 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27361 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
27362 size - STACK_CHECK_PROTECT);
27364 else if (size > 0)
27365 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
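/* (That is, a leaf that doesn't call alloca only probes the part of
   the frame beyond the STACK_CHECK_PROTECT cushion, while any other
   function probes the whole frame.)  */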
27368 if (TARGET_FIX_AND_CONTINUE)
27370 /* gdb on darwin arranges to forward a function from the old
27371 address by modifying the first 5 instructions of the function
27372 to branch to the overriding function. This is necessary to
27373 permit function pointers that point to the old function to
27374 actually forward to the new function. */
27375 emit_insn (gen_nop ());
27376 emit_insn (gen_nop ());
27377 emit_insn (gen_nop ());
27378 emit_insn (gen_nop ());
27379 emit_insn (gen_nop ());
27382 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27384 reg_mode = V2SImode;
27385 reg_size = 8;
27388 /* Handle world saves specially here. */
27389 if (WORLD_SAVE_P (info))
27391 int i, j, sz;
27392 rtx treg;
27393 rtvec p;
27394 rtx reg0;
27396 /* save_world expects lr in r0. */
27397 reg0 = gen_rtx_REG (Pmode, 0);
27398 if (info->lr_save_p)
27400 insn = emit_move_insn (reg0,
27401 gen_rtx_REG (Pmode, LR_REGNO));
27402 RTX_FRAME_RELATED_P (insn) = 1;
27405 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27406 assumptions about the offsets of various bits of the stack
27407 frame. */
27408 gcc_assert (info->gp_save_offset == -220
27409 && info->fp_save_offset == -144
27410 && info->lr_save_offset == 8
27411 && info->cr_save_offset == 4
27412 && info->push_p
27413 && info->lr_save_p
27414 && (!crtl->calls_eh_return
27415 || info->ehrd_offset == -432)
27416 && info->vrsave_save_offset == -224
27417 && info->altivec_save_offset == -416);
27419 treg = gen_rtx_REG (SImode, 11);
27420 emit_move_insn (treg, GEN_INT (-info->total_size));
27422 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27423 in R11. It also clobbers R12, so beware! */
27425 /* Preserve CR2 for save_world prologues.  */
27426 sz = 5;
27427 sz += 32 - info->first_gp_reg_save;
27428 sz += 64 - info->first_fp_reg_save;
27429 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27430 p = rtvec_alloc (sz);
27431 j = 0;
27432 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27433 gen_rtx_REG (SImode,
27434 LR_REGNO));
27435 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27436 gen_rtx_SYMBOL_REF (Pmode,
27437 "*save_world"));
27438 /* We do floats first so that the instruction pattern matches
27439 properly. */
27440 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27441 RTVEC_ELT (p, j++)
27442 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27443 ? DFmode : SFmode,
27444 info->first_fp_reg_save + i),
27445 frame_reg_rtx,
27446 info->fp_save_offset + frame_off + 8 * i);
27447 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27448 RTVEC_ELT (p, j++)
27449 = gen_frame_store (gen_rtx_REG (V4SImode,
27450 info->first_altivec_reg_save + i),
27451 frame_reg_rtx,
27452 info->altivec_save_offset + frame_off + 16 * i);
27453 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27454 RTVEC_ELT (p, j++)
27455 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27456 frame_reg_rtx,
27457 info->gp_save_offset + frame_off + reg_size * i);
27459 /* CR register traditionally saved as CR2. */
27460 RTVEC_ELT (p, j++)
27461 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
27462 frame_reg_rtx, info->cr_save_offset + frame_off);
27463 /* Explain about use of R0. */
27464 if (info->lr_save_p)
27465 RTVEC_ELT (p, j++)
27466 = gen_frame_store (reg0,
27467 frame_reg_rtx, info->lr_save_offset + frame_off);
27468 /* Explain what happens to the stack pointer. */
27470 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27471 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27474 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27475 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27476 treg, GEN_INT (-info->total_size));
27477 sp_off = frame_off = info->total_size;
27480 strategy = info->savres_strategy;
27482 /* For V.4, update stack before we do any saving and set back pointer. */
27483 if (! WORLD_SAVE_P (info)
27484 && info->push_p
27485 && (DEFAULT_ABI == ABI_V4
27486 || crtl->calls_eh_return))
27488 bool need_r11 = (TARGET_SPE
27489 ? (!(strategy & SAVE_INLINE_GPRS)
27490 && info->spe_64bit_regs_used == 0)
27491 : (!(strategy & SAVE_INLINE_FPRS)
27492 || !(strategy & SAVE_INLINE_GPRS)
27493 || !(strategy & SAVE_INLINE_VRS)));
27494 int ptr_regno = -1;
27495 rtx ptr_reg = NULL_RTX;
27496 int ptr_off = 0;
27498 if (info->total_size < 32767)
27499 frame_off = info->total_size;
27500 else if (need_r11)
27501 ptr_regno = 11;
27502 else if (info->cr_save_p
27503 || info->lr_save_p
27504 || info->first_fp_reg_save < 64
27505 || info->first_gp_reg_save < 32
27506 || info->altivec_size != 0
27507 || info->vrsave_size != 0
27508 || crtl->calls_eh_return)
27509 ptr_regno = 12;
27510 else
27512 /* The prologue won't be saving any regs so there is no need
27513 to set up a frame register to access any frame save area.
27514 We also won't be using frame_off anywhere below, but set
27515 the correct value anyway to protect against future
27516 changes to this function. */
27517 frame_off = info->total_size;
27519 if (ptr_regno != -1)
27521 /* Set up the frame offset to that needed by the first
27522 out-of-line save function. */
27523 START_USE (ptr_regno);
27524 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27525 frame_reg_rtx = ptr_reg;
27526 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27527 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27528 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27529 ptr_off = info->gp_save_offset + info->gp_size;
27530 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27531 ptr_off = info->altivec_save_offset + info->altivec_size;
27532 frame_off = -ptr_off;
27534 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27535 ptr_reg, ptr_off);
27536 if (REGNO (frame_reg_rtx) == 12)
27537 sp_adjust = 0;
27538 sp_off = info->total_size;
27539 if (frame_reg_rtx != sp_reg_rtx)
27540 rs6000_emit_stack_tie (frame_reg_rtx, false);
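/* (rs6000_emit_allocate_stack typically materializes this as a
   single "stdu r1,-SIZE(r1)" -- "stwu" for 32-bit -- when SIZE fits
   in 16 bits, and otherwise loads SIZE into a scratch register and
   uses the indexed update form; the mnemonics are illustrative.)  */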
27543 /* If we use the link register, get it into r0. */
27544 if (!WORLD_SAVE_P (info) && info->lr_save_p)
27546 rtx addr, reg, mem;
27548 reg = gen_rtx_REG (Pmode, 0);
27549 START_USE (0);
27550 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27551 RTX_FRAME_RELATED_P (insn) = 1;
27553 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27554 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27556 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27557 GEN_INT (info->lr_save_offset + frame_off));
27558 mem = gen_rtx_MEM (Pmode, addr);
27559 /* This should not be in rs6000_sr_alias_set, because of
27560 __builtin_return_address. */
27562 insn = emit_move_insn (mem, reg);
27563 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27564 NULL_RTX, NULL_RTX);
27565 END_USE (0);
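/* (Roughly "mflr r0" followed by a store of r0 into the LR save
   slot, e.g. "std 0,16(1)" under the 64-bit ABIs; the offset
   varies by ABI, so the example store is illustrative only.)  */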
27569 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27570 r12 will be needed by out-of-line gpr restore. */
27571 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27572 && !(strategy & (SAVE_INLINE_GPRS
27573 | SAVE_NOINLINE_GPRS_SAVES_LR))
27574 ? 11 : 12);
27575 if (!WORLD_SAVE_P (info)
27576 && info->cr_save_p
27577 && REGNO (frame_reg_rtx) != cr_save_regno
27578 && !(using_static_chain_p && cr_save_regno == 11)
27579 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27581 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27582 START_USE (cr_save_regno);
27583 rs6000_emit_move_from_cr (cr_save_rtx);
27586 /* Do any required saving of fpr's. If only one or two to save, do
27587 it ourselves. Otherwise, call function. */
27588 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27590 int i;
27591 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27592 if (save_reg_p (info->first_fp_reg_save + i))
27593 emit_frame_save (frame_reg_rtx,
27594 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27595 ? DFmode : SFmode),
27596 info->first_fp_reg_save + i,
27597 info->fp_save_offset + frame_off + 8 * i,
27598 sp_off - frame_off);
27600 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27602 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27603 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27604 unsigned ptr_regno = ptr_regno_for_savres (sel);
27605 rtx ptr_reg = frame_reg_rtx;
27607 if (REGNO (frame_reg_rtx) == ptr_regno)
27608 gcc_checking_assert (frame_off == 0);
27609 else
27611 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27612 NOT_INUSE (ptr_regno);
27613 emit_insn (gen_add3_insn (ptr_reg,
27614 frame_reg_rtx, GEN_INT (frame_off)));
27616 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27617 info->fp_save_offset,
27618 info->lr_save_offset,
27619 DFmode, sel);
27620 rs6000_frame_related (insn, ptr_reg, sp_off,
27621 NULL_RTX, NULL_RTX);
27622 if (lr)
27623 END_USE (0);
27626 /* Save GPRs. This is done as a PARALLEL if we are using
27627 the store-multiple instructions. */
27628 if (!WORLD_SAVE_P (info)
27629 && TARGET_SPE_ABI
27630 && info->spe_64bit_regs_used != 0
27631 && info->first_gp_reg_save != 32)
27633 int i;
27634 rtx spe_save_area_ptr;
27635 HOST_WIDE_INT save_off;
27636 int ool_adjust = 0;
27638 /* Determine whether we can address all of the registers that need
27639 to be saved with an offset from frame_reg_rtx that fits in
27640 the small const field for SPE memory instructions. */
27641 int spe_regs_addressable
27642 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
27643 + reg_size * (32 - info->first_gp_reg_save - 1))
27644 && (strategy & SAVE_INLINE_GPRS));
27646 if (spe_regs_addressable)
27648 spe_save_area_ptr = frame_reg_rtx;
27649 save_off = frame_off;
27651 else
27653 /* Make r11 point to the start of the SPE save area. We need
27654 to be careful here if r11 is holding the static chain. If
27655 it is, then temporarily save it in r0. */
27656 HOST_WIDE_INT offset;
27658 if (!(strategy & SAVE_INLINE_GPRS))
27659 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
27660 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
27661 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
27662 save_off = frame_off - offset;
27664 if (using_static_chain_p)
27666 rtx r0 = gen_rtx_REG (Pmode, 0);
27668 START_USE (0);
27669 gcc_assert (info->first_gp_reg_save > 11);
27671 emit_move_insn (r0, spe_save_area_ptr);
27673 else if (REGNO (frame_reg_rtx) != 11)
27674 START_USE (11);
27676 emit_insn (gen_addsi3 (spe_save_area_ptr,
27677 frame_reg_rtx, GEN_INT (offset)));
27678 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
27679 frame_off = -info->spe_gp_save_offset + ool_adjust;
27682 if ((strategy & SAVE_INLINE_GPRS))
27684 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27685 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27686 emit_frame_save (spe_save_area_ptr, reg_mode,
27687 info->first_gp_reg_save + i,
27688 (info->spe_gp_save_offset + save_off
27689 + reg_size * i),
27690 sp_off - save_off);
27692 else
27694 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
27695 info->spe_gp_save_offset + save_off,
27696 0, reg_mode,
27697 SAVRES_SAVE | SAVRES_GPR);
27699 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
27700 NULL_RTX, NULL_RTX);
27703 /* Move the static chain pointer back. */
27704 if (!spe_regs_addressable)
27706 if (using_static_chain_p)
27708 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
27709 END_USE (0);
27711 else if (REGNO (frame_reg_rtx) != 11)
27712 END_USE (11);
27715 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27717 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27718 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27719 unsigned ptr_regno = ptr_regno_for_savres (sel);
27720 rtx ptr_reg = frame_reg_rtx;
27721 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27722 int end_save = info->gp_save_offset + info->gp_size;
27723 int ptr_off;
27725 if (ptr_regno == 12)
27726 sp_adjust = 0;
27727 if (!ptr_set_up)
27728 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27730 /* Need to adjust r11 (r12) if we saved any FPRs. */
27731 if (end_save + frame_off != 0)
27733 rtx offset = GEN_INT (end_save + frame_off);
27735 if (ptr_set_up)
27736 frame_off = -end_save;
27737 else
27738 NOT_INUSE (ptr_regno);
27739 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27741 else if (!ptr_set_up)
27743 NOT_INUSE (ptr_regno);
27744 emit_move_insn (ptr_reg, frame_reg_rtx);
27746 ptr_off = -end_save;
27747 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27748 info->gp_save_offset + ptr_off,
27749 info->lr_save_offset + ptr_off,
27750 reg_mode, sel);
27751 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27752 NULL_RTX, NULL_RTX);
27753 if (lr)
27754 END_USE (0);
27756 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27758 rtvec p;
27759 int i;
27760 p = rtvec_alloc (32 - info->first_gp_reg_save);
27761 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27762 RTVEC_ELT (p, i)
27763 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27764 frame_reg_rtx,
27765 info->gp_save_offset + frame_off + reg_size * i);
27766 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27767 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27768 NULL_RTX, NULL_RTX);
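/* (The store-multiple case above corresponds to a single "stmw"
   such as "stmw 26,OFF(1)", which stores r26 through r31 at once;
   the register and offset are made-up examples, and in practice
   SAVE_MULTIPLE is only chosen where stmw is usable, i.e. 32-bit
   code.)  */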
27770 else if (!WORLD_SAVE_P (info))
27772 int i;
27773 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27774 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
27775 emit_frame_save (frame_reg_rtx, reg_mode,
27776 info->first_gp_reg_save + i,
27777 info->gp_save_offset + frame_off + reg_size * i,
27778 sp_off - frame_off);
27781 if (crtl->calls_eh_return)
27783 unsigned int i;
27784 rtvec p;
27786 for (i = 0; ; ++i)
27788 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27789 if (regno == INVALID_REGNUM)
27790 break;
27793 p = rtvec_alloc (i);
27795 for (i = 0; ; ++i)
27797 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27798 if (regno == INVALID_REGNUM)
27799 break;
27801 insn
27802 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27803 sp_reg_rtx,
27804 info->ehrd_offset + sp_off + reg_size * (int) i);
27805 RTVEC_ELT (p, i) = insn;
27806 RTX_FRAME_RELATED_P (insn) = 1;
27809 insn = emit_insn (gen_blockage ());
27810 RTX_FRAME_RELATED_P (insn) = 1;
27811 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27814 /* In AIX ABI we need to make sure r2 is really saved. */
27815 if (TARGET_AIX && crtl->calls_eh_return)
27817 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27818 rtx save_insn, join_insn, note;
27819 long toc_restore_insn;
27821 tmp_reg = gen_rtx_REG (Pmode, 11);
27822 tmp_reg_si = gen_rtx_REG (SImode, 11);
27823 if (using_static_chain_p)
27825 START_USE (0);
27826 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27828 else
27829 START_USE (11);
27830 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27831 /* Peek at the instruction to which this function returns. If it's
27832 restoring r2, then we know we've already saved r2. We can't
27833 unconditionally save r2 because the value we have will already
27834 be updated if we arrived at this function via a plt call or
27835 toc adjusting stub. */
27836 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27837 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27838 + RS6000_TOC_SAVE_SLOT);
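/* (0xE8410000 is the base encoding of "ld r2,0(r1)" and 0x80410000
   that of "lwz r2,0(r1)"; adding RS6000_TOC_SAVE_SLOT forms the
   exact "ld r2,SLOT(r1)" a TOC-restoring stub returns to, which is
   what the comparison built below tests against.)  */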
27839 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27840 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27841 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27842 validate_condition_mode (EQ, CCUNSmode);
27843 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27844 emit_insn (gen_rtx_SET (compare_result,
27845 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27846 toc_save_done = gen_label_rtx ();
27847 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27848 gen_rtx_EQ (VOIDmode, compare_result,
27849 const0_rtx),
27850 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27851 pc_rtx);
27852 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27853 JUMP_LABEL (jump) = toc_save_done;
27854 LABEL_NUSES (toc_save_done) += 1;
27856 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27857 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27858 sp_off - frame_off);
27860 emit_label (toc_save_done);
27862 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27863 have a CFG that has different saves along different paths.
27864 Move the note to a dummy blockage insn, which describes that
27865 R2 is unconditionally saved after the label. */
27866 /* ??? An alternate representation might be a special insn pattern
27867 containing both the branch and the store. That might give the
27868 code that minimizes the number of DW_CFA_advance opcodes more
27869 freedom in placing the annotations. */
27870 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27871 if (note)
27872 remove_note (save_insn, note);
27873 else
27874 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27875 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27876 RTX_FRAME_RELATED_P (save_insn) = 0;
27878 join_insn = emit_insn (gen_blockage ());
27879 REG_NOTES (join_insn) = note;
27880 RTX_FRAME_RELATED_P (join_insn) = 1;
27882 if (using_static_chain_p)
27884 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27885 END_USE (0);
27887 else
27888 END_USE (11);
27891 /* Save CR if we use any that must be preserved. */
27892 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27894 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27895 GEN_INT (info->cr_save_offset + frame_off));
27896 rtx mem = gen_frame_mem (SImode, addr);
27898 /* If we didn't copy cr before, do so now using r0. */
27899 if (cr_save_rtx == NULL_RTX)
27901 START_USE (0);
27902 cr_save_rtx = gen_rtx_REG (SImode, 0);
27903 rs6000_emit_move_from_cr (cr_save_rtx);
27906 /* Saving CR requires a two-instruction sequence: one instruction
27907 to move the CR to a general-purpose register, and a second
27908 instruction that stores the GPR to memory.
27910 We do not emit any DWARF CFI records for the first of these,
27911 because we cannot properly represent the fact that CR is saved in
27912 a register. One reason is that we cannot express that multiple
27913 CR fields are saved; another reason is that on 64-bit, the size
27914 of the CR register in DWARF (4 bytes) differs from the size of
27915 a general-purpose register.
27917 This means if any intervening instruction were to clobber one of
27918 the call-saved CR fields, we'd have incorrect CFI. To prevent
27919 this from happening, we mark the store to memory as a use of
27920 those CR fields, which prevents any such instruction from being
27921 scheduled in between the two instructions. */
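/* (For example, with only CR2 live this pairs the "mfcr rN" emitted
   earlier with a "stw rN,OFF(r1)" whose PARALLEL also carries
   (use (reg:CC 70)), keeping the two instructions adjacent;
   regno 70 is CR2, CR0_REGNO being 68.)  */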
27922 rtx crsave_v[9];
27923 int n_crsave = 0;
27924 int i;
27926 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27927 for (i = 0; i < 8; i++)
27928 if (save_reg_p (CR0_REGNO + i))
27929 crsave_v[n_crsave++]
27930 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27932 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27933 gen_rtvec_v (n_crsave, crsave_v)));
27934 END_USE (REGNO (cr_save_rtx));
27936 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27937 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27938 so we need to construct a frame expression manually. */
27939 RTX_FRAME_RELATED_P (insn) = 1;
27941 /* Update address to be stack-pointer relative, like
27942 rs6000_frame_related would do. */
27943 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27944 GEN_INT (info->cr_save_offset + sp_off));
27945 mem = gen_frame_mem (SImode, addr);
27947 if (DEFAULT_ABI == ABI_ELFv2)
27949 /* In the ELFv2 ABI we generate separate CFI records for each
27950 CR field that was actually saved. They all point to the
27951 same 32-bit stack slot. */
27952 rtx crframe[8];
27953 int n_crframe = 0;
27955 for (i = 0; i < 8; i++)
27956 if (save_reg_p (CR0_REGNO + i))
27958 crframe[n_crframe]
27959 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27961 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27962 n_crframe++;
27965 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27966 gen_rtx_PARALLEL (VOIDmode,
27967 gen_rtvec_v (n_crframe, crframe)));
27969 else
27971 /* In other ABIs, by convention, we use a single CR regnum to
27972 represent the fact that all call-saved CR fields are saved.
27973 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27974 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27975 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27979 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27980 *separate* slots if the routine calls __builtin_eh_return, so
27981 that they can be independently restored by the unwinder. */
27982 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27984 int i, cr_off = info->ehcr_offset;
27985 rtx crsave;
27987 /* ??? We might get better performance by using multiple mfocrf
27988 instructions. */
27989 crsave = gen_rtx_REG (SImode, 0);
27990 emit_insn (gen_movesi_from_cr (crsave));
27992 for (i = 0; i < 8; i++)
27993 if (!call_used_regs[CR0_REGNO + i])
27995 rtvec p = rtvec_alloc (2);
27996 RTVEC_ELT (p, 0)
27997 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27998 RTVEC_ELT (p, 1)
27999 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
28001 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28003 RTX_FRAME_RELATED_P (insn) = 1;
28004 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28005 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
28006 sp_reg_rtx, cr_off + sp_off));
28008 cr_off += reg_size;
28012 /* Update stack and set back pointer unless this is V.4,
28013 for which it was done previously. */
28014 if (!WORLD_SAVE_P (info) && info->push_p
28015 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
28017 rtx ptr_reg = NULL;
28018 int ptr_off = 0;
28020 /* If saving altivec regs we need to be able to address all save
28021 locations using a 16-bit offset. */
28022 if ((strategy & SAVE_INLINE_VRS) == 0
28023 || (info->altivec_size != 0
28024 && (info->altivec_save_offset + info->altivec_size - 16
28025 + info->total_size - frame_off) > 32767)
28026 || (info->vrsave_size != 0
28027 && (info->vrsave_save_offset
28028 + info->total_size - frame_off) > 32767))
28030 int sel = SAVRES_SAVE | SAVRES_VR;
28031 unsigned ptr_regno = ptr_regno_for_savres (sel);
28033 if (using_static_chain_p
28034 && ptr_regno == STATIC_CHAIN_REGNUM)
28035 ptr_regno = 12;
28036 if (REGNO (frame_reg_rtx) != ptr_regno)
28037 START_USE (ptr_regno);
28038 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28039 frame_reg_rtx = ptr_reg;
28040 ptr_off = info->altivec_save_offset + info->altivec_size;
28041 frame_off = -ptr_off;
28043 else if (REGNO (frame_reg_rtx) == 1)
28044 frame_off = info->total_size;
28045 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
28046 ptr_reg, ptr_off);
28047 if (REGNO (frame_reg_rtx) == 12)
28048 sp_adjust = 0;
28049 sp_off = info->total_size;
28050 if (frame_reg_rtx != sp_reg_rtx)
28051 rs6000_emit_stack_tie (frame_reg_rtx, false);
28054 /* Set frame pointer, if needed. */
28055 if (frame_pointer_needed)
28057 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
28058 sp_reg_rtx);
28059 RTX_FRAME_RELATED_P (insn) = 1;
28062 /* Save AltiVec registers if needed. Save here because the red zone does
28063 not always include AltiVec registers. */
28064 if (!WORLD_SAVE_P (info)
28065 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
28067 int end_save = info->altivec_save_offset + info->altivec_size;
28068 int ptr_off;
28069 /* Oddly, the vector save/restore functions point r0 at the end
28070 of the save area, then use r11 or r12 to load offsets for
28071 [reg+reg] addressing. */
28072 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28073 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
28074 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28076 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28077 NOT_INUSE (0);
28078 if (scratch_regno == 12)
28079 sp_adjust = 0;
28080 if (end_save + frame_off != 0)
28082 rtx offset = GEN_INT (end_save + frame_off);
28084 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28086 else
28087 emit_move_insn (ptr_reg, frame_reg_rtx);
28089 ptr_off = -end_save;
28090 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28091 info->altivec_save_offset + ptr_off,
28092 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
28093 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
28094 NULL_RTX, NULL_RTX);
28095 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28097 /* The oddity mentioned above clobbered our frame reg. */
28098 emit_move_insn (frame_reg_rtx, ptr_reg);
28099 frame_off = ptr_off;
28102 else if (!WORLD_SAVE_P (info)
28103 && info->altivec_size != 0)
28105 int i;
28107 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28108 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28110 rtx areg, savereg, mem;
28111 HOST_WIDE_INT offset;
28113 offset = (info->altivec_save_offset + frame_off
28114 + 16 * (i - info->first_altivec_reg_save));
28116 savereg = gen_rtx_REG (V4SImode, i);
28118 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28120 mem = gen_frame_mem (V4SImode,
28121 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28122 GEN_INT (offset)));
28123 insn = emit_insn (gen_rtx_SET (mem, savereg));
28124 areg = NULL_RTX;
28126 else
28128 NOT_INUSE (0);
28129 areg = gen_rtx_REG (Pmode, 0);
28130 emit_move_insn (areg, GEN_INT (offset));
28132 /* AltiVec addressing mode is [reg+reg]. */
28133 mem = gen_frame_mem (V4SImode,
28134 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
28136 /* Rather than emitting a generic move, force use of the stvx
28137 instruction, which we always want on ISA 2.07 (power8) systems.
28138 In particular we don't want xxpermdi/stxvd2x for little
28139 endian. */
28140 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
28143 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
28144 areg, GEN_INT (offset));
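/* (The fallback path above amounts to "li r0,OFFSET" followed by
   "stvx vS,r1,r0", since stvx only has [reg+reg] addressing; the
   P9 D-form path can instead use an offsetted store.  Mnemonics
   shown for illustration.)  */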
28148 /* VRSAVE is a bit vector representing which AltiVec registers
28149 are used. The OS uses this to determine which vector
28150 registers to save on a context switch. We need to save
28151 VRSAVE on the stack frame, add whatever AltiVec registers we
28152 used in this function, and do the corresponding magic in the
28153 epilogue. */
28155 if (!WORLD_SAVE_P (info)
28156 && info->vrsave_size != 0)
28158 rtx reg, vrsave;
28159 int offset;
28160 int save_regno;
28162 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
28163 be using r12 as frame_reg_rtx and r11 as the static chain
28164 pointer for nested functions. */
28165 save_regno = 12;
28166 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28167 && !using_static_chain_p)
28168 save_regno = 11;
28169 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
28171 save_regno = 11;
28172 if (using_static_chain_p)
28173 save_regno = 0;
28176 NOT_INUSE (save_regno);
28177 reg = gen_rtx_REG (SImode, save_regno);
28178 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28179 if (TARGET_MACHO)
28180 emit_insn (gen_get_vrsave_internal (reg));
28181 else
28182 emit_insn (gen_rtx_SET (reg, vrsave));
28184 /* Save VRSAVE. */
28185 offset = info->vrsave_save_offset + frame_off;
28186 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
28188 /* Include the registers in the mask. */
28189 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
28191 insn = emit_insn (generate_set_vrsave (reg, info, 0));
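/* (Roughly: "mfspr rX,256" to read VRSAVE -- SPR 256 -- then a
   store of rX into the VRSAVE slot, an OR of this function's mask
   into rX, and "mtspr 256,rX" to activate it.  The exact OR
   instruction depends on the mask value.)  */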
28194 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
28195 if (!TARGET_SINGLE_PIC_BASE
28196 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
28197 || (DEFAULT_ABI == ABI_V4
28198 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
28199 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
28201 /* If emit_load_toc_table will use the link register, we need to save
28202 it. We use R12 for this purpose because emit_load_toc_table
28203 can use register 0. This allows us to use a plain 'blr' to return
28204 from the procedure more often. */
28205 int save_LR_around_toc_setup = (TARGET_ELF
28206 && DEFAULT_ABI == ABI_V4
28207 && flag_pic
28208 && ! info->lr_save_p
28209 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
28210 if (save_LR_around_toc_setup)
28212 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28213 rtx tmp = gen_rtx_REG (Pmode, 12);
28215 sp_adjust = 0;
28216 insn = emit_move_insn (tmp, lr);
28217 RTX_FRAME_RELATED_P (insn) = 1;
28219 rs6000_emit_load_toc_table (TRUE);
28221 insn = emit_move_insn (lr, tmp);
28222 add_reg_note (insn, REG_CFA_RESTORE, lr);
28223 RTX_FRAME_RELATED_P (insn) = 1;
28225 else
28226 rs6000_emit_load_toc_table (TRUE);
28229 #if TARGET_MACHO
28230 if (!TARGET_SINGLE_PIC_BASE
28231 && DEFAULT_ABI == ABI_DARWIN
28232 && flag_pic && crtl->uses_pic_offset_table)
28234 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28235 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
28237 /* Save and restore LR locally around this call (in R0). */
28238 if (!info->lr_save_p)
28239 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
28241 emit_insn (gen_load_macho_picbase (src));
28243 emit_move_insn (gen_rtx_REG (Pmode,
28244 RS6000_PIC_OFFSET_TABLE_REGNUM),
28245 lr);
28247 if (!info->lr_save_p)
28248 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
28250 #endif
28252 /* If we need to, save the TOC register after doing the stack setup.
28253 Do not emit eh frame info for this save. The unwinder wants info,
28254 conceptually attached to instructions in this function, about
28255 register values in the caller of this function. This R2 may have
28256 already been changed from the value in the caller.
28257 We don't attempt to write accurate DWARF EH frame info for R2
28258 because code emitted by gcc for a (non-pointer) function call
28259 doesn't save and restore R2. Instead, R2 is managed out-of-line
28260 by a linker generated plt call stub when the function resides in
28261 a shared library. This behavior is costly to describe in DWARF,
28262 both in terms of the size of DWARF info and the time taken in the
28263 unwinder to interpret it. R2 changes, apart from the
28264 calls_eh_return case earlier in this function, are handled by
28265 linux-unwind.h frob_update_context. */
28266 if (rs6000_save_toc_in_prologue_p ())
28268 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
28269 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
28272 if (using_split_stack && split_stack_arg_pointer_used_p ())
28274 /* Set up the arg pointer (r12) for -fsplit-stack code. If
28275 __morestack was called, it left the arg pointer to the old
28276 stack in r29. Otherwise, the arg pointer is the top of the
28277 current frame. */
28278 cfun->machine->split_stack_argp_used = true;
28279 if (sp_adjust)
28281 rtx r12 = gen_rtx_REG (Pmode, 12);
28282 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
28283 emit_insn_before (set_r12, sp_adjust);
28285 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
28287 rtx r12 = gen_rtx_REG (Pmode, 12);
28288 if (frame_off == 0)
28289 emit_move_insn (r12, frame_reg_rtx);
28290 else
28291 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
28293 if (info->push_p)
28295 rtx r12 = gen_rtx_REG (Pmode, 12);
28296 rtx r29 = gen_rtx_REG (Pmode, 29);
28297 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28298 rtx not_more = gen_label_rtx ();
28299 rtx jump;
28301 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28302 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
28303 gen_rtx_LABEL_REF (VOIDmode, not_more),
28304 pc_rtx);
28305 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28306 JUMP_LABEL (jump) = not_more;
28307 LABEL_NUSES (not_more) += 1;
28308 emit_move_insn (r12, r29);
28309 emit_label (not_more);
28314 /* Output .extern statements for the save/restore routines we use. */
28316 static void
28317 rs6000_output_savres_externs (FILE *file)
28319 rs6000_stack_t *info = rs6000_stack_info ();
28321 if (TARGET_DEBUG_STACK)
28322 debug_stack_info (info);
28324 /* Write .extern for any function we will call to save and restore
28325 fp values. */
28326 if (info->first_fp_reg_save < 64
28327 && !TARGET_MACHO
28328 && !TARGET_ELF)
28330 char *name;
28331 int regno = info->first_fp_reg_save - 32;
28333 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
28335 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
28336 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
28337 name = rs6000_savres_routine_name (info, regno, sel);
28338 fprintf (file, "\t.extern %s\n", name);
28340 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
28342 bool lr = (info->savres_strategy
28343 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28344 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28345 name = rs6000_savres_routine_name (info, regno, sel);
28346 fprintf (file, "\t.extern %s\n", name);
28351 /* Write function prologue. */
28353 static void
28354 rs6000_output_function_prologue (FILE *file,
28355 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28357 if (!cfun->is_thunk)
28358 rs6000_output_savres_externs (file);
28360 /* ELFv2 ABI r2 setup code and local entry point. This must follow
28361 immediately after the global entry point label. */
28362 if (rs6000_global_entry_point_needed_p ())
28364 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28366 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
28368 if (TARGET_CMODEL != CMODEL_LARGE)
28370 /* In the small and medium code models, we assume the TOC is less than
28371 2 GB away from the text section, so it can be computed via the
28372 following two-instruction sequence. */
28373 char buf[256];
28375 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28376 fprintf (file, "0:\taddis 2,12,.TOC.-");
28377 assemble_name (file, buf);
28378 fprintf (file, "@ha\n");
28379 fprintf (file, "\taddi 2,2,.TOC.-");
28380 assemble_name (file, buf);
28381 fprintf (file, "@l\n");
28383 else
28385 /* In the large code model, we allow arbitrary offsets between the
28386 TOC and the text section, so we have to load the offset from
28387 memory. The data field is emitted directly before the global
28388 entry point in rs6000_elf_declare_function_name. */
28389 char buf[256];
28391 #ifdef HAVE_AS_ENTRY_MARKERS
28392 /* If supported by the linker, emit a marker relocation. If the
28393 total code size of the final executable or shared library
28394 happens to fit into 2 GB after all, the linker will replace
28395 this code sequence with the sequence for the small or medium
28396 code model. */
28397 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
28398 #endif
28399 fprintf (file, "\tld 2,");
28400 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28401 assemble_name (file, buf);
28402 fprintf (file, "-");
28403 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28404 assemble_name (file, buf);
28405 fprintf (file, "(12)\n");
28406 fprintf (file, "\tadd 2,2,12\n");
28409 fputs ("\t.localentry\t", file);
28410 assemble_name (file, name);
28411 fputs (",.-", file);
28412 assemble_name (file, name);
28413 fputs ("\n", file);
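/* (For the small/medium code model the emitted sequence reads:
       0:      addis 2,12,.TOC.-.LCF0@ha
               addi 2,2,.TOC.-.LCF0@l
               .localentry foo,.-foo
   where ".LCF0" stands for the internal label generated above and
   "foo" for the current function; both names are placeholders.)  */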
28416 /* Output -mprofile-kernel code. This needs to be done here instead of
28417 in output_function_profile since it must go after the ELFv2 ABI
28418 local entry point. */
28419 if (TARGET_PROFILE_KERNEL && crtl->profile)
28421 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28422 gcc_assert (!TARGET_32BIT);
28424 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28426 /* In the ELFv2 ABI we have no compiler stack word. It must be
28427 the responsibility of _mcount to preserve the static chain
28428 register if required. */
28429 if (DEFAULT_ABI != ABI_ELFv2
28430 && cfun->static_chain_decl != NULL)
28432 asm_fprintf (file, "\tstd %s,24(%s)\n",
28433 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28434 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28435 asm_fprintf (file, "\tld %s,24(%s)\n",
28436 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28438 else
28439 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28442 rs6000_pic_labelno++;
28445 /* -mprofile-kernel code calls mcount before the function prolog,
28446 so a profiled leaf function should stay a leaf function. */
28447 static bool
28448 rs6000_keep_leaf_when_profiled ()
28450 return TARGET_PROFILE_KERNEL;
28453 /* Non-zero if vmx regs are restored before the frame pop, zero if
28454 we restore after the pop when possible. */
28455 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
28457 /* Restoring cr is a two-step process: loading a reg from the frame
28458 save, then moving the reg to cr. For ABI_V4 we must let the
28459 unwinder know that the stack location is no longer valid at or
28460 before the stack deallocation, but we can't emit a cfa_restore for
28461 cr at the stack deallocation like we do for other registers.
28462 The trouble is that it is possible for the move to cr to be
28463 scheduled after the stack deallocation. So say exactly where cr
28464 is located on each of the two insns. */
28466 static rtx
28467 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
28469 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
28470 rtx reg = gen_rtx_REG (SImode, regno);
28471 rtx_insn *insn = emit_move_insn (reg, mem);
28473 if (!exit_func && DEFAULT_ABI == ABI_V4)
28475 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28476 rtx set = gen_rtx_SET (reg, cr);
28478 add_reg_note (insn, REG_CFA_REGISTER, set);
28479 RTX_FRAME_RELATED_P (insn) = 1;
28481 return reg;
28484 /* Reload CR from REG. */
28486 static void
28487 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
28489 int count = 0;
28490 int i;
28492 if (using_mfcr_multiple)
28494 for (i = 0; i < 8; i++)
28495 if (save_reg_p (CR0_REGNO + i))
28496 count++;
28497 gcc_assert (count);
28500 if (using_mfcr_multiple && count > 1)
28502 rtx_insn *insn;
28503 rtvec p;
28504 int ndx;
28506 p = rtvec_alloc (count);
28508 ndx = 0;
28509 for (i = 0; i < 8; i++)
28510 if (save_reg_p (CR0_REGNO + i))
28512 rtvec r = rtvec_alloc (2);
28513 RTVEC_ELT (r, 0) = reg;
28514 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
28515 RTVEC_ELT (p, ndx) =
28516 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
28517 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
28518 ndx++;
28520 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28521 gcc_assert (ndx == count);
28523 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28524 CR field separately. */
28525 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28527 for (i = 0; i < 8; i++)
28528 if (save_reg_p (CR0_REGNO + i))
28529 add_reg_note (insn, REG_CFA_RESTORE,
28530 gen_rtx_REG (SImode, CR0_REGNO + i));
28532 RTX_FRAME_RELATED_P (insn) = 1;
28535 else
28536 for (i = 0; i < 8; i++)
28537 if (save_reg_p (CR0_REGNO + i))
28539 rtx insn = emit_insn (gen_movsi_to_cr_one
28540 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28542 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28543 CR field separately, attached to the insn that in fact
28544 restores this particular CR field. */
28545 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28547 add_reg_note (insn, REG_CFA_RESTORE,
28548 gen_rtx_REG (SImode, CR0_REGNO + i));
28550 RTX_FRAME_RELATED_P (insn) = 1;
28554 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
28555 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
28556 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28558 rtx_insn *insn = get_last_insn ();
28559 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28561 add_reg_note (insn, REG_CFA_RESTORE, cr);
28562 RTX_FRAME_RELATED_P (insn) = 1;
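/* (On the single-field path above, each gen_movsi_to_cr_one amounts
   to an "mtcrf FXM,reg" with one FXM bit set, e.g. "mtcrf 0x20,rX"
   to restore CR2 alone, mirroring the 1 << (7-i) masks used on the
   multiple-field path.)  */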
28566 /* Like cr, the move to lr instruction can be scheduled after the
28567 stack deallocation, but unlike cr, its stack frame save is still
28568 valid. So we only need to emit the cfa_restore on the correct
28569 instruction. */
28571 static void
28572 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
28574 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
28575 rtx reg = gen_rtx_REG (Pmode, regno);
28577 emit_move_insn (reg, mem);
28580 static void
28581 restore_saved_lr (int regno, bool exit_func)
28583 rtx reg = gen_rtx_REG (Pmode, regno);
28584 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28585 rtx_insn *insn = emit_move_insn (lr, reg);
28587 if (!exit_func && flag_shrink_wrap)
28589 add_reg_note (insn, REG_CFA_RESTORE, lr);
28590 RTX_FRAME_RELATED_P (insn) = 1;
28594 static rtx
28595 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
28597 if (DEFAULT_ABI == ABI_ELFv2)
28599 int i;
28600 for (i = 0; i < 8; i++)
28601 if (save_reg_p (CR0_REGNO + i))
28603 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
28604 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
28605 cfa_restores);
28608 else if (info->cr_save_p)
28609 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28610 gen_rtx_REG (SImode, CR2_REGNO),
28611 cfa_restores);
28613 if (info->lr_save_p)
28614 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28615 gen_rtx_REG (Pmode, LR_REGNO),
28616 cfa_restores);
28617 return cfa_restores;
28620 /* Return true if OFFSET from stack pointer can be clobbered by signals.
28621 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
28622 below the stack pointer that are not clobbered by signals. */
28624 static inline bool
28625 offset_below_red_zone_p (HOST_WIDE_INT offset)
28627 return offset < (DEFAULT_ABI == ABI_V4
28628 ? 0
28629 : TARGET_32BIT ? -220 : -288);
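/* (Illustratively, offset_below_red_zone_p (-300) holds for 64-bit
   AIX/ELF, where only 288 bytes below the stack pointer are
   protected, while offset_below_red_zone_p (-100) does not.)  */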
28632 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
28634 static void
28635 emit_cfa_restores (rtx cfa_restores)
28637 rtx_insn *insn = get_last_insn ();
28638 rtx *loc = &REG_NOTES (insn);
28640 while (*loc)
28641 loc = &XEXP (*loc, 1);
28642 *loc = cfa_restores;
28643 RTX_FRAME_RELATED_P (insn) = 1;
28646 /* Emit function epilogue as insns. */
28648 void
28649 rs6000_emit_epilogue (int sibcall)
28651 rs6000_stack_t *info;
28652 int restoring_GPRs_inline;
28653 int restoring_FPRs_inline;
28654 int using_load_multiple;
28655 int using_mtcr_multiple;
28656 int use_backchain_to_restore_sp;
28657 int restore_lr;
28658 int strategy;
28659 HOST_WIDE_INT frame_off = 0;
28660 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28661 rtx frame_reg_rtx = sp_reg_rtx;
28662 rtx cfa_restores = NULL_RTX;
28663 rtx insn;
28664 rtx cr_save_reg = NULL_RTX;
28665 machine_mode reg_mode = Pmode;
28666 int reg_size = TARGET_32BIT ? 4 : 8;
28667 int i;
28668 bool exit_func;
28669 unsigned ptr_regno;
28671 info = rs6000_stack_info ();
28673 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
28675 reg_mode = V2SImode;
28676 reg_size = 8;
28679 strategy = info->savres_strategy;
28680 using_load_multiple = strategy & REST_MULTIPLE;
28681 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28682 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28683 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
28684 || rs6000_cpu == PROCESSOR_PPC603
28685 || rs6000_cpu == PROCESSOR_PPC750
28686 || optimize_size);
28687 /* Restore via the backchain when we have a large frame, since this
28688 is more efficient than an addis, addi pair. The second condition
28689 here will not trigger at the moment; we don't actually need a
28690 frame pointer for alloca, but the generic parts of the compiler
28691 give us one anyway. */
28692 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28693 ? info->lr_save_offset
28694 : 0) > 32767
28695 || (cfun->calls_alloca
28696 && !frame_pointer_needed));
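/* (Restoring via the backchain is a single load of the caller's SP
   from offset 0 of the current frame, e.g. "ld r11,0(r1)", instead
   of reconstructing the frame size with an addis/addi pair.)  */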
28697 restore_lr = (info->lr_save_p
28698 && (restoring_FPRs_inline
28699 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28700 && (restoring_GPRs_inline
28701 || info->first_fp_reg_save < 64));
28703 if (WORLD_SAVE_P (info))
28705 int i, j;
28706 char rname[30];
28707 const char *alloc_rname;
28708 rtvec p;
28710 /* eh_rest_world_r10 will return to the location saved in the LR
28711 stack slot (which is not likely to be our caller).
28712 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28713 rest_world is similar, except any R10 parameter is ignored.
28714 The exception-handling stuff that was here in 2.95 is no
28715 longer necessary. */
28717 p = rtvec_alloc (9
28718 + 32 - info->first_gp_reg_save
28719 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28720 + 63 + 1 - info->first_fp_reg_save);
28722 strcpy (rname, ((crtl->calls_eh_return) ?
28723 "*eh_rest_world_r10" : "*rest_world"));
28724 alloc_rname = ggc_strdup (rname);
28726 j = 0;
28727 RTVEC_ELT (p, j++) = ret_rtx;
28728 RTVEC_ELT (p, j++)
28729 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28730 /* The instruction pattern requires a clobber here;
28731 it is shared with the restVEC helper. */
28732 RTVEC_ELT (p, j++)
28733 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
28736 /* CR register traditionally saved as CR2. */
28737 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28738 RTVEC_ELT (p, j++)
28739 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28740 if (flag_shrink_wrap)
28742 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28743 gen_rtx_REG (Pmode, LR_REGNO),
28744 cfa_restores);
28745 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28749 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28751 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28752 RTVEC_ELT (p, j++)
28753 = gen_frame_load (reg,
28754 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28755 if (flag_shrink_wrap)
28756 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28758 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28760 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28761 RTVEC_ELT (p, j++)
28762 = gen_frame_load (reg,
28763 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28764 if (flag_shrink_wrap)
28765 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28767 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28769 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28770 ? DFmode : SFmode),
28771 info->first_fp_reg_save + i);
28772 RTVEC_ELT (p, j++)
28773 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28774 if (flag_shrink_wrap)
28775 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28777 RTVEC_ELT (p, j++)
28778 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28779 RTVEC_ELT (p, j++)
28780 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28781 RTVEC_ELT (p, j++)
28782 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28783 RTVEC_ELT (p, j++)
28784 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28785 RTVEC_ELT (p, j++)
28786 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28787 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28789 if (flag_shrink_wrap)
28791 REG_NOTES (insn) = cfa_restores;
28792 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28793 RTX_FRAME_RELATED_P (insn) = 1;
28795 return;
28798 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28799 if (info->push_p)
28800 frame_off = info->total_size;
28802 /* Restore AltiVec registers if we must do so before adjusting the
28803 stack. */
28804 if (info->altivec_size != 0
28805 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28806 || (DEFAULT_ABI != ABI_V4
28807 && offset_below_red_zone_p (info->altivec_save_offset))))
28809 int i;
28810 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28812 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28813 if (use_backchain_to_restore_sp)
28815 int frame_regno = 11;
28817 if ((strategy & REST_INLINE_VRS) == 0)
28819 /* Of r11 and r12, select the one not clobbered by an
28820 out-of-line restore function for the frame register. */
28821 frame_regno = 11 + 12 - scratch_regno;
28823 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28824 emit_move_insn (frame_reg_rtx,
28825 gen_rtx_MEM (Pmode, sp_reg_rtx));
28826 frame_off = 0;
28828 else if (frame_pointer_needed)
28829 frame_reg_rtx = hard_frame_pointer_rtx;
28831 if ((strategy & REST_INLINE_VRS) == 0)
28833 int end_save = info->altivec_save_offset + info->altivec_size;
28834 int ptr_off;
28835 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28836 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28838 if (end_save + frame_off != 0)
28840 rtx offset = GEN_INT (end_save + frame_off);
28842 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28844 else
28845 emit_move_insn (ptr_reg, frame_reg_rtx);
28847 ptr_off = -end_save;
28848 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28849 info->altivec_save_offset + ptr_off,
28850 0, V4SImode, SAVRES_VR);
28852 else
28854 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28855 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28857 rtx addr, areg, mem, insn;
28858 rtx reg = gen_rtx_REG (V4SImode, i);
28859 HOST_WIDE_INT offset
28860 = (info->altivec_save_offset + frame_off
28861 + 16 * (i - info->first_altivec_reg_save));
28863 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28865 mem = gen_frame_mem (V4SImode,
28866 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28867 GEN_INT (offset)));
28868 insn = gen_rtx_SET (reg, mem);
28870 else
28872 areg = gen_rtx_REG (Pmode, 0);
28873 emit_move_insn (areg, GEN_INT (offset));
28875 /* AltiVec addressing mode is [reg+reg]. */
28876 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28877 mem = gen_frame_mem (V4SImode, addr);
28879 /* Rather than emitting a generic move, force use of the
28880 lvx instruction, which we always want. In particular we
28881 don't want lxvd2x/xxpermdi for little endian. */
28882 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28885 (void) emit_insn (insn);
28889 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28890 if (((strategy & REST_INLINE_VRS) == 0
28891 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28892 && (flag_shrink_wrap
28893 || (offset_below_red_zone_p
28894 (info->altivec_save_offset
28895 + 16 * (i - info->first_altivec_reg_save)))))
28897 rtx reg = gen_rtx_REG (V4SImode, i);
28898 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28902 /* Restore VRSAVE if we must do so before adjusting the stack. */
28903 if (info->vrsave_size != 0
28904 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28905 || (DEFAULT_ABI != ABI_V4
28906 && offset_below_red_zone_p (info->vrsave_save_offset))))
28908 rtx reg;
28910 if (frame_reg_rtx == sp_reg_rtx)
28912 if (use_backchain_to_restore_sp)
28914 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28915 emit_move_insn (frame_reg_rtx,
28916 gen_rtx_MEM (Pmode, sp_reg_rtx));
28917 frame_off = 0;
28919 else if (frame_pointer_needed)
28920 frame_reg_rtx = hard_frame_pointer_rtx;
28923 reg = gen_rtx_REG (SImode, 12);
28924 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28925 info->vrsave_save_offset + frame_off));
28927 emit_insn (generate_set_vrsave (reg, info, 1));
28930 insn = NULL_RTX;
28931 /* If we have a large stack frame, restore the old stack pointer
28932 using the backchain. */
28933 if (use_backchain_to_restore_sp)
28935 if (frame_reg_rtx == sp_reg_rtx)
28937 /* Under V.4, don't reset the stack pointer until after we're done
28938 loading the saved registers. */
28939 if (DEFAULT_ABI == ABI_V4)
28940 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28942 insn = emit_move_insn (frame_reg_rtx,
28943 gen_rtx_MEM (Pmode, sp_reg_rtx));
28944 frame_off = 0;
28946 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28947 && DEFAULT_ABI == ABI_V4)
28948 /* frame_reg_rtx has been set up by the altivec restore. */
28950 else
28952 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28953 frame_reg_rtx = sp_reg_rtx;
28956 /* If we have a frame pointer, we can restore the old stack pointer
28957 from it. */
28958 else if (frame_pointer_needed)
28960 frame_reg_rtx = sp_reg_rtx;
28961 if (DEFAULT_ABI == ABI_V4)
28962 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28963 /* Prevent reordering memory accesses against stack pointer restore. */
28964 else if (cfun->calls_alloca
28965 || offset_below_red_zone_p (-info->total_size))
28966 rs6000_emit_stack_tie (frame_reg_rtx, true);
28968 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28969 GEN_INT (info->total_size)));
28970 frame_off = 0;
28972 else if (info->push_p
28973 && DEFAULT_ABI != ABI_V4
28974 && !crtl->calls_eh_return)
28976 /* Prevent reordering memory accesses against stack pointer restore. */
28977 if (cfun->calls_alloca
28978 || offset_below_red_zone_p (-info->total_size))
28979 rs6000_emit_stack_tie (frame_reg_rtx, false);
28980 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28981 GEN_INT (info->total_size)));
28982 frame_off = 0;
28984 if (insn && frame_reg_rtx == sp_reg_rtx)
28986 if (cfa_restores)
28988 REG_NOTES (insn) = cfa_restores;
28989 cfa_restores = NULL_RTX;
28991 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28992 RTX_FRAME_RELATED_P (insn) = 1;
28995 /* Restore AltiVec registers if we have not done so already. */
28996 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28997 && info->altivec_size != 0
28998 && (DEFAULT_ABI == ABI_V4
28999 || !offset_below_red_zone_p (info->altivec_save_offset)))
29001 int i;
29003 if ((strategy & REST_INLINE_VRS) == 0)
29005 int end_save = info->altivec_save_offset + info->altivec_size;
29006 int ptr_off;
29007 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
29008 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
29009 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
29011 if (end_save + frame_off != 0)
29013 rtx offset = GEN_INT (end_save + frame_off);
29015 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29017 else
29018 emit_move_insn (ptr_reg, frame_reg_rtx);
29020 ptr_off = -end_save;
29021 insn = rs6000_emit_savres_rtx (info, scratch_reg,
29022 info->altivec_save_offset + ptr_off,
29023 0, V4SImode, SAVRES_VR);
29024 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
29026 /* Frame reg was clobbered by out-of-line save. Restore it
29027 from ptr_reg, and if we are calling out-of-line gpr or
29028 fpr restore set up the correct pointer and offset. */
29029 unsigned newptr_regno = 1;
29030 if (!restoring_GPRs_inline)
29032 bool lr = info->gp_save_offset + info->gp_size == 0;
29033 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29034 newptr_regno = ptr_regno_for_savres (sel);
29035 end_save = info->gp_save_offset + info->gp_size;
29037 else if (!restoring_FPRs_inline)
29039 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
29040 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29041 newptr_regno = ptr_regno_for_savres (sel);
29042 end_save = info->fp_save_offset + info->fp_size;
29045 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
29046 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
29048 if (end_save + ptr_off != 0)
29050 rtx offset = GEN_INT (end_save + ptr_off);
29052 frame_off = -end_save;
29053 if (TARGET_32BIT)
29054 emit_insn (gen_addsi3_carry (frame_reg_rtx,
29055 ptr_reg, offset));
29056 else
29057 emit_insn (gen_adddi3_carry (frame_reg_rtx,
29058 ptr_reg, offset));
29060 else
29062 frame_off = ptr_off;
29063 emit_move_insn (frame_reg_rtx, ptr_reg);
29067 else
29069 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29070 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
29072 rtx addr, areg, mem, insn;
29073 rtx reg = gen_rtx_REG (V4SImode, i);
29074 HOST_WIDE_INT offset
29075 = (info->altivec_save_offset + frame_off
29076 + 16 * (i - info->first_altivec_reg_save));
29078 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
29080 mem = gen_frame_mem (V4SImode,
29081 gen_rtx_PLUS (Pmode, frame_reg_rtx,
29082 GEN_INT (offset)));
29083 insn = gen_rtx_SET (reg, mem);
29085 else
29087 areg = gen_rtx_REG (Pmode, 0);
29088 emit_move_insn (areg, GEN_INT (offset));
29090 /* AltiVec addressing mode is [reg+reg]. */
29091 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
29092 mem = gen_frame_mem (V4SImode, addr);
29094 /* Rather than emitting a generic move, force use of the
29095 lvx instruction, which we always want. In particular we
29096 don't want lxvd2x/xxpermdi for little endian. */
29097 insn = gen_altivec_lvx_v4si_internal (reg, mem);
29100 (void) emit_insn (insn);
29104 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
29105 if (((strategy & REST_INLINE_VRS) == 0
29106 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
29107 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
29109 rtx reg = gen_rtx_REG (V4SImode, i);
29110 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29114 /* Restore VRSAVE if we have not done so already. */
29115 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
29116 && info->vrsave_size != 0
29117 && (DEFAULT_ABI == ABI_V4
29118 || !offset_below_red_zone_p (info->vrsave_save_offset)))
29120 rtx reg;
29122 reg = gen_rtx_REG (SImode, 12);
29123 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29124 info->vrsave_save_offset + frame_off));
29126 emit_insn (generate_set_vrsave (reg, info, 1));
29129 /* If we exit by an out-of-line restore function on ABI_V4 then that
29130 function will deallocate the stack, so we don't need to worry
29131 about the unwinder restoring cr from an invalid stack frame
29132 location. */
29133 exit_func = (!restoring_FPRs_inline
29134 || (!restoring_GPRs_inline
29135 && info->first_fp_reg_save == 64));
29137 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
29138 *separate* slots if the routine calls __builtin_eh_return, so
29139 that they can be independently restored by the unwinder. */
29140 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
29142 int i, cr_off = info->ehcr_offset;
29144 for (i = 0; i < 8; i++)
29145 if (!call_used_regs[CR0_REGNO + i])
29147 rtx reg = gen_rtx_REG (SImode, 0);
29148 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29149 cr_off + frame_off));
29151 insn = emit_insn (gen_movsi_to_cr_one
29152 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
29154 if (!exit_func && flag_shrink_wrap)
29156 add_reg_note (insn, REG_CFA_RESTORE,
29157 gen_rtx_REG (SImode, CR0_REGNO + i));
29159 RTX_FRAME_RELATED_P (insn) = 1;
29162 cr_off += reg_size;
29166 /* Get the old lr if we saved it. If we are restoring registers
29167 out-of-line, then the out-of-line routines can do this for us. */
29168 if (restore_lr && restoring_GPRs_inline)
29169 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29171 /* Get the old cr if we saved it. */
29172 if (info->cr_save_p)
29174 unsigned cr_save_regno = 12;
29176 if (!restoring_GPRs_inline)
29178 /* Ensure we don't use the register used by the out-of-line
29179 gpr register restore below. */
29180 bool lr = info->gp_save_offset + info->gp_size == 0;
29181 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
29182 int gpr_ptr_regno = ptr_regno_for_savres (sel);
29184 if (gpr_ptr_regno == 12)
29185 cr_save_regno = 11;
29186 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
29188 else if (REGNO (frame_reg_rtx) == 12)
29189 cr_save_regno = 11;
29191 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
29192 info->cr_save_offset + frame_off,
29193 exit_func);
29196 /* Set LR here to try to overlap restores below. */
29197 if (restore_lr && restoring_GPRs_inline)
29198 restore_saved_lr (0, exit_func);
29200 /* Load exception handler data registers, if needed. */
29201 if (crtl->calls_eh_return)
29203 unsigned int i, regno;
29205 if (TARGET_AIX)
29207 rtx reg = gen_rtx_REG (reg_mode, 2);
29208 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29209 frame_off + RS6000_TOC_SAVE_SLOT));
29212 for (i = 0; ; ++i)
29214 rtx mem;
29216 regno = EH_RETURN_DATA_REGNO (i);
29217 if (regno == INVALID_REGNUM)
29218 break;
29220 /* Note: possible use of r0 here to address SPE regs. */
29221 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
29222 info->ehrd_offset + frame_off
29223 + reg_size * (int) i);
29225 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
29229 /* Restore GPRs. This is done as a PARALLEL if we are using
29230 the load-multiple instructions. */
29231 if (TARGET_SPE_ABI
29232 && info->spe_64bit_regs_used
29233 && info->first_gp_reg_save != 32)
29235 /* Determine whether we can address all of the registers that need
29236 to be restored with an offset from frame_reg_rtx that fits in
29237 the small const field for SPE memory instructions. */
29238 int spe_regs_addressable
29239 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29240 + reg_size * (32 - info->first_gp_reg_save - 1))
29241 && restoring_GPRs_inline);
29243 if (!spe_regs_addressable)
29245 int ool_adjust = 0;
29246 rtx old_frame_reg_rtx = frame_reg_rtx;
29247 /* Make r11 point to the start of the SPE save area. We worried about
29248 not clobbering it when we were saving registers in the prologue.
29249 There's no need to worry here because the static chain is passed
29250 anew to every function. */
29252 if (!restoring_GPRs_inline)
29253 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29254 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
29255 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
29256 GEN_INT (info->spe_gp_save_offset
29257 + frame_off
29258 - ool_adjust)));
29259 /* Keep the invariant that frame_reg_rtx + frame_off points
29260 at the top of the stack frame. */
29261 frame_off = -info->spe_gp_save_offset + ool_adjust;
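/* Illustrative numbers (not from the source): with
   spe_gp_save_offset == -160, frame_off == 0 and ool_adjust == 0,
   the add above makes r11 = old_frame_reg - 160, and the new
   frame_off of 160 keeps r11 + frame_off pointing at the same
   place old_frame_reg + frame_off did.  */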
29264 if (restoring_GPRs_inline)
29266 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
29268 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29269 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29271 rtx offset, addr, mem, reg;
29273 /* We're doing all this to ensure that the immediate offset
29274 fits into the immediate field of 'evldd'. */
29275 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
29277 offset = GEN_INT (spe_offset + reg_size * i);
29278 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
29279 mem = gen_rtx_MEM (V2SImode, addr);
29280 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
29282 emit_move_insn (reg, mem);
29285 else
29286 rs6000_emit_savres_rtx (info, frame_reg_rtx,
29287 info->spe_gp_save_offset + frame_off,
29288 info->lr_save_offset + frame_off,
29289 reg_mode,
29290 SAVRES_GPR | SAVRES_LR);
29292 else if (!restoring_GPRs_inline)
29294 /* We are jumping to an out-of-line function. */
29295 rtx ptr_reg;
29296 int end_save = info->gp_save_offset + info->gp_size;
29297 bool can_use_exit = end_save == 0;
29298 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
29299 int ptr_off;
29301 /* Emit stack reset code if we need it. */
29302 ptr_regno = ptr_regno_for_savres (sel);
29303 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29304 if (can_use_exit)
29305 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29306 else if (end_save + frame_off != 0)
29307 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
29308 GEN_INT (end_save + frame_off)));
29309 else if (REGNO (frame_reg_rtx) != ptr_regno)
29310 emit_move_insn (ptr_reg, frame_reg_rtx);
29311 if (REGNO (frame_reg_rtx) == ptr_regno)
29312 frame_off = -end_save;
29314 if (can_use_exit && info->cr_save_p)
29315 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
29317 ptr_off = -end_save;
29318 rs6000_emit_savres_rtx (info, ptr_reg,
29319 info->gp_save_offset + ptr_off,
29320 info->lr_save_offset + ptr_off,
29321 reg_mode, sel);
29323 else if (using_load_multiple)
29325 rtvec p;
29326 p = rtvec_alloc (32 - info->first_gp_reg_save);
29327 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29328 RTVEC_ELT (p, i)
29329 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29330 frame_reg_rtx,
29331 info->gp_save_offset + frame_off + reg_size * i);
29332 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29334 else
29336 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29337 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29338 emit_insn (gen_frame_load
29339 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29340 frame_reg_rtx,
29341 info->gp_save_offset + frame_off + reg_size * i));
29344 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29346 /* If the frame pointer was used then we can't delay emitting
29347 a REG_CFA_DEF_CFA note. This must happen on the insn that
29348 restores the frame pointer, r31. We may have already emitted
29349 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
29350 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
29351 be harmless if emitted. */
29352 if (frame_pointer_needed)
29354 insn = get_last_insn ();
29355 add_reg_note (insn, REG_CFA_DEF_CFA,
29356 plus_constant (Pmode, frame_reg_rtx, frame_off));
29357 RTX_FRAME_RELATED_P (insn) = 1;
29360 /* Set up cfa_restores. We always need these when
29361 shrink-wrapping. If not shrink-wrapping then we only need
29362 the cfa_restore when the stack location is no longer valid.
29363 The cfa_restores must be emitted on or before the insn that
29364 invalidates the stack, and of course must not be emitted
29365 before the insn that actually does the restore. The latter
29366 is why it is a bad idea to emit the cfa_restores as a group
29367 on the last instruction here that actually does a restore:
29368 That insn may be reordered with respect to others doing
29369 restores. */
29370 if (flag_shrink_wrap
29371 && !restoring_GPRs_inline
29372 && info->first_fp_reg_save == 64)
29373 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29375 for (i = info->first_gp_reg_save; i < 32; i++)
29376 if (!restoring_GPRs_inline
29377 || using_load_multiple
29378 || rs6000_reg_live_or_pic_offset_p (i))
29380 rtx reg = gen_rtx_REG (reg_mode, i);
29382 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29386 if (!restoring_GPRs_inline
29387 && info->first_fp_reg_save == 64)
29389 /* We are jumping to an out-of-line function. */
29390 if (cfa_restores)
29391 emit_cfa_restores (cfa_restores);
29392 return;
29395 if (restore_lr && !restoring_GPRs_inline)
29397 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
29398 restore_saved_lr (0, exit_func);
29401 /* Restore fpr's if we need to do it without calling a function. */
29402 if (restoring_FPRs_inline)
29403 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29404 if (save_reg_p (info->first_fp_reg_save + i))
29406 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29407 ? DFmode : SFmode),
29408 info->first_fp_reg_save + i);
29409 emit_insn (gen_frame_load (reg, frame_reg_rtx,
29410 info->fp_save_offset + frame_off + 8 * i));
29411 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
29412 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29415 /* If we saved cr, restore it here. Just those that were used. */
29416 if (info->cr_save_p)
29417 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
29419 /* If this is V.4, unwind the stack pointer after all of the loads
29420 have been done, or set up r11 if we are restoring fp out of line. */
29421 ptr_regno = 1;
29422 if (!restoring_FPRs_inline)
29424 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29425 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
29426 ptr_regno = ptr_regno_for_savres (sel);
29429 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
29430 if (REGNO (frame_reg_rtx) == ptr_regno)
29431 frame_off = 0;
29433 if (insn && restoring_FPRs_inline)
29435 if (cfa_restores)
29437 REG_NOTES (insn) = cfa_restores;
29438 cfa_restores = NULL_RTX;
29440 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
29441 RTX_FRAME_RELATED_P (insn) = 1;
29444 if (crtl->calls_eh_return)
29446 rtx sa = EH_RETURN_STACKADJ_RTX;
29447 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
29450 if (!sibcall && restoring_FPRs_inline)
29452 if (cfa_restores)
29454 /* We can't hang the cfa_restores off a simple return,
29455 since the shrink-wrap code sometimes uses an existing
29456 return. This means there might be a path from
29457 pre-prologue code to this return, and dwarf2cfi code
29458 wants the eh_frame unwinder state to be the same on
29459 all paths to any point. So we need to emit the
29460 cfa_restores before the return. For -m64 we really
29461 don't need epilogue cfa_restores at all, except for
29462 this irritating dwarf2cfi-with-shrink-wrap
29463 requirement; the stack red-zone means eh_frame info
29464 from the prologue telling the unwinder to restore
29465 from the stack is perfectly good right to the end of
29466 the function. */
29467 emit_insn (gen_blockage ());
29468 emit_cfa_restores (cfa_restores);
29469 cfa_restores = NULL_RTX;
29472 emit_jump_insn (targetm.gen_simple_return ());
29475 if (!sibcall && !restoring_FPRs_inline)
29477 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29478 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
29479 int elt = 0;
29480 RTVEC_ELT (p, elt++) = ret_rtx;
29481 if (lr)
29482 RTVEC_ELT (p, elt++)
29483 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29485 /* We have to restore more than two FP registers, so branch to the
29486 restore function. It will return to our caller. */
29487 int i;
29488 int reg;
29489 rtx sym;
29491 if (flag_shrink_wrap)
29492 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29494 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
29495 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
29496 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
29497 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
29499 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29501 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
29503 RTVEC_ELT (p, elt++)
29504 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
29505 if (flag_shrink_wrap)
29506 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29509 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
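/* Illustrative example (the routine name is an assumption based on
   the libgcc out-of-line register save/restore routines, not taken
   from this file): with info->first_fp_reg_save == 61 (f29-f31
   saved) and the routine restoring LR, the PARALLEL built above has
   3 + 1 + 3 == 7 elements: a return, a clobber of LR, a use of the
   routine symbol (something like _restfpr_29_x), a use of the
   pointer register, and three frame loads for f29-f31.  */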
29512 if (cfa_restores)
29514 if (sibcall)
29515 /* Ensure the cfa_restores are hung off an insn that won't
29516 be reordered above other restores. */
29517 emit_insn (gen_blockage ());
29519 emit_cfa_restores (cfa_restores);
29523 /* Write function epilogue. */
29525 static void
29526 rs6000_output_function_epilogue (FILE *file,
29527 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
29529 #if TARGET_MACHO
29530 macho_branch_islands ();
29531 /* Mach-O doesn't support labels at the end of objects, so if
29532 it looks like we might want one, insert a NOP. */
29534 rtx_insn *insn = get_last_insn ();
29535 rtx_insn *deleted_debug_label = NULL;
29536 while (insn
29537 && NOTE_P (insn)
29538 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
29540 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
29541 notes; instead set their CODE_LABEL_NUMBER to -1 so that
29542 there are no code generation differences between
29543 -g and -g0. */
29544 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29545 deleted_debug_label = insn;
29546 insn = PREV_INSN (insn);
29548 if (insn
29549 && (LABEL_P (insn)
29550 || (NOTE_P (insn)
29551 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
29552 fputs ("\tnop\n", file);
29553 else if (deleted_debug_label)
29554 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
29555 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29556 CODE_LABEL_NUMBER (insn) = -1;
29558 #endif
29560 /* Output a traceback table here. See /usr/include/sys/debug.h for info
29561 on its format.
29563 We don't output a traceback table if -finhibit-size-directive was
29564 used. The documentation for -finhibit-size-directive reads
29565 ``don't output a @code{.size} assembler directive, or anything
29566 else that would cause trouble if the function is split in the
29567 middle, and the two halves are placed at locations far apart in
29568 memory.'' The traceback table has this property, since it
29569 includes the offset from the start of the function to the
29570 traceback table itself.
29572 System V.4 PowerPC targets (and the embedded ABI derived from them) use a
29573 different traceback table. */
29574 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29575 && ! flag_inhibit_size_directive
29576 && rs6000_traceback != traceback_none && !cfun->is_thunk)
29578 const char *fname = NULL;
29579 const char *language_string = lang_hooks.name;
29580 int fixed_parms = 0, float_parms = 0, parm_info = 0;
29581 int i;
29582 int optional_tbtab;
29583 rs6000_stack_t *info = rs6000_stack_info ();
29585 if (rs6000_traceback == traceback_full)
29586 optional_tbtab = 1;
29587 else if (rs6000_traceback == traceback_part)
29588 optional_tbtab = 0;
29589 else
29590 optional_tbtab = !optimize_size && !TARGET_ELF;
29592 if (optional_tbtab)
29594 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29595 while (*fname == '.') /* V.4 encodes . in the name */
29596 fname++;
29598 /* Need label immediately before tbtab, so we can compute
29599 its offset from the function start. */
29600 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29601 ASM_OUTPUT_LABEL (file, fname);
29604 /* The .tbtab pseudo-op can only be used for the first eight
29605 expressions, since it can't handle the possibly variable
29606 length fields that follow. However, if you omit the optional
29607 fields, the assembler outputs zeros for all optional fields
29608 anyway, giving each variable-length field its minimum length
29609 (as defined in sys/debug.h). Thus we cannot use the .tbtab
29610 pseudo-op at all. */
29612 /* An all-zero word flags the start of the tbtab, for debuggers
29613 that have to find it by searching forward from the entry
29614 point or from the current pc. */
29615 fputs ("\t.long 0\n", file);
29617 /* Tbtab format type. Use format type 0. */
29618 fputs ("\t.byte 0,", file);
29620 /* Language type. Unfortunately, there does not seem to be any
29621 official way to discover the language being compiled, so we
29622 use language_string.
29623 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
29624 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29625 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
29626 either, so for now use 0. */
29627 if (lang_GNU_C ()
29628 || ! strcmp (language_string, "GNU GIMPLE")
29629 || ! strcmp (language_string, "GNU Go")
29630 || ! strcmp (language_string, "libgccjit"))
29631 i = 0;
29632 else if (! strcmp (language_string, "GNU F77")
29633 || lang_GNU_Fortran ())
29634 i = 1;
29635 else if (! strcmp (language_string, "GNU Pascal"))
29636 i = 2;
29637 else if (! strcmp (language_string, "GNU Ada"))
29638 i = 3;
29639 else if (lang_GNU_CXX ()
29640 || ! strcmp (language_string, "GNU Objective-C++"))
29641 i = 9;
29642 else if (! strcmp (language_string, "GNU Java"))
29643 i = 13;
29644 else if (! strcmp (language_string, "GNU Objective-C"))
29645 i = 14;
29646 else
29647 gcc_unreachable ();
29648 fprintf (file, "%d,", i);
29650 /* 8 single bit fields: global linkage (not set for C extern linkage,
29651 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29652 from start of procedure stored in tbtab, internal function, function
29653 has controlled storage, function has no toc, function uses fp,
29654 function logs/aborts fp operations. */
29655 /* Assume that fp operations are used if any fp reg must be saved. */
29656 fprintf (file, "%d,",
29657 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
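/* For instance, with a full traceback table (optional_tbtab == 1)
   and at least one FP register saved, this emits
   (1 << 5) | (1 << 1) == 34.  */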
29659 /* 6 bitfields: function is interrupt handler, name present in
29660 proc table, function calls alloca, on condition directives
29661 (controls stack walks, 3 bits), saves condition reg, saves
29662 link reg. */
29663 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29664 set up as a frame pointer, even when there is no alloca call. */
29665 fprintf (file, "%d,",
29666 ((optional_tbtab << 6)
29667 | ((optional_tbtab & frame_pointer_needed) << 5)
29668 | (info->cr_save_p << 1)
29669 | (info->lr_save_p)));
29671 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29672 (6 bits). */
29673 fprintf (file, "%d,",
29674 (info->push_p << 7) | (64 - info->first_fp_reg_save));
29676 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29677 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29679 if (optional_tbtab)
29681 /* Compute the parameter info from the function decl argument
29682 list. */
29683 tree decl;
29684 int next_parm_info_bit = 31;
29686 for (decl = DECL_ARGUMENTS (current_function_decl);
29687 decl; decl = DECL_CHAIN (decl))
29689 rtx parameter = DECL_INCOMING_RTL (decl);
29690 machine_mode mode = GET_MODE (parameter);
29692 if (GET_CODE (parameter) == REG)
29694 if (SCALAR_FLOAT_MODE_P (mode))
29696 int bits;
29698 float_parms++;
29700 switch (mode)
29702 case SFmode:
29703 case SDmode:
29704 bits = 0x2;
29705 break;
29707 case DFmode:
29708 case DDmode:
29709 case TFmode:
29710 case TDmode:
29711 case IFmode:
29712 case KFmode:
29713 bits = 0x3;
29714 break;
29716 default:
29717 gcc_unreachable ();
29720 /* If only one bit will fit, don't OR in this entry. */
29721 if (next_parm_info_bit > 0)
29722 parm_info |= (bits << (next_parm_info_bit - 1));
29723 next_parm_info_bit -= 2;
29725 else
29727 fixed_parms += ((GET_MODE_SIZE (mode)
29728 + (UNITS_PER_WORD - 1))
29729 / UNITS_PER_WORD);
29730 next_parm_info_bit -= 1;
29736 /* Number of fixed point parameters. */
29737 /* This is actually the number of words of fixed point parameters; thus
29738 an 8 byte struct counts as 2; and thus the maximum value is 8. */
29739 fprintf (file, "%d,", fixed_parms);
29741 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29742 all on stack. */
29743 /* This is actually the number of fp registers that hold parameters;
29744 and thus the maximum value is 13. */
29745 /* Set parameters on stack bit if parameters are not in their original
29746 registers, regardless of whether they are on the stack? Xlc
29747 seems to set the bit when not optimizing. */
29748 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29750 if (! optional_tbtab)
29751 return;
29753 /* Optional fields follow. Some are variable length. */
29755 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
29756 11 double float. */
29757 /* There is an entry for each parameter in a register, in the order that
29758 they occur in the parameter list. Any intervening arguments on the
29759 stack are ignored. If the list overflows a long (max possible length
29760 34 bits) then completely leave off all elements that don't fit. */
29761 /* Only emit this long if there was at least one parameter. */
29762 if (fixed_parms || float_parms)
29763 fprintf (file, "\t.long %d\n", parm_info);
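/* Illustrative example (not from the source): for f (int a, double b)
   the int leaves bit 31 clear (0 == fixed) and the double stores 11
   in bits 30-29, so parm_info == 0x60000000.  */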
29765 /* Offset from start of code to tb table. */
29766 fputs ("\t.long ", file);
29767 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29768 RS6000_OUTPUT_BASENAME (file, fname);
29769 putc ('-', file);
29770 rs6000_output_function_entry (file, fname);
29771 putc ('\n', file);
29773 /* Interrupt handler mask. */
29774 /* Omit this long, since we never set the interrupt handler bit
29775 above. */
29777 /* Number of CTL (controlled storage) anchors. */
29778 /* Omit this long, since the has_ctl bit is never set above. */
29780 /* Displacement into stack of each CTL anchor. */
29781 /* Omit this list of longs, because there are no CTL anchors. */
29783 /* Length of function name. */
29784 if (*fname == '*')
29785 ++fname;
29786 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29788 /* Function name. */
29789 assemble_string (fname, strlen (fname));
29791 /* Register for alloca automatic storage; this is always reg 31.
29792 Only emit this if the alloca bit was set above. */
29793 if (frame_pointer_needed)
29794 fputs ("\t.byte 31\n", file);
29796 fputs ("\t.align 2\n", file);
29799 /* Arrange to define .LCTOC1 label, if not already done. */
29800 if (need_toc_init)
29802 need_toc_init = 0;
29803 if (!toc_initialized)
29805 switch_to_section (toc_section);
29806 switch_to_section (current_function_section ());
29811 /* -fsplit-stack support. */
29813 /* A SYMBOL_REF for __morestack. */
29814 static GTY(()) rtx morestack_ref;
29816 static rtx
29817 gen_add3_const (rtx rt, rtx ra, long c)
29819 if (TARGET_64BIT)
29820 return gen_adddi3 (rt, ra, GEN_INT (c));
29821 else
29822 return gen_addsi3 (rt, ra, GEN_INT (c));
29825 /* Emit -fsplit-stack prologue, which goes before the regular function
29826 prologue (at local entry point in the case of ELFv2). */
29828 void
29829 rs6000_expand_split_stack_prologue (void)
29831 rs6000_stack_t *info = rs6000_stack_info ();
29832 unsigned HOST_WIDE_INT allocate;
29833 long alloc_hi, alloc_lo;
29834 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29835 rtx_insn *insn;
29837 gcc_assert (flag_split_stack && reload_completed);
29839 if (!info->push_p)
29840 return;
29842 if (global_regs[29])
29844 error ("-fsplit-stack uses register r29");
29845 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29846 "conflicts with %qD", global_regs_decl[29]);
29849 allocate = info->total_size;
29850 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29852 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29853 return;
29855 if (morestack_ref == NULL_RTX)
29857 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29858 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29859 | SYMBOL_FLAG_FUNCTION);
29862 r0 = gen_rtx_REG (Pmode, 0);
29863 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29864 r12 = gen_rtx_REG (Pmode, 12);
29865 emit_insn (gen_load_split_stack_limit (r0));
29866 /* Always emit two insns here to calculate the requested stack,
29867 so that the linker can edit them when adjusting size for calling
29868 non-split-stack code. */
29869 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29870 alloc_lo = -allocate - alloc_hi;
29871 if (alloc_hi != 0)
29873 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29874 if (alloc_lo != 0)
29875 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29876 else
29877 emit_insn (gen_nop ());
29879 else
29881 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29882 emit_insn (gen_nop ());
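/* Worked example (illustrative values): allocate == 0x12345 gives
   alloc_hi == -0x10000 and alloc_lo == -0x2345, so the two insns
   compute r12 = r1 - 0x12345.  The +0x8000 rounding above
   guarantees that alloc_lo always fits in a signed 16-bit
   immediate.  */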
29885 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29886 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29887 ok_label = gen_label_rtx ();
29888 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29889 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29890 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29891 pc_rtx);
29892 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29893 JUMP_LABEL (jump) = ok_label;
29894 /* Mark the jump as very likely to be taken. */
29895 add_int_reg_note (jump, REG_BR_PROB,
29896 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
29898 lr = gen_rtx_REG (Pmode, LR_REGNO);
29899 insn = emit_move_insn (r0, lr);
29900 RTX_FRAME_RELATED_P (insn) = 1;
29901 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29902 RTX_FRAME_RELATED_P (insn) = 1;
29904 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29905 const0_rtx, const0_rtx));
29906 call_fusage = NULL_RTX;
29907 use_reg (&call_fusage, r12);
29908 /* Say the call uses r0, even though it doesn't, to stop regrename
29909 from twiddling with the insns saving lr, trashing args for cfun.
29910 The insns restoring lr are similarly protected by making
29911 split_stack_return use r0. */
29912 use_reg (&call_fusage, r0);
29913 add_function_usage_to (insn, call_fusage);
29914 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29915 insn = emit_move_insn (lr, r0);
29916 add_reg_note (insn, REG_CFA_RESTORE, lr);
29917 RTX_FRAME_RELATED_P (insn) = 1;
29918 emit_insn (gen_split_stack_return ());
29920 emit_label (ok_label);
29921 LABEL_NUSES (ok_label) = 1;
29924 /* Return the internal arg pointer used for function incoming
29925 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29926 to copy it to a pseudo in order for it to be preserved over calls
29927 and suchlike. We'd really like to use a pseudo here for the
29928 internal arg pointer but data-flow analysis is not prepared to
29929 accept pseudos as live at the beginning of a function. */
29931 static rtx
29932 rs6000_internal_arg_pointer (void)
29934 if (flag_split_stack
29935 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29936 == NULL))
29939 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29941 rtx pat;
29943 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29944 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29946 /* Put the pseudo initialization right after the note at the
29947 beginning of the function. */
29948 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29949 gen_rtx_REG (Pmode, 12));
29950 push_topmost_sequence ();
29951 emit_insn_after (pat, get_insns ());
29952 pop_topmost_sequence ();
29954 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29955 FIRST_PARM_OFFSET (current_function_decl));
29957 return virtual_incoming_args_rtx;
29960 /* We may have to tell the dataflow pass that the split stack prologue
29961 is initializing a register. */
29963 static void
29964 rs6000_live_on_entry (bitmap regs)
29966 if (flag_split_stack)
29967 bitmap_set_bit (regs, 12);
29970 /* Emit -fsplit-stack dynamic stack allocation space check. */
29972 void
29973 rs6000_split_stack_space_check (rtx size, rtx label)
29975 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29976 rtx limit = gen_reg_rtx (Pmode);
29977 rtx requested = gen_reg_rtx (Pmode);
29978 rtx cmp = gen_reg_rtx (CCUNSmode);
29979 rtx jump;
29981 emit_insn (gen_load_split_stack_limit (limit));
29982 if (CONST_INT_P (size))
29983 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29984 else
29986 size = force_reg (Pmode, size);
29987 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29989 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29990 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29991 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29992 gen_rtx_LABEL_REF (VOIDmode, label),
29993 pc_rtx);
29994 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29995 JUMP_LABEL (jump) = label;
29998 /* A C compound statement that outputs the assembler code for a thunk
29999 function, used to implement C++ virtual function calls with
30000 multiple inheritance. The thunk acts as a wrapper around a virtual
30001 function, adjusting the implicit object parameter before handing
30002 control off to the real function.
30004 First, emit code to add the integer DELTA to the location that
30005 contains the incoming first argument. Assume that this argument
30006 contains a pointer, and is the one used to pass the `this' pointer
30007 in C++. This is the incoming argument *before* the function
30008 prologue, e.g. `%o0' on a sparc. The addition must preserve the
30009 values of all other incoming arguments.
30011 After the addition, emit code to jump to FUNCTION, which is a
30012 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
30013 not touch the return address. Hence returning from FUNCTION will
30014 return to whoever called the current `thunk'.
30016 The effect must be as if FUNCTION had been called directly with the
30017 adjusted first argument. This macro is responsible for emitting
30018 all of the code for a thunk function; output_function_prologue()
30019 and output_function_epilogue() are not invoked.
30021 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
30022 been extracted from it.) It might possibly be useful on some
30023 targets, but probably not.
30025 If you do not define this macro, the target-independent code in the
30026 C++ frontend will generate a less efficient heavyweight thunk that
30027 calls FUNCTION instead of jumping to it. The generic approach does
30028 not support varargs. */
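/* As a rough C sketch of what the emitted thunk does (illustrative
   pseudo-code only, not the actual RTL generated below):

     this += delta;
     if (vcall_offset != 0)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;   // sibcall; the return address is untouched
*/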
30030 static void
30031 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
30032 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
30033 tree function)
30035 rtx this_rtx, funexp;
30036 rtx_insn *insn;
30038 reload_completed = 1;
30039 epilogue_completed = 1;
30041 /* Mark the end of the (empty) prologue. */
30042 emit_note (NOTE_INSN_PROLOGUE_END);
30044 /* Find the "this" pointer. If the function returns a structure,
30045 the structure return pointer is in r3. */
30046 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
30047 this_rtx = gen_rtx_REG (Pmode, 4);
30048 else
30049 this_rtx = gen_rtx_REG (Pmode, 3);
30051 /* Apply the constant offset, if required. */
30052 if (delta)
30053 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
30055 /* Apply the offset from the vtable, if required. */
30056 if (vcall_offset)
30058 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
30059 rtx tmp = gen_rtx_REG (Pmode, 12);
30061 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
30062 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
30064 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
30065 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
30067 else
30069 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
30071 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
30073 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
30076 /* Generate a tail call to the target function. */
30077 if (!TREE_USED (function))
30079 assemble_external (function);
30080 TREE_USED (function) = 1;
30082 funexp = XEXP (DECL_RTL (function), 0);
30083 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
30085 #if TARGET_MACHO
30086 if (MACHOPIC_INDIRECT)
30087 funexp = machopic_indirect_call_target (funexp);
30088 #endif
30090 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
30091 generate sibcall RTL explicitly. */
30092 insn = emit_call_insn (
30093 gen_rtx_PARALLEL (VOIDmode,
30094 gen_rtvec (3,
30095 gen_rtx_CALL (VOIDmode,
30096 funexp, const0_rtx),
30097 gen_rtx_USE (VOIDmode, const0_rtx),
30098 simple_return_rtx)));
30099 SIBLING_CALL_P (insn) = 1;
30100 emit_barrier ();
30102 /* Run just enough of rest_of_compilation to get the insns emitted.
30103 There's not really enough bulk here to make other passes such as
30104 instruction scheduling worth while. Note that use_thunk calls
30105 assemble_start_function and assemble_end_function. */
30106 insn = get_insns ();
30107 shorten_branches (insn);
30108 final_start_function (insn, file, 1);
30109 final (insn, file, 1);
30110 final_end_function ();
30112 reload_completed = 0;
30113 epilogue_completed = 0;
30116 /* A quick summary of the various types of 'constant-pool tables'
30117 under PowerPC:
30119 Target       Flags           Name             One table per
30120 AIX          (none)          AIX TOC          object file
30121 AIX          -mfull-toc      AIX TOC          object file
30122 AIX          -mminimal-toc   AIX minimal TOC  translation unit
30123 SVR4/EABI    (none)          SVR4 SDATA       object file
30124 SVR4/EABI    -fpic           SVR4 pic         object file
30125 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
30126 SVR4/EABI    -mrelocatable   EABI TOC         function
30127 SVR4/EABI    -maix           AIX TOC          object file
30128 SVR4/EABI    -maix -mminimal-toc
30129                              AIX minimal TOC  translation unit
30131 Name             Reg.  Set by  entries   contains:
30132                                made by   addrs?  fp?      sum?
30134 AIX TOC          2     crt0    as        Y       option   option
30135 AIX minimal TOC  30    prolog  gcc       Y       Y        option
30136 SVR4 SDATA       13    crt0    gcc       N       Y        N
30137 SVR4 pic         30    prolog  ld        Y       not yet  N
30138 SVR4 PIC         30    prolog  gcc       Y       option   option
30139 EABI TOC         30    prolog  gcc       Y       option   option
30143 /* Hash functions for the hash table. */
30145 static unsigned
30146 rs6000_hash_constant (rtx k)
30148 enum rtx_code code = GET_CODE (k);
30149 machine_mode mode = GET_MODE (k);
30150 unsigned result = (code << 3) ^ mode;
30151 const char *format;
30152 int flen, fidx;
30154 format = GET_RTX_FORMAT (code);
30155 flen = strlen (format);
30156 fidx = 0;
30158 switch (code)
30160 case LABEL_REF:
30161 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
30163 case CONST_WIDE_INT:
30165 int i;
30166 flen = CONST_WIDE_INT_NUNITS (k);
30167 for (i = 0; i < flen; i++)
30168 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
30169 return result;
30172 case CONST_DOUBLE:
30173 if (mode != VOIDmode)
30174 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
30175 flen = 2;
30176 break;
30178 case CODE_LABEL:
30179 fidx = 3;
30180 break;
30182 default:
30183 break;
30186 for (; fidx < flen; fidx++)
30187 switch (format[fidx])
30189 case 's':
30191 unsigned i, len;
30192 const char *str = XSTR (k, fidx);
30193 len = strlen (str);
30194 result = result * 613 + len;
30195 for (i = 0; i < len; i++)
30196 result = result * 613 + (unsigned) str[i];
30197 break;
30199 case 'u':
30200 case 'e':
30201 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
30202 break;
30203 case 'i':
30204 case 'n':
30205 result = result * 613 + (unsigned) XINT (k, fidx);
30206 break;
30207 case 'w':
30208 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
30209 result = result * 613 + (unsigned) XWINT (k, fidx);
30210 else
30212 size_t i;
30213 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
30214 result = result * 613 + (unsigned) (XWINT (k, fidx)
30215 >> CHAR_BIT * i);
30217 break;
30218 case '0':
30219 break;
30220 default:
30221 gcc_unreachable ();
30224 return result;
30227 hashval_t
30228 toc_hasher::hash (toc_hash_struct *thc)
30230 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
30233 /* Compare H1 and H2 for equivalence. */
30235 bool
30236 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
30238 rtx r1 = h1->key;
30239 rtx r2 = h2->key;
30241 if (h1->key_mode != h2->key_mode)
30242 return 0;
30244 return rtx_equal_p (r1, r2);
30247 /* These are the names given by the C++ front-end to vtables and
30248 vtable-like objects. Ideally, this logic should not be here;
30249 instead, there should be some programmatic way of inquiring as
30250 to whether or not an object is a vtable. */
30252 #define VTABLE_NAME_P(NAME) \
30253 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
30254 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
30255 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
30256 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
30257 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
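/* For reference, under the Itanium C++ ABI these prefixes denote, for
   a class Foo: _ZTV3Foo (vtable), _ZTT3Foo (VTT), _ZTI3Foo (typeinfo)
   and _ZTC... (construction vtables); "_vt." is the old GNU v2
   mangling.  */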
30259 #ifdef NO_DOLLAR_IN_LABEL
30260 /* Return a GGC-allocated character string translating dollar signs in
30261 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
30263 const char *
30264 rs6000_xcoff_strip_dollar (const char *name)
30266 char *strip, *p;
30267 const char *q;
30268 size_t len;
30270 q = (const char *) strchr (name, '$');
30272 if (q == 0 || q == name)
30273 return name;
30275 len = strlen (name);
30276 strip = XALLOCAVEC (char, len + 1);
30277 strcpy (strip, name);
30278 p = strip + (q - name);
30279 while (p)
30281 *p = '_';
30282 p = strchr (p + 1, '$');
30285 return ggc_alloc_string (strip, len);
30287 #endif
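/* Example (illustrative): rs6000_xcoff_strip_dollar ("f$g$h") returns
   "f_g_h"; a name with no '$', or whose first character is '$', is
   returned unchanged.  */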
30289 void
30290 rs6000_output_symbol_ref (FILE *file, rtx x)
30292 /* Currently C++ toc references to vtables can be emitted before it
30293 is decided whether the vtable is public or private. If this is
30294 the case, then the linker will eventually complain that there is
30295 a reference to an unknown section. Thus, for vtables only,
30296 we emit the TOC reference to reference the symbol and not the
30297 section. */
30298 const char *name = XSTR (x, 0);
30300 tree decl = SYMBOL_REF_DECL (x);
30301 if (decl /* sync condition with assemble_external () */
30302 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
30303 && (TREE_CODE (decl) == VAR_DECL
30304 || TREE_CODE (decl) == FUNCTION_DECL)
30305 && name[strlen (name) - 1] != ']')
30307 name = concat (name,
30308 (TREE_CODE (decl) == FUNCTION_DECL
30309 ? "[DS]" : "[UA]"),
30310 NULL);
30311 XSTR (x, 0) = name;
30314 if (VTABLE_NAME_P (name))
30316 RS6000_OUTPUT_BASENAME (file, name);
30318 else
30319 assemble_name (file, name);
30322 /* Output a TOC entry. We derive the entry name from what is being
30323 written. */
30325 void
30326 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
30328 char buf[256];
30329 const char *name = buf;
30330 rtx base = x;
30331 HOST_WIDE_INT offset = 0;
30333 gcc_assert (!TARGET_NO_TOC);
30335 /* When the linker won't eliminate them, don't output duplicate
30336 TOC entries (this happens on AIX if there is any kind of TOC,
30337 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
30338 CODE_LABELs. */
30339 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
30341 struct toc_hash_struct *h;
30343 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
30344 time because GGC is not initialized at that point. */
30345 if (toc_hash_table == NULL)
30346 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
30348 h = ggc_alloc<toc_hash_struct> ();
30349 h->key = x;
30350 h->key_mode = mode;
30351 h->labelno = labelno;
30353 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
30354 if (*found == NULL)
30355 *found = h;
30356 else /* This is indeed a duplicate.
30357 Set this label equal to that label. */
30359 fputs ("\t.set ", file);
30360 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30361 fprintf (file, "%d,", labelno);
30362 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
30363 fprintf (file, "%d\n", ((*found)->labelno));
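/* For example (the label prefix is target-dependent; assuming the
   XCOFF ASM_OUTPUT_INTERNAL_LABEL_PREFIX, which emits "LC.."), a
   constant first emitted under label 5 and requested again as label
   12 produces:  .set LC..12,LC..5  */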
30365 #ifdef HAVE_AS_TLS
30366 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
30367 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
30368 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
30370 fputs ("\t.set ", file);
30371 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30372 fprintf (file, "%d,", labelno);
30373 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
30374 fprintf (file, "%d\n", ((*found)->labelno));
30376 #endif
30377 return;
30381 /* If we're going to put a double constant in the TOC, make sure it's
30382 aligned properly when strict alignment is on. */
30383 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
30384 && STRICT_ALIGNMENT
30385 && GET_MODE_BITSIZE (mode) >= 64
30386 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
30387 ASM_OUTPUT_ALIGN (file, 3);
30390 (*targetm.asm_out.internal_label) (file, "LC", labelno);
30392 /* Handle FP constants specially. Note that if we have a minimal
30393 TOC, things we put here aren't actually in the TOC, so we can allow
30394 FP constants. */
30395 if (GET_CODE (x) == CONST_DOUBLE
30396 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
30397 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
30399 long k[4];
30401 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30402 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
30403 else
30404 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30406 if (TARGET_64BIT)
30408 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30409 fputs (DOUBLE_INT_ASM_OP, file);
30410 else
30411 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30412 k[0] & 0xffffffff, k[1] & 0xffffffff,
30413 k[2] & 0xffffffff, k[3] & 0xffffffff);
30414 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
30415 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30416 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
30417 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
30418 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
30419 return;
30421 else
30423 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30424 fputs ("\t.long ", file);
30425 else
30426 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
30427 k[0] & 0xffffffff, k[1] & 0xffffffff,
30428 k[2] & 0xffffffff, k[3] & 0xffffffff);
30429 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
30430 k[0] & 0xffffffff, k[1] & 0xffffffff,
30431 k[2] & 0xffffffff, k[3] & 0xffffffff);
30432 return;
30435 else if (GET_CODE (x) == CONST_DOUBLE
30436 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
30438 long k[2];
30440 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30441 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
30442 else
30443 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
30445 if (TARGET_64BIT)
30447 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30448 fputs (DOUBLE_INT_ASM_OP, file);
30449 else
30450 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30451 k[0] & 0xffffffff, k[1] & 0xffffffff);
30452 fprintf (file, "0x%lx%08lx\n",
30453 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30454 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
30455 return;
30457 else
30459 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30460 fputs ("\t.long ", file);
30461 else
30462 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30463 k[0] & 0xffffffff, k[1] & 0xffffffff);
30464 fprintf (file, "0x%lx,0x%lx\n",
30465 k[0] & 0xffffffff, k[1] & 0xffffffff);
30466 return;
30469 else if (GET_CODE (x) == CONST_DOUBLE
30470 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
30472 long l;
30474 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30475 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
30476 else
30477 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
30479 if (TARGET_64BIT)
30481 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30482 fputs (DOUBLE_INT_ASM_OP, file);
30483 else
30484 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30485 if (WORDS_BIG_ENDIAN)
30486 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
30487 else
30488 fprintf (file, "0x%lx\n", l & 0xffffffff);
30489 return;
30491 else
30493 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30494 fputs ("\t.long ", file);
30495 else
30496 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30497 fprintf (file, "0x%lx\n", l & 0xffffffff);
30498 return;
30501 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
30503 unsigned HOST_WIDE_INT low;
30504 HOST_WIDE_INT high;
30506 low = INTVAL (x) & 0xffffffff;
30507 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
30509 /* TOC entries are always Pmode-sized, so when big-endian
30510 smaller integer constants in the TOC need to be padded.
30511 (This is still a win over putting the constants in
30512 a separate constant pool, because then we'd have
30513 to have both a TOC entry _and_ the actual constant.)
30515 For a 32-bit target, CONST_INT values are loaded and shifted
30516 entirely within `low' and can be stored in one TOC entry. */
30518 /* It would be easy to make this work, but it doesn't now. */
30519 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
30521 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
30523 low |= high << 32;
30524 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
30525 high = (HOST_WIDE_INT) low >> 32;
30526 low &= 0xffffffff;
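/* Worked example (illustrative values): an SImode constant 1 in a
   64-bit big-endian TOC starts as low == 1, high == 0; the shift by
   POINTER_SIZE - 32 leaves high == 1, low == 0, and the entry is
   emitted below as 0x100000000 -- the value lands in the most
   significant half of the Pmode-sized word.  */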
30529 if (TARGET_64BIT)
30531 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30532 fputs (DOUBLE_INT_ASM_OP, file);
30533 else
30534 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30535 (long) high & 0xffffffff, (long) low & 0xffffffff);
30536 fprintf (file, "0x%lx%08lx\n",
30537 (long) high & 0xffffffff, (long) low & 0xffffffff);
30538 return;
30540 else
30542 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
30544 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30545 fputs ("\t.long ", file);
30546 else
30547 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30548 (long) high & 0xffffffff, (long) low & 0xffffffff);
30549 fprintf (file, "0x%lx,0x%lx\n",
30550 (long) high & 0xffffffff, (long) low & 0xffffffff);
30552 else
30554 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30555 fputs ("\t.long ", file);
30556 else
30557 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
30558 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
30560 return;
30564 if (GET_CODE (x) == CONST)
30566 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
30567 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
30569 base = XEXP (XEXP (x, 0), 0);
30570 offset = INTVAL (XEXP (XEXP (x, 0), 1));
30573 switch (GET_CODE (base))
30575 case SYMBOL_REF:
30576 name = XSTR (base, 0);
30577 break;
30579 case LABEL_REF:
30580 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
30581 CODE_LABEL_NUMBER (XEXP (base, 0)));
30582 break;
30584 case CODE_LABEL:
30585 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
30586 break;
30588 default:
30589 gcc_unreachable ();
30592 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30593 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
30594 else
30596 fputs ("\t.tc ", file);
30597 RS6000_OUTPUT_BASENAME (file, name);
30599 if (offset < 0)
30600 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
30601 else if (offset)
30602 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
30604 /* Mark large TOC symbols on AIX with [TE] so they are mapped
30605 after other TOC symbols, reducing overflow of small TOC access
30606 to [TC] symbols. */
30607 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
30608 ? "[TE]," : "[TC],", file);
30611 /* Currently C++ toc references to vtables can be emitted before it
30612 is decided whether the vtable is public or private. If this is
30613 the case, then the linker will eventually complain that there is
30614 a TOC reference to an unknown section. Thus, for vtables only,
30615 we emit the TOC reference to reference the symbol and not the
30616 section. */
30617 if (VTABLE_NAME_P (name))
30619 RS6000_OUTPUT_BASENAME (file, name);
30620 if (offset < 0)
30621 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
30622 else if (offset > 0)
30623 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
30625 else
30626 output_addr_const (file, x);
30628 #if HAVE_AS_TLS
30629 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
30631 switch (SYMBOL_REF_TLS_MODEL (base))
30633 case 0:
30634 break;
30635 case TLS_MODEL_LOCAL_EXEC:
30636 fputs ("@le", file);
30637 break;
30638 case TLS_MODEL_INITIAL_EXEC:
30639 fputs ("@ie", file);
30640 break;
30641 /* Use global-dynamic for local-dynamic. */
30642 case TLS_MODEL_GLOBAL_DYNAMIC:
30643 case TLS_MODEL_LOCAL_DYNAMIC:
30644 putc ('\n', file);
30645 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30646 fputs ("\t.tc .", file);
30647 RS6000_OUTPUT_BASENAME (file, name);
30648 fputs ("[TC],", file);
30649 output_addr_const (file, x);
30650 fputs ("@m", file);
30651 break;
30652 default:
30653 gcc_unreachable ();
30656 #endif
30658 putc ('\n', file);
30661 /* Output an assembler pseudo-op to write an ASCII string of N characters
30662 starting at P to FILE.
30664 On the RS/6000, we have to do this using the .byte operation and
30665 write out special characters outside the quoted string.
30666 Also, the assembler is broken; very long strings are truncated,
30667 so we must artificially break them up early. */
30669 void
30670 output_ascii (FILE *file, const char *p, int n)
30672 char c;
30673 int i, count_string;
30674 const char *for_string = "\t.byte \"";
30675 const char *for_decimal = "\t.byte ";
30676 const char *to_close = NULL;
30678 count_string = 0;
30679 for (i = 0; i < n; i++)
30681 c = *p++;
30682 if (c >= ' ' && c < 0177)
30684 if (for_string)
30685 fputs (for_string, file);
30686 putc (c, file);
30688 /* Write two quotes to get one. */
30689 if (c == '"')
30691 putc (c, file);
30692 ++count_string;
30695 for_string = NULL;
30696 for_decimal = "\"\n\t.byte ";
30697 to_close = "\"\n";
30698 ++count_string;
30700 if (count_string >= 512)
30702 fputs (to_close, file);
30704 for_string = "\t.byte \"";
30705 for_decimal = "\t.byte ";
30706 to_close = NULL;
30707 count_string = 0;
30710 else
30712 if (for_decimal)
30713 fputs (for_decimal, file);
30714 fprintf (file, "%d", c);
30716 for_string = "\n\t.byte \"";
30717 for_decimal = ", ";
30718 to_close = "\n";
30719 count_string = 0;
30723 /* Now close the string if we have written one. Then end the line. */
30724 if (to_close)
30725 fputs (to_close, file);
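/* Example (illustrative): output_ascii (file, "Hi\n", 3) emits

	.byte "Hi"
	.byte 10

   -- printable characters are batched into one quoted string and the
   newline falls back to a decimal .byte.  */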
30728 /* Generate a unique section name for FILENAME for a section type
30729 represented by SECTION_DESC. Output goes into BUF.
30731 SECTION_DESC can be any string, as long as it is different for each
30732 possible section type.
30734 We name the section in the same manner as xlc. The name begins with an
30735 underscore followed by the filename (after stripping any leading directory
30736 names) with the last period replaced by the string SECTION_DESC. If
30737 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30738 the name. */
30740 void
30741 rs6000_gen_section_name (char **buf, const char *filename,
30742 const char *section_desc)
30744 const char *q, *after_last_slash, *last_period = 0;
30745 char *p;
30746 int len;
30748 after_last_slash = filename;
30749 for (q = filename; *q; q++)
30751 if (*q == '/')
30752 after_last_slash = q + 1;
30753 else if (*q == '.')
30754 last_period = q;
30757 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30758 *buf = (char *) xmalloc (len);
30760 p = *buf;
30761 *p++ = '_';
30763 for (q = after_last_slash; *q; q++)
30765 if (q == last_period)
30767 strcpy (p, section_desc);
30768 p += strlen (section_desc);
30769 break;
30772 else if (ISALNUM (*q))
30773 *p++ = *q;
30776 if (last_period == 0)
30777 strcpy (p, section_desc);
30778 else
30779 *p = '\0';
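/* Example (illustrative; ".bss_" is one of the descs this file uses
   for XCOFF sections): rs6000_gen_section_name (&buf, "src/foo.c",
   ".bss_") yields "_foo.bss_" -- the directory part is stripped, and
   the last period and everything after it are replaced by
   SECTION_DESC.  */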
30782 /* Emit profile function. */
30784 void
30785 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30787 /* Non-standard profiling for kernels, which just saves LR then calls
30788 _mcount without worrying about arg saves. The idea is to change
30789 the function prologue as little as possible as it isn't easy to
30790 account for arg save/restore code added just for _mcount. */
30791 if (TARGET_PROFILE_KERNEL)
30792 return;
30794 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30796 #ifndef NO_PROFILE_COUNTERS
30797 # define NO_PROFILE_COUNTERS 0
30798 #endif
30799 if (NO_PROFILE_COUNTERS)
30800 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30801 LCT_NORMAL, VOIDmode, 0);
30802 else
30804 char buf[30];
30805 const char *label_name;
30806 rtx fun;
30808 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30809 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30810 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30812 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30813 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
30816 else if (DEFAULT_ABI == ABI_DARWIN)
30818 const char *mcount_name = RS6000_MCOUNT;
30819 int caller_addr_regno = LR_REGNO;
30821 /* Be conservative and always set this, at least for now. */
30822 crtl->uses_pic_offset_table = 1;
30824 #if TARGET_MACHO
30825 /* For PIC code, set up a stub and collect the caller's address
30826 from r0, which is where the prologue puts it. */
30827 if (MACHOPIC_INDIRECT
30828 && crtl->uses_pic_offset_table)
30829 caller_addr_regno = 0;
30830 #endif
30831 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30832 LCT_NORMAL, VOIDmode, 1,
30833 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30837 /* Write function profiler code. */
30839 void
30840 output_function_profiler (FILE *file, int labelno)
30842 char buf[100];
30844 switch (DEFAULT_ABI)
30846 default:
30847 gcc_unreachable ();
30849 case ABI_V4:
30850 if (!TARGET_32BIT)
30852 warning (0, "no profiling of 64-bit code for this ABI");
30853 return;
30855 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30856 fprintf (file, "\tmflr %s\n", reg_names[0]);
30857 if (NO_PROFILE_COUNTERS)
30859 asm_fprintf (file, "\tstw %s,4(%s)\n",
30860 reg_names[0], reg_names[1]);
30862 else if (TARGET_SECURE_PLT && flag_pic)
30864 if (TARGET_LINK_STACK)
30866 char name[32];
30867 get_ppc476_thunk_name (name);
30868 asm_fprintf (file, "\tbl %s\n", name);
30870 else
30871 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30872 asm_fprintf (file, "\tstw %s,4(%s)\n",
30873 reg_names[0], reg_names[1]);
30874 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30875 asm_fprintf (file, "\taddis %s,%s,",
30876 reg_names[12], reg_names[12]);
30877 assemble_name (file, buf);
30878 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30879 assemble_name (file, buf);
30880 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30882 else if (flag_pic == 1)
30884 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30885 asm_fprintf (file, "\tstw %s,4(%s)\n",
30886 reg_names[0], reg_names[1]);
30887 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30888 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30889 assemble_name (file, buf);
30890 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30892 else if (flag_pic > 1)
30894 asm_fprintf (file, "\tstw %s,4(%s)\n",
30895 reg_names[0], reg_names[1]);
30896 /* Now, we need to get the address of the label. */
30897 if (TARGET_LINK_STACK)
30899 char name[32];
30900 get_ppc476_thunk_name (name);
30901 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30902 assemble_name (file, buf);
30903 fputs ("-.\n1:", file);
30904 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30905 asm_fprintf (file, "\taddi %s,%s,4\n",
30906 reg_names[11], reg_names[11]);
30908 else
30910 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30911 assemble_name (file, buf);
30912 fputs ("-.\n1:", file);
30913 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30915 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30916 reg_names[0], reg_names[11]);
30917 asm_fprintf (file, "\tadd %s,%s,%s\n",
30918 reg_names[0], reg_names[0], reg_names[11]);
30920 else
30922 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30923 assemble_name (file, buf);
30924 fputs ("@ha\n", file);
30925 asm_fprintf (file, "\tstw %s,4(%s)\n",
30926 reg_names[0], reg_names[1]);
30927 asm_fprintf (file, "\tla %s,", reg_names[0]);
30928 assemble_name (file, buf);
30929 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30932 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30933 fprintf (file, "\tbl %s%s\n",
30934 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30935 break;
30937 case ABI_AIX:
30938 case ABI_ELFv2:
30939 case ABI_DARWIN:
30940 /* Don't do anything, done in output_profile_hook (). */
30941 break;
30947 /* The following variable holds the last insn that was issued. */
30949 static rtx_insn *last_scheduled_insn;
30951 /* The following variable helps to balance issuing of load and
30952 store instructions. */
30954 static int load_store_pendulum;
30956 /* The following variable helps pair divide insns during scheduling. */
30957 static int divide_cnt;
30958 /* The following variable helps pair and alternate vector and vector load
30959 insns during scheduling. */
30960 static int vec_load_pendulum;
30963 /* Power4 load update and store update instructions are cracked into a
30964 load or store and an integer insn which are executed in the same cycle.
30965 Branches have their own dispatch slot which does not count against the
30966 GCC issue rate, but it changes the program flow so there are no other
30967 instructions to issue in this cycle. */
30969 static int
30970 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30972 last_scheduled_insn = insn;
30973 if (GET_CODE (PATTERN (insn)) == USE
30974 || GET_CODE (PATTERN (insn)) == CLOBBER)
30976 cached_can_issue_more = more;
30977 return cached_can_issue_more;
30980 if (insn_terminates_group_p (insn, current_group))
30982 cached_can_issue_more = 0;
30983 return cached_can_issue_more;
30986 /* If the insn has no reservation but we reach here anyway, do not charge it an issue slot. */
30987 if (recog_memoized (insn) < 0)
30988 return more;
30990 if (rs6000_sched_groups)
30992 if (is_microcoded_insn (insn))
30993 cached_can_issue_more = 0;
30994 else if (is_cracked_insn (insn))
30995 cached_can_issue_more = more > 2 ? more - 2 : 0;
30996 else
30997 cached_can_issue_more = more - 1;
30999 return cached_can_issue_more;
31002 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
31003 return 0;
31005 cached_can_issue_more = more - 1;
31006 return cached_can_issue_more;
31009 static int
31010 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
31012 int r = rs6000_variable_issue_1 (insn, more);
31013 if (verbose)
31014 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
31015 return r;
31018 /* Adjust the cost of a scheduling dependency. Return the new cost of
31019 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
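/* For instance, in the code below a store whose address-generation
value is produced by a preceding multiply has its dependence cost
raised to 17 on Power6, and a compare feeding a dependent branch
costs an extra 2 cycles on the processors listed under TYPE_BRANCH. */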
31021 static int
31022 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
31023 unsigned int)
31025 enum attr_type attr_type;
31027 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
31028 return cost;
31030 switch (dep_type)
31032 case REG_DEP_TRUE:
31034 /* Data dependency; DEP_INSN writes a register that INSN reads
31035 some cycles later. */
31037 /* Separate a load from a narrower, dependent store. */
31038 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
31039 && GET_CODE (PATTERN (insn)) == SET
31040 && GET_CODE (PATTERN (dep_insn)) == SET
31041 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
31042 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
31043 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
31044 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
31045 return cost + 14;
31047 attr_type = get_attr_type (insn);
31049 switch (attr_type)
31051 case TYPE_JMPREG:
31052 /* Tell the first scheduling pass about the latency between
31053 a mtctr and bctr (and mtlr and br/blr). The first
31054 scheduling pass will not know about this latency since
31055 the mtctr instruction, which has the latency associated
31056 to it, will be generated by reload. */
31057 return 4;
31058 case TYPE_BRANCH:
31059 /* Leave some extra cycles between a compare and its
31060 dependent branch, to inhibit expensive mispredicts. */
31061 if ((rs6000_cpu_attr == CPU_PPC603
31062 || rs6000_cpu_attr == CPU_PPC604
31063 || rs6000_cpu_attr == CPU_PPC604E
31064 || rs6000_cpu_attr == CPU_PPC620
31065 || rs6000_cpu_attr == CPU_PPC630
31066 || rs6000_cpu_attr == CPU_PPC750
31067 || rs6000_cpu_attr == CPU_PPC7400
31068 || rs6000_cpu_attr == CPU_PPC7450
31069 || rs6000_cpu_attr == CPU_PPCE5500
31070 || rs6000_cpu_attr == CPU_PPCE6500
31071 || rs6000_cpu_attr == CPU_POWER4
31072 || rs6000_cpu_attr == CPU_POWER5
31073 || rs6000_cpu_attr == CPU_POWER7
31074 || rs6000_cpu_attr == CPU_POWER8
31075 || rs6000_cpu_attr == CPU_POWER9
31076 || rs6000_cpu_attr == CPU_CELL)
31077 && recog_memoized (dep_insn)
31078 && (INSN_CODE (dep_insn) >= 0))
31080 switch (get_attr_type (dep_insn))
31082 case TYPE_CMP:
31083 case TYPE_FPCOMPARE:
31084 case TYPE_CR_LOGICAL:
31085 case TYPE_DELAYED_CR:
31086 return cost + 2;
31087 case TYPE_EXTS:
31088 case TYPE_MUL:
31089 if (get_attr_dot (dep_insn) == DOT_YES)
31090 return cost + 2;
31091 else
31092 break;
31093 case TYPE_SHIFT:
31094 if (get_attr_dot (dep_insn) == DOT_YES
31095 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
31096 return cost + 2;
31097 else
31098 break;
31099 default:
31100 break;
31102 break;
31104 case TYPE_STORE:
31105 case TYPE_FPSTORE:
31106 if ((rs6000_cpu == PROCESSOR_POWER6)
31107 && recog_memoized (dep_insn)
31108 && (INSN_CODE (dep_insn) >= 0))
31111 if (GET_CODE (PATTERN (insn)) != SET)
31112 /* If this happens, we have to extend this to schedule
31113 optimally. Return default for now. */
31114 return cost;
31116 /* Adjust the cost for the case where the value written
31117 by a fixed point operation is used as the address
31118 gen value on a store. */
31119 switch (get_attr_type (dep_insn))
31121 case TYPE_LOAD:
31122 case TYPE_CNTLZ:
31124 if (! store_data_bypass_p (dep_insn, insn))
31125 return get_attr_sign_extend (dep_insn)
31126 == SIGN_EXTEND_YES ? 6 : 4;
31127 break;
31129 case TYPE_SHIFT:
31131 if (! store_data_bypass_p (dep_insn, insn))
31132 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31133 6 : 3;
31134 break;
31136 case TYPE_INTEGER:
31137 case TYPE_ADD:
31138 case TYPE_LOGICAL:
31139 case TYPE_EXTS:
31140 case TYPE_INSERT:
31142 if (! store_data_bypass_p (dep_insn, insn))
31143 return 3;
31144 break;
31146 case TYPE_STORE:
31147 case TYPE_FPLOAD:
31148 case TYPE_FPSTORE:
31150 if (get_attr_update (dep_insn) == UPDATE_YES
31151 && ! store_data_bypass_p (dep_insn, insn))
31152 return 3;
31153 break;
31155 case TYPE_MUL:
31157 if (! store_data_bypass_p (dep_insn, insn))
31158 return 17;
31159 break;
31161 case TYPE_DIV:
31163 if (! store_data_bypass_p (dep_insn, insn))
31164 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31165 break;
31167 default:
31168 break;
31171 break;
31173 case TYPE_LOAD:
31174 if ((rs6000_cpu == PROCESSOR_POWER6)
31175 && recog_memoized (dep_insn)
31176 && (INSN_CODE (dep_insn) >= 0))
31179 /* Adjust the cost for the case where the value written
31180 by a fixed point instruction is used within the address
31181 gen portion of a subsequent load(u)(x). */
31182 switch (get_attr_type (dep_insn))
31184 case TYPE_LOAD:
31185 case TYPE_CNTLZ:
31187 if (set_to_load_agen (dep_insn, insn))
31188 return get_attr_sign_extend (dep_insn)
31189 == SIGN_EXTEND_YES ? 6 : 4;
31190 break;
31192 case TYPE_SHIFT:
31194 if (set_to_load_agen (dep_insn, insn))
31195 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
31196 6 : 3;
31197 break;
31199 case TYPE_INTEGER:
31200 case TYPE_ADD:
31201 case TYPE_LOGICAL:
31202 case TYPE_EXTS:
31203 case TYPE_INSERT:
31205 if (set_to_load_agen (dep_insn, insn))
31206 return 3;
31207 break;
31209 case TYPE_STORE:
31210 case TYPE_FPLOAD:
31211 case TYPE_FPSTORE:
31213 if (get_attr_update (dep_insn) == UPDATE_YES
31214 && set_to_load_agen (dep_insn, insn))
31215 return 3;
31216 break;
31218 case TYPE_MUL:
31220 if (set_to_load_agen (dep_insn, insn))
31221 return 17;
31222 break;
31224 case TYPE_DIV:
31226 if (set_to_load_agen (dep_insn, insn))
31227 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
31228 break;
31230 default:
31231 break;
31234 break;
31236 case TYPE_FPLOAD:
31237 if ((rs6000_cpu == PROCESSOR_POWER6)
31238 && get_attr_update (insn) == UPDATE_NO
31239 && recog_memoized (dep_insn)
31240 && (INSN_CODE (dep_insn) >= 0)
31241 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
31242 return 2;
31244 default:
31245 break;
31248 /* Fall out to return default cost. */
31250 break;
31252 case REG_DEP_OUTPUT:
31253 /* Output dependency; DEP_INSN writes a register that INSN writes some
31254 cycles later. */
31255 if ((rs6000_cpu == PROCESSOR_POWER6)
31256 && recog_memoized (dep_insn)
31257 && (INSN_CODE (dep_insn) >= 0))
31259 attr_type = get_attr_type (insn);
31261 switch (attr_type)
31263 case TYPE_FP:
31264 case TYPE_FPSIMPLE:
31265 if (get_attr_type (dep_insn) == TYPE_FP
31266 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
31267 return 1;
31268 break;
31269 case TYPE_FPLOAD:
31270 if (get_attr_update (insn) == UPDATE_NO
31271 && get_attr_type (dep_insn) == TYPE_MFFGPR)
31272 return 2;
31273 break;
31274 default:
31275 break;
31278 /* Fall through, no cost for output dependency. */
31279 /* FALLTHRU */
31281 case REG_DEP_ANTI:
31282 /* Anti dependency; DEP_INSN reads a register that INSN writes some
31283 cycles later. */
31284 return 0;
31286 default:
31287 gcc_unreachable ();
31290 return cost;
31293 /* Debug version of rs6000_adjust_cost. */
31295 static int
31296 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
31297 int cost, unsigned int dw)
31299 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
31301 if (ret != cost)
31303 const char *dep;
31305 switch (dep_type)
31307 default: dep = "unknown dependency"; break;
31308 case REG_DEP_TRUE: dep = "data dependency"; break;
31309 case REG_DEP_OUTPUT: dep = "output dependency"; break;
31310 case REG_DEP_ANTI: dep = "anti dependency"; break;
31313 fprintf (stderr,
31314 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
31315 "%s, insn:\n", ret, cost, dep);
31317 debug_rtx (insn);
31320 return ret;
31323 /* Return true if INSN is microcoded, false otherwise. */
31326 static bool
31327 is_microcoded_insn (rtx_insn *insn)
31329 if (!insn || !NONDEBUG_INSN_P (insn)
31330 || GET_CODE (PATTERN (insn)) == USE
31331 || GET_CODE (PATTERN (insn)) == CLOBBER)
31332 return false;
31334 if (rs6000_cpu_attr == CPU_CELL)
31335 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
31337 if (rs6000_sched_groups
31338 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31340 enum attr_type type = get_attr_type (insn);
31341 if ((type == TYPE_LOAD
31342 && get_attr_update (insn) == UPDATE_YES
31343 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
31344 || ((type == TYPE_LOAD || type == TYPE_STORE)
31345 && get_attr_update (insn) == UPDATE_YES
31346 && get_attr_indexed (insn) == INDEXED_YES)
31347 || type == TYPE_MFCR)
31348 return true;
31351 return false;
31354 /* The function returns true if INSN is cracked into 2 instructions
31355 by the processor (and therefore occupies 2 issue slots). */
31357 static bool
31358 is_cracked_insn (rtx_insn *insn)
31360 if (!insn || !NONDEBUG_INSN_P (insn)
31361 || GET_CODE (PATTERN (insn)) == USE
31362 || GET_CODE (PATTERN (insn)) == CLOBBER)
31363 return false;
31365 if (rs6000_sched_groups
31366 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
31368 enum attr_type type = get_attr_type (insn);
31369 if ((type == TYPE_LOAD
31370 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31371 && get_attr_update (insn) == UPDATE_NO)
31372 || (type == TYPE_LOAD
31373 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
31374 && get_attr_update (insn) == UPDATE_YES
31375 && get_attr_indexed (insn) == INDEXED_NO)
31376 || (type == TYPE_STORE
31377 && get_attr_update (insn) == UPDATE_YES
31378 && get_attr_indexed (insn) == INDEXED_NO)
31379 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
31380 && get_attr_update (insn) == UPDATE_YES)
31381 || type == TYPE_DELAYED_CR
31382 || (type == TYPE_EXTS
31383 && get_attr_dot (insn) == DOT_YES)
31384 || (type == TYPE_SHIFT
31385 && get_attr_dot (insn) == DOT_YES
31386 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
31387 || (type == TYPE_MUL
31388 && get_attr_dot (insn) == DOT_YES)
31389 || type == TYPE_DIV
31390 || (type == TYPE_INSERT
31391 && get_attr_size (insn) == SIZE_32))
31392 return true;
31395 return false;
31398 /* The function returns true if INSN can be issued only from
31399 the branch slot. */
31401 static bool
31402 is_branch_slot_insn (rtx_insn *insn)
31404 if (!insn || !NONDEBUG_INSN_P (insn)
31405 || GET_CODE (PATTERN (insn)) == USE
31406 || GET_CODE (PATTERN (insn)) == CLOBBER)
31407 return false;
31409 if (rs6000_sched_groups)
31411 enum attr_type type = get_attr_type (insn);
31412 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
31413 return true;
31414 return false;
31417 return false;
31420 /* Return true if OUT_INSN sets a value that is used in the address
31421 generation computation of IN_INSN. */
31422 static bool
31423 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
31425 rtx out_set, in_set;
31427 /* For performance reasons, only handle the simple case where
31428 both loads are a single_set. */
31429 out_set = single_set (out_insn);
31430 if (out_set)
31432 in_set = single_set (in_insn);
31433 if (in_set)
31434 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
31437 return false;
31440 /* Try to determine base/offset/size parts of the given MEM.
31441 Return true if successful, false if any of the values couldn't
31442 be determined.
31444 This function only looks for REG or REG+CONST address forms.
31445 REG+REG address form will return false. */
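/* E.g. for a MEM of known size 4 whose address is
(plus (reg 9) (const_int 8)), this sets *BASE to (reg 9), *OFFSET
to 8 and *SIZE to 4; for an indexed (REG+REG) address it returns
false. */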
31447 static bool
31448 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
31449 HOST_WIDE_INT *size)
31451 rtx addr_rtx;
31452 if (MEM_SIZE_KNOWN_P (mem))
31453 *size = MEM_SIZE (mem);
31454 else
31455 return false;
31457 addr_rtx = (XEXP (mem, 0));
31458 if (GET_CODE (addr_rtx) == PRE_MODIFY)
31459 addr_rtx = XEXP (addr_rtx, 1);
31461 *offset = 0;
31462 while (GET_CODE (addr_rtx) == PLUS
31463 && CONST_INT_P (XEXP (addr_rtx, 1)))
31465 *offset += INTVAL (XEXP (addr_rtx, 1));
31466 addr_rtx = XEXP (addr_rtx, 0);
31468 if (!REG_P (addr_rtx))
31469 return false;
31471 *base = addr_rtx;
31472 return true;
31475 /* Return true if the target storage location of MEM1 is adjacent
31476 to the target storage location of MEM2. */
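/* E.g. two 4-byte accesses off the same base register at offsets 0
and 4 are adjacent; at offsets 0 and 8 they are not. */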
31479 static bool
31480 adjacent_mem_locations (rtx mem1, rtx mem2)
31482 rtx reg1, reg2;
31483 HOST_WIDE_INT off1, size1, off2, size2;
31485 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31486 && get_memref_parts (mem2, &reg2, &off2, &size2))
31487 return ((REGNO (reg1) == REGNO (reg2))
31488 && ((off1 + size1 == off2)
31489 || (off2 + size2 == off1)));
31491 return false;
31494 /* This function returns true if it can be determined that the two MEM
31495 locations overlap by at least 1 byte based on base reg/offset/size. */
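/* E.g. an 8-byte access at offset 0 overlaps a 4-byte access at
offset 4 from the same base register. */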
31497 static bool
31498 mem_locations_overlap (rtx mem1, rtx mem2)
31500 rtx reg1, reg2;
31501 HOST_WIDE_INT off1, size1, off2, size2;
31503 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31504 && get_memref_parts (mem2, &reg2, &off2, &size2))
31505 return ((REGNO (reg1) == REGNO (reg2))
31506 && (((off1 <= off2) && (off1 + size1 > off2))
31507 || ((off2 <= off1) && (off2 + size2 > off1))));
31509 return false;
31512 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
31513 Increasing the priority makes INSN execute earlier; reducing it
31514 makes INSN execute later. */
31518 static int
31519 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
31521 rtx load_mem, str_mem;
31522 /* On machines (like the 750) which have asymmetric integer units,
31523 where one integer unit can do multiply and divides and the other
31524 can't, reduce the priority of multiply/divide so it is scheduled
31525 before other integer operations. */
31527 #if 0
31528 if (! INSN_P (insn))
31529 return priority;
31531 if (GET_CODE (PATTERN (insn)) == USE)
31532 return priority;
31534 switch (rs6000_cpu_attr) {
31535 case CPU_PPC750:
31536 switch (get_attr_type (insn))
31538 default:
31539 break;
31541 case TYPE_MUL:
31542 case TYPE_DIV:
31543 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
31544 priority, priority);
31545 if (priority >= 0 && priority < 0x01000000)
31546 priority >>= 3;
31547 break;
31550 #endif
31552 if (insn_must_be_first_in_group (insn)
31553 && reload_completed
31554 && current_sched_info->sched_max_insns_priority
31555 && rs6000_sched_restricted_insns_priority)
31558 /* Prioritize insns that can be dispatched only in the first
31559 dispatch slot. */
31560 if (rs6000_sched_restricted_insns_priority == 1)
31561 /* Attach highest priority to insn. This means that in
31562 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
31563 precede 'priority' (critical path) considerations. */
31564 return current_sched_info->sched_max_insns_priority;
31565 else if (rs6000_sched_restricted_insns_priority == 2)
31566 /* Increase priority of insn by a minimal amount. This means that in
31567 haifa-sched.c:ready_sort(), only 'priority' (critical path)
31568 considerations precede dispatch-slot restriction considerations. */
31569 return (priority + 1);
31572 if (rs6000_cpu == PROCESSOR_POWER6
31573 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
31574 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
31575 /* Attach highest priority to insn if the scheduler has just issued two
31576 stores and this instruction is a load, or two loads and this instruction
31577 is a store. Power6 wants loads and stores scheduled alternately
31578 when possible. */
31579 return current_sched_info->sched_max_insns_priority;
31581 return priority;
31584 /* Return true if the instruction is nonpipelined on the Cell. */
31585 static bool
31586 is_nonpipeline_insn (rtx_insn *insn)
31588 enum attr_type type;
31589 if (!insn || !NONDEBUG_INSN_P (insn)
31590 || GET_CODE (PATTERN (insn)) == USE
31591 || GET_CODE (PATTERN (insn)) == CLOBBER)
31592 return false;
31594 type = get_attr_type (insn);
31595 if (type == TYPE_MUL
31596 || type == TYPE_DIV
31597 || type == TYPE_SDIV
31598 || type == TYPE_DDIV
31599 || type == TYPE_SSQRT
31600 || type == TYPE_DSQRT
31601 || type == TYPE_MFCR
31602 || type == TYPE_MFCRF
31603 || type == TYPE_MFJMPR)
31605 return true;
31607 return false;
31611 /* Return how many instructions the machine can issue per cycle. */
31613 static int
31614 rs6000_issue_rate (void)
31616 /* Unless scheduling for register pressure, use issue rate of 1 for
31617 first scheduling pass to decrease degradation. */
31618 if (!reload_completed && !flag_sched_pressure)
31619 return 1;
31621 switch (rs6000_cpu_attr) {
31622 case CPU_RS64A:
31623 case CPU_PPC601: /* ? */
31624 case CPU_PPC7450:
31625 return 3;
31626 case CPU_PPC440:
31627 case CPU_PPC603:
31628 case CPU_PPC750:
31629 case CPU_PPC7400:
31630 case CPU_PPC8540:
31631 case CPU_PPC8548:
31632 case CPU_CELL:
31633 case CPU_PPCE300C2:
31634 case CPU_PPCE300C3:
31635 case CPU_PPCE500MC:
31636 case CPU_PPCE500MC64:
31637 case CPU_PPCE5500:
31638 case CPU_PPCE6500:
31639 case CPU_TITAN:
31640 return 2;
31641 case CPU_PPC476:
31642 case CPU_PPC604:
31643 case CPU_PPC604E:
31644 case CPU_PPC620:
31645 case CPU_PPC630:
31646 return 4;
31647 case CPU_POWER4:
31648 case CPU_POWER5:
31649 case CPU_POWER6:
31650 case CPU_POWER7:
31651 return 5;
31652 case CPU_POWER8:
31653 return 7;
31654 case CPU_POWER9:
31655 return 6;
31656 default:
31657 return 1;
31661 /* Return how many instructions to look ahead for better insn
31662 scheduling. */
31664 static int
31665 rs6000_use_sched_lookahead (void)
31667 switch (rs6000_cpu_attr)
31669 case CPU_PPC8540:
31670 case CPU_PPC8548:
31671 return 4;
31673 case CPU_CELL:
31674 return (reload_completed ? 8 : 0);
31676 default:
31677 return 0;
31681 /* We are choosing insn from the ready queue. Return zero if INSN can be
31682 chosen. */
31683 static int
31684 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31686 if (ready_index == 0)
31687 return 0;
31689 if (rs6000_cpu_attr != CPU_CELL)
31690 return 0;
31692 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31694 if (!reload_completed
31695 || is_nonpipeline_insn (insn)
31696 || is_microcoded_insn (insn))
31697 return 1;
31699 return 0;
31702 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31703 and return true. */
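/* E.g. applied to the pattern (set (reg:SI 3) (mem:SI (reg:SI 4))),
this sets *MEM_REF to the inner MEM and returns true. */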
31705 static bool
31706 find_mem_ref (rtx pat, rtx *mem_ref)
31708 const char * fmt;
31709 int i, j;
31711 /* stack_tie does not produce any real memory traffic. */
31712 if (tie_operand (pat, VOIDmode))
31713 return false;
31715 if (GET_CODE (pat) == MEM)
31717 *mem_ref = pat;
31718 return true;
31721 /* Recursively process the pattern. */
31722 fmt = GET_RTX_FORMAT (GET_CODE (pat));
31724 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
31726 if (fmt[i] == 'e')
31728 if (find_mem_ref (XEXP (pat, i), mem_ref))
31729 return true;
31731 else if (fmt[i] == 'E')
31732 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31734 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31735 return true;
31739 return false;
31742 /* Determine if PAT is a PATTERN of a load insn. */
31744 static bool
31745 is_load_insn1 (rtx pat, rtx *load_mem)
31747 if (!pat)
31748 return false;
31750 if (GET_CODE (pat) == SET)
31751 return find_mem_ref (SET_SRC (pat), load_mem);
31753 if (GET_CODE (pat) == PARALLEL)
31755 int i;
31757 for (i = 0; i < XVECLEN (pat, 0); i++)
31758 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31759 return true;
31762 return false;
31765 /* Determine if INSN loads from memory. */
31767 static bool
31768 is_load_insn (rtx insn, rtx *load_mem)
31770 if (!insn || !INSN_P (insn))
31771 return false;
31773 if (CALL_P (insn))
31774 return false;
31776 return is_load_insn1 (PATTERN (insn), load_mem);
31779 /* Determine if PAT is a PATTERN of a store insn. */
31781 static bool
31782 is_store_insn1 (rtx pat, rtx *str_mem)
31784 if (!pat)
31785 return false;
31787 if (GET_CODE (pat) == SET)
31788 return find_mem_ref (SET_DEST (pat), str_mem);
31790 if (GET_CODE (pat) == PARALLEL)
31792 int i;
31794 for (i = 0; i < XVECLEN (pat, 0); i++)
31795 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31796 return true;
31799 return false;
31802 /* Determine if INSN stores to memory. */
31804 static bool
31805 is_store_insn (rtx insn, rtx *str_mem)
31807 if (!insn || !INSN_P (insn))
31808 return false;
31810 return is_store_insn1 (PATTERN (insn), str_mem);
31813 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31815 static bool
31816 is_power9_pairable_vec_type (enum attr_type type)
31818 switch (type)
31820 case TYPE_VECSIMPLE:
31821 case TYPE_VECCOMPLEX:
31822 case TYPE_VECDIV:
31823 case TYPE_VECCMP:
31824 case TYPE_VECPERM:
31825 case TYPE_VECFLOAT:
31826 case TYPE_VECFDIV:
31827 case TYPE_VECDOUBLE:
31828 return true;
31829 default:
31830 break;
31832 return false;
31835 /* Returns whether the dependence between INSN and NEXT is considered
31836 costly by the given target. */
31838 static bool
31839 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31841 rtx insn;
31842 rtx next;
31843 rtx load_mem, str_mem;
31845 /* If the flag is not enabled - no dependence is considered costly;
31846 allow all dependent insns in the same group.
31847 This is the most aggressive option. */
31848 if (rs6000_sched_costly_dep == no_dep_costly)
31849 return false;
31851 /* If the flag is set to 1 - a dependence is always considered costly;
31852 do not allow dependent instructions in the same group.
31853 This is the most conservative option. */
31854 if (rs6000_sched_costly_dep == all_deps_costly)
31855 return true;
31857 insn = DEP_PRO (dep);
31858 next = DEP_CON (dep);
31860 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31861 && is_load_insn (next, &load_mem)
31862 && is_store_insn (insn, &str_mem))
31863 /* Prevent load after store in the same group. */
31864 return true;
31866 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31867 && is_load_insn (next, &load_mem)
31868 && is_store_insn (insn, &str_mem)
31869 && DEP_TYPE (dep) == REG_DEP_TRUE
31870 && mem_locations_overlap(str_mem, load_mem))
31871 /* Prevent load after store in the same group if it is a true
31872 dependence. */
31873 return true;
31875 /* The flag is set to X; dependences with latency >= X are considered costly,
31876 and will not be scheduled in the same group. */
31877 if (rs6000_sched_costly_dep <= max_dep_latency
31878 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31879 return true;
31881 return false;
31884 /* Return the next insn after INSN that is found before TAIL is reached,
31885 skipping any "non-active" insns - insns that will not actually occupy
31886 an issue slot. Return NULL_RTX if such an insn is not found. */
31888 static rtx_insn *
31889 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31891 if (insn == NULL_RTX || insn == tail)
31892 return NULL;
31894 while (1)
31896 insn = NEXT_INSN (insn);
31897 if (insn == NULL_RTX || insn == tail)
31898 return NULL;
31900 if (CALL_P (insn)
31901 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31902 || (NONJUMP_INSN_P (insn)
31903 && GET_CODE (PATTERN (insn)) != USE
31904 && GET_CODE (PATTERN (insn)) != CLOBBER
31905 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31906 break;
31908 return insn;
31911 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31913 static int
31914 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31916 int pos;
31917 int i;
31918 rtx_insn *tmp;
31919 enum attr_type type;
31921 type = get_attr_type (last_scheduled_insn);
31923 /* Try to issue fixed point divides back-to-back in pairs so they will be
31924 routed to separate execution units and execute in parallel. */
31925 if (type == TYPE_DIV && divide_cnt == 0)
31927 /* First divide has been scheduled. */
31928 divide_cnt = 1;
31930 /* Scan the ready list looking for another divide, if found move it
31931 to the end of the list so it is chosen next. */
31932 pos = lastpos;
31933 while (pos >= 0)
31935 if (recog_memoized (ready[pos]) >= 0
31936 && get_attr_type (ready[pos]) == TYPE_DIV)
31938 tmp = ready[pos];
31939 for (i = pos; i < lastpos; i++)
31940 ready[i] = ready[i + 1];
31941 ready[lastpos] = tmp;
31942 break;
31944 pos--;
31947 else
31949 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31950 divide_cnt = 0;
31952 /* Power9 can execute 2 vector operations and 2 vector loads in a single
31953 cycle. So try to pair up and alternate groups of vector and vector
31954 load instructions.
31956 To aid this formation, a counter is maintained to keep track of
31957 vec/vecload insns issued. The value of vec_load_pendulum maintains
31958 the current state with the following values:
31960 0 : Initial state, no vec/vecload group has been started.
31962 -1 : 1 vector load has been issued and another has been found on
31963 the ready list and moved to the end.
31965 -2 : 2 vector loads have been issued and a vector operation has
31966 been found and moved to the end of the ready list.
31968 -3 : 2 vector loads and a vector insn have been issued and a
31969 vector operation has been found and moved to the end of the
31970 ready list.
31972 1 : 1 vector insn has been issued and another has been found and
31973 moved to the end of the ready list.
31975 2 : 2 vector insns have been issued and a vector load has been
31976 found and moved to the end of the ready list.
31978 3 : 2 vector insns and a vector load have been issued and another
31979 vector load has been found and moved to the end of the ready
31980 list. */
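/* E.g. a fully paired sequence walks the pendulum
0 -> -1 -> -2 -> -3 -> 0 (vecload, vecload, vec op, vec op), or
0 -> 1 -> 2 -> 3 -> 0 (vec op, vec op, vecload, vecload). */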
31981 if (type == TYPE_VECLOAD)
31983 /* Issued a vecload. */
31984 if (vec_load_pendulum == 0)
31986 /* We issued a single vecload, look for another and move it to
31987 the end of the ready list so it will be scheduled next.
31988 Set pendulum if found. */
31989 pos = lastpos;
31990 while (pos >= 0)
31992 if (recog_memoized (ready[pos]) >= 0
31993 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
31995 tmp = ready[pos];
31996 for (i = pos; i < lastpos; i++)
31997 ready[i] = ready[i + 1];
31998 ready[lastpos] = tmp;
31999 vec_load_pendulum = -1;
32000 return cached_can_issue_more;
32002 pos--;
32005 else if (vec_load_pendulum == -1)
32007 /* This is the second vecload we've issued, search the ready
32008 list for a vector operation so we can try to schedule a
32009 pair of those next. If found move to the end of the ready
32010 list so it is scheduled next and set the pendulum. */
32011 pos = lastpos;
32012 while (pos >= 0)
32014 if (recog_memoized (ready[pos]) >= 0
32015 && is_power9_pairable_vec_type (
32016 get_attr_type (ready[pos])))
32018 tmp = ready[pos];
32019 for (i = pos; i < lastpos; i++)
32020 ready[i] = ready[i + 1];
32021 ready[lastpos] = tmp;
32022 vec_load_pendulum = -2;
32023 return cached_can_issue_more;
32025 pos--;
32028 else if (vec_load_pendulum == 2)
32030 /* Two vector ops have been issued and we've just issued a
32031 vecload, look for another vecload and move to end of ready
32032 list if found. */
32033 pos = lastpos;
32034 while (pos >= 0)
32036 if (recog_memoized (ready[pos]) >= 0
32037 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32039 tmp = ready[pos];
32040 for (i = pos; i < lastpos; i++)
32041 ready[i] = ready[i + 1];
32042 ready[lastpos] = tmp;
32043 /* Set pendulum so that next vecload will be seen as
32044 finishing a group, not start of one. */
32045 vec_load_pendulum = 3;
32046 return cached_can_issue_more;
32048 pos--;
32052 else if (is_power9_pairable_vec_type (type))
32054 /* Issued a vector operation. */
32055 if (vec_load_pendulum == 0)
32056 /* We issued a single vec op, look for another and move it
32057 to the end of the ready list so it will be scheduled next.
32058 Set pendulum if found. */
32060 pos = lastpos;
32061 while (pos >= 0)
32063 if (recog_memoized (ready[pos]) >= 0
32064 && is_power9_pairable_vec_type (
32065 get_attr_type (ready[pos])))
32067 tmp = ready[pos];
32068 for (i = pos; i < lastpos; i++)
32069 ready[i] = ready[i + 1];
32070 ready[lastpos] = tmp;
32071 vec_load_pendulum = 1;
32072 return cached_can_issue_more;
32074 pos--;
32077 else if (vec_load_pendulum == 1)
32079 /* This is the second vec op we've issued, search the ready
32080 list for a vecload operation so we can try to schedule a
32081 pair of those next. If found move to the end of the ready
32082 list so it is scheduled next and set the pendulum. */
32083 pos = lastpos;
32084 while (pos >= 0)
32086 if (recog_memoized (ready[pos]) >= 0
32087 && get_attr_type (ready[pos]) == TYPE_VECLOAD)
32089 tmp = ready[pos];
32090 for (i = pos; i < lastpos; i++)
32091 ready[i] = ready[i + 1];
32092 ready[lastpos] = tmp;
32093 vec_load_pendulum = 2;
32094 return cached_can_issue_more;
32096 pos--;
32099 else if (vec_load_pendulum == -2)
32101 /* Two vecload ops have been issued and we've just issued a
32102 vec op, look for another vec op and move to end of ready
32103 list if found. */
32104 pos = lastpos;
32105 while (pos >= 0)
32107 if (recog_memoized (ready[pos]) >= 0
32108 && is_power9_pairable_vec_type (
32109 get_attr_type (ready[pos])))
32111 tmp = ready[pos];
32112 for (i = pos; i < lastpos; i++)
32113 ready[i] = ready[i + 1];
32114 ready[lastpos] = tmp;
32115 /* Set pendulum so that next vec op will be seen as
32116 finishing a group, not start of one. */
32117 vec_load_pendulum = -3;
32118 return cached_can_issue_more;
32120 pos--;
32125 /* We've either finished a vec/vecload group, couldn't find an insn to
32126 continue the current group, or the last insn had nothing to do with
32127 a group. In any case, reset the pendulum. */
32128 vec_load_pendulum = 0;
32131 return cached_can_issue_more;
32134 /* We are about to begin issuing insns for this clock cycle. */
32136 static int
32137 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
32138 rtx_insn **ready ATTRIBUTE_UNUSED,
32139 int *pn_ready ATTRIBUTE_UNUSED,
32140 int clock_var ATTRIBUTE_UNUSED)
32142 int n_ready = *pn_ready;
32144 if (sched_verbose)
32145 fprintf (dump, "// rs6000_sched_reorder :\n");
32147 /* Reorder the ready list, if the second to last ready insn
32148 is a non-pipelined insn. */
32149 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
32151 if (is_nonpipeline_insn (ready[n_ready - 1])
32152 && (recog_memoized (ready[n_ready - 2]) > 0))
32153 /* Simply swap first two insns. */
32154 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
32157 if (rs6000_cpu == PROCESSOR_POWER6)
32158 load_store_pendulum = 0;
32160 return rs6000_issue_rate ();
32163 /* Like rs6000_sched_reorder, but called after issuing each insn. */
32165 static int
32166 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
32167 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
32169 if (sched_verbose)
32170 fprintf (dump, "// rs6000_sched_reorder2 :\n");
32172 /* For Power6, we need to handle some special cases to try and keep the
32173 store queue from overflowing and triggering expensive flushes.
32175 This code monitors how load and store instructions are being issued
32176 and skews the ready list one way or the other to increase the likelihood
32177 that a desired instruction is issued at the proper time.
32179 A couple of things are done. First, we maintain a "load_store_pendulum"
32180 to track the current state of load/store issue.
32182 - If the pendulum is at zero, then no loads or stores have been
32183 issued in the current cycle so we do nothing.
32185 - If the pendulum is 1, then a single load has been issued in this
32186 cycle and we attempt to locate another load in the ready list to
32187 issue with it.
32189 - If the pendulum is -2, then two stores have already been
32190 issued in this cycle, so we increase the priority of the first load
32191 in the ready list to increase its likelihood of being chosen first
32192 in the next cycle.
32194 - If the pendulum is -1, then a single store has been issued in this
32195 cycle and we attempt to locate another store in the ready list to
32196 issue with it, preferring a store to an adjacent memory location to
32197 facilitate store pairing in the store queue.
32199 - If the pendulum is 2, then two loads have already been
32200 issued in this cycle, so we increase the priority of the first store
32201 in the ready list to increase its likelihood of being chosen first
32202 in the next cycle.
32204 - If the pendulum < -2 or > 2, then do nothing.
32206 Note: This code covers the most common scenarios. There exist non
32207 load/store instructions which make use of the LSU and which
32208 would need to be accounted for to strictly model the behavior
32209 of the machine. Those instructions are currently unaccounted
32210 for to help minimize compile time overhead of this code. */
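/* E.g. after two stores issue in a cycle the pendulum sits at -2, so
the first load on the ready list gets a priority boost for the next
cycle; after two loads (pendulum 2) the first store is boosted
instead. */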
32212 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
32214 int pos;
32215 int i;
32216 rtx_insn *tmp;
32217 rtx load_mem, str_mem;
32219 if (is_store_insn (last_scheduled_insn, &str_mem))
32220 /* Issuing a store, swing the load_store_pendulum to the left */
32221 load_store_pendulum--;
32222 else if (is_load_insn (last_scheduled_insn, &load_mem))
32223 /* Issuing a load, swing the load_store_pendulum to the right */
32224 load_store_pendulum++;
32225 else
32226 return cached_can_issue_more;
32228 /* If the pendulum is balanced, or there is only one instruction on
32229 the ready list, then all is well, so return. */
32230 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
32231 return cached_can_issue_more;
32233 if (load_store_pendulum == 1)
32235 /* A load has been issued in this cycle. Scan the ready list
32236 for another load to issue with it */
32237 pos = *pn_ready-1;
32239 while (pos >= 0)
32241 if (is_load_insn (ready[pos], &load_mem))
32243 /* Found a load. Move it to the head of the ready list,
32244 and adjust its priority so that it is more likely to
32245 stay there. */
32246 tmp = ready[pos];
32247 for (i = pos; i < *pn_ready - 1; i++)
32248 ready[i] = ready[i + 1];
32249 ready[*pn_ready-1] = tmp;
32251 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32252 INSN_PRIORITY (tmp)++;
32253 break;
32255 pos--;
32258 else if (load_store_pendulum == -2)
32260 /* Two stores have been issued in this cycle. Increase the
32261 priority of the first load in the ready list to favor it for
32262 issuing in the next cycle. */
32263 pos = *pn_ready-1;
32265 while (pos >= 0)
32267 if (is_load_insn (ready[pos], &load_mem)
32268 && !sel_sched_p ()
32269 && INSN_PRIORITY_KNOWN (ready[pos]))
32271 INSN_PRIORITY (ready[pos])++;
32273 /* Adjust the pendulum to account for the fact that a load
32274 was found and increased in priority. This is to prevent
32275 increasing the priority of multiple loads */
32276 load_store_pendulum--;
32278 break;
32280 pos--;
32283 else if (load_store_pendulum == -1)
32285 /* A store has been issued in this cycle. Scan the ready list for
32286 another store to issue with it, preferring a store to an adjacent
32287 memory location */
32288 int first_store_pos = -1;
32290 pos = *pn_ready-1;
32292 while (pos >= 0)
32294 if (is_store_insn (ready[pos], &str_mem))
32296 rtx str_mem2;
32297 /* Maintain the index of the first store found on the
32298 list */
32299 if (first_store_pos == -1)
32300 first_store_pos = pos;
32302 if (is_store_insn (last_scheduled_insn, &str_mem2)
32303 && adjacent_mem_locations (str_mem, str_mem2))
32305 /* Found an adjacent store. Move it to the head of the
32306 ready list, and adjust its priority so that it is
32307 more likely to stay there. */
32308 tmp = ready[pos];
32309 for (i = pos; i < *pn_ready - 1; i++)
32310 ready[i] = ready[i + 1];
32311 ready[*pn_ready-1] = tmp;
32313 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32314 INSN_PRIORITY (tmp)++;
32316 first_store_pos = -1;
32318 break;
32321 pos--;
32324 if (first_store_pos >= 0)
32326 /* An adjacent store wasn't found, but a non-adjacent store was,
32327 so move the non-adjacent store to the front of the ready
32328 list, and adjust its priority so that it is more likely to
32329 stay there. */
32330 tmp = ready[first_store_pos];
32331 for (i = first_store_pos; i < *pn_ready - 1; i++)
32332 ready[i] = ready[i + 1];
32333 ready[*pn_ready-1] = tmp;
32334 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
32335 INSN_PRIORITY (tmp)++;
32338 else if (load_store_pendulum == 2)
32340 /* Two loads have been issued in this cycle. Increase the priority
32341 of the first store in the ready list to favor it for issuing in
32342 the next cycle. */
32343 pos = *pn_ready-1;
32345 while (pos >= 0)
32347 if (is_store_insn (ready[pos], &str_mem)
32348 && !sel_sched_p ()
32349 && INSN_PRIORITY_KNOWN (ready[pos]))
32351 INSN_PRIORITY (ready[pos])++;
32353 /* Adjust the pendulum to account for the fact that a store
32354 was found and increased in priority. This is to prevent
32355 increasing the priority of multiple stores */
32356 load_store_pendulum++;
32358 break;
32360 pos--;
32365 /* Do Power9 dependent reordering if necessary. */
32366 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
32367 && recog_memoized (last_scheduled_insn) >= 0)
32368 return power9_sched_reorder2 (ready, *pn_ready - 1);
32370 return cached_can_issue_more;
32373 /* Return whether the presence of INSN causes a dispatch group termination
32374 of group WHICH_GROUP.
32376 If WHICH_GROUP == current_group, this function will return true if INSN
32377 causes the termination of the current group (i.e., the dispatch group to
32378 which INSN belongs). This means that INSN will be the last insn in the
32379 group it belongs to.
32381 If WHICH_GROUP == previous_group, this function will return true if INSN
32382 causes the termination of the previous group (i.e., the dispatch group that
32383 precedes the group to which INSN belongs). This means that INSN will be
32384 the first insn in the group it belongs to. */
32386 static bool
32387 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
32389 bool first, last;
32391 if (! insn)
32392 return false;
32394 first = insn_must_be_first_in_group (insn);
32395 last = insn_must_be_last_in_group (insn);
32397 if (first && last)
32398 return true;
32400 if (which_group == current_group)
32401 return last;
32402 else if (which_group == previous_group)
32403 return first;
32405 return false;
32409 static bool
32410 insn_must_be_first_in_group (rtx_insn *insn)
32412 enum attr_type type;
32414 if (!insn
32415 || NOTE_P (insn)
32416 || DEBUG_INSN_P (insn)
32417 || GET_CODE (PATTERN (insn)) == USE
32418 || GET_CODE (PATTERN (insn)) == CLOBBER)
32419 return false;
32421 switch (rs6000_cpu)
32423 case PROCESSOR_POWER5:
32424 if (is_cracked_insn (insn))
32425 return true;
32426 /* FALLTHRU */
32427 case PROCESSOR_POWER4:
32428 if (is_microcoded_insn (insn))
32429 return true;
32431 if (!rs6000_sched_groups)
32432 return false;
32434 type = get_attr_type (insn);
32436 switch (type)
32438 case TYPE_MFCR:
32439 case TYPE_MFCRF:
32440 case TYPE_MTCR:
32441 case TYPE_DELAYED_CR:
32442 case TYPE_CR_LOGICAL:
32443 case TYPE_MTJMPR:
32444 case TYPE_MFJMPR:
32445 case TYPE_DIV:
32446 case TYPE_LOAD_L:
32447 case TYPE_STORE_C:
32448 case TYPE_ISYNC:
32449 case TYPE_SYNC:
32450 return true;
32451 default:
32452 break;
32454 break;
32455 case PROCESSOR_POWER6:
32456 type = get_attr_type (insn);
32458 switch (type)
32460 case TYPE_EXTS:
32461 case TYPE_CNTLZ:
32462 case TYPE_TRAP:
32463 case TYPE_MUL:
32464 case TYPE_INSERT:
32465 case TYPE_FPCOMPARE:
32466 case TYPE_MFCR:
32467 case TYPE_MTCR:
32468 case TYPE_MFJMPR:
32469 case TYPE_MTJMPR:
32470 case TYPE_ISYNC:
32471 case TYPE_SYNC:
32472 case TYPE_LOAD_L:
32473 case TYPE_STORE_C:
32474 return true;
32475 case TYPE_SHIFT:
32476 if (get_attr_dot (insn) == DOT_NO
32477 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
32478 return true;
32479 else
32480 break;
32481 case TYPE_DIV:
32482 if (get_attr_size (insn) == SIZE_32)
32483 return true;
32484 else
32485 break;
32486 case TYPE_LOAD:
32487 case TYPE_STORE:
32488 case TYPE_FPLOAD:
32489 case TYPE_FPSTORE:
32490 if (get_attr_update (insn) == UPDATE_YES)
32491 return true;
32492 else
32493 break;
32494 default:
32495 break;
32497 break;
32498 case PROCESSOR_POWER7:
32499 type = get_attr_type (insn);
32501 switch (type)
32503 case TYPE_CR_LOGICAL:
32504 case TYPE_MFCR:
32505 case TYPE_MFCRF:
32506 case TYPE_MTCR:
32507 case TYPE_DIV:
32508 case TYPE_ISYNC:
32509 case TYPE_LOAD_L:
32510 case TYPE_STORE_C:
32511 case TYPE_MFJMPR:
32512 case TYPE_MTJMPR:
32513 return true;
32514 case TYPE_MUL:
32515 case TYPE_SHIFT:
32516 case TYPE_EXTS:
32517 if (get_attr_dot (insn) == DOT_YES)
32518 return true;
32519 else
32520 break;
32521 case TYPE_LOAD:
32522 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32523 || get_attr_update (insn) == UPDATE_YES)
32524 return true;
32525 else
32526 break;
32527 case TYPE_STORE:
32528 case TYPE_FPLOAD:
32529 case TYPE_FPSTORE:
32530 if (get_attr_update (insn) == UPDATE_YES)
32531 return true;
32532 else
32533 break;
32534 default:
32535 break;
32537 break;
32538 case PROCESSOR_POWER8:
32539 type = get_attr_type (insn);
32541 switch (type)
32543 case TYPE_CR_LOGICAL:
32544 case TYPE_DELAYED_CR:
32545 case TYPE_MFCR:
32546 case TYPE_MFCRF:
32547 case TYPE_MTCR:
32548 case TYPE_SYNC:
32549 case TYPE_ISYNC:
32550 case TYPE_LOAD_L:
32551 case TYPE_STORE_C:
32552 case TYPE_VECSTORE:
32553 case TYPE_MFJMPR:
32554 case TYPE_MTJMPR:
32555 return true;
32556 case TYPE_SHIFT:
32557 case TYPE_EXTS:
32558 case TYPE_MUL:
32559 if (get_attr_dot (insn) == DOT_YES)
32560 return true;
32561 else
32562 break;
32563 case TYPE_LOAD:
32564 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32565 || get_attr_update (insn) == UPDATE_YES)
32566 return true;
32567 else
32568 break;
32569 case TYPE_STORE:
32570 if (get_attr_update (insn) == UPDATE_YES
32571 && get_attr_indexed (insn) == INDEXED_YES)
32572 return true;
32573 else
32574 break;
32575 default:
32576 break;
32578 break;
32579 default:
32580 break;
32583 return false;
32586 static bool
32587 insn_must_be_last_in_group (rtx_insn *insn)
32589 enum attr_type type;
32591 if (!insn
32592 || NOTE_P (insn)
32593 || DEBUG_INSN_P (insn)
32594 || GET_CODE (PATTERN (insn)) == USE
32595 || GET_CODE (PATTERN (insn)) == CLOBBER)
32596 return false;
32598 switch (rs6000_cpu) {
32599 case PROCESSOR_POWER4:
32600 case PROCESSOR_POWER5:
32601 if (is_microcoded_insn (insn))
32602 return true;
32604 if (is_branch_slot_insn (insn))
32605 return true;
32607 break;
32608 case PROCESSOR_POWER6:
32609 type = get_attr_type (insn);
32611 switch (type)
32613 case TYPE_EXTS:
32614 case TYPE_CNTLZ:
32615 case TYPE_TRAP:
32616 case TYPE_MUL:
32617 case TYPE_FPCOMPARE:
32618 case TYPE_MFCR:
32619 case TYPE_MTCR:
32620 case TYPE_MFJMPR:
32621 case TYPE_MTJMPR:
32622 case TYPE_ISYNC:
32623 case TYPE_SYNC:
32624 case TYPE_LOAD_L:
32625 case TYPE_STORE_C:
32626 return true;
32627 case TYPE_SHIFT:
32628 if (get_attr_dot (insn) == DOT_NO
32629 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
32630 return true;
32631 else
32632 break;
32633 case TYPE_DIV:
32634 if (get_attr_size (insn) == SIZE_32)
32635 return true;
32636 else
32637 break;
32638 default:
32639 break;
32641 break;
32642 case PROCESSOR_POWER7:
32643 type = get_attr_type (insn);
32645 switch (type)
32647 case TYPE_ISYNC:
32648 case TYPE_SYNC:
32649 case TYPE_LOAD_L:
32650 case TYPE_STORE_C:
32651 return true;
32652 case TYPE_LOAD:
32653 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32654 && get_attr_update (insn) == UPDATE_YES)
32655 return true;
32656 else
32657 break;
32658 case TYPE_STORE:
32659 if (get_attr_update (insn) == UPDATE_YES
32660 && get_attr_indexed (insn) == INDEXED_YES)
32661 return true;
32662 else
32663 break;
32664 default:
32665 break;
32667 break;
32668 case PROCESSOR_POWER8:
32669 type = get_attr_type (insn);
32671 switch (type)
32673 case TYPE_MFCR:
32674 case TYPE_MTCR:
32675 case TYPE_ISYNC:
32676 case TYPE_SYNC:
32677 case TYPE_LOAD_L:
32678 case TYPE_STORE_C:
32679 return true;
32680 case TYPE_LOAD:
32681 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32682 && get_attr_update (insn) == UPDATE_YES)
32683 return true;
32684 else
32685 break;
32686 case TYPE_STORE:
32687 if (get_attr_update (insn) == UPDATE_YES
32688 && get_attr_indexed (insn) == INDEXED_YES)
32689 return true;
32690 else
32691 break;
32692 default:
32693 break;
32695 break;
32696 default:
32697 break;
32700 return false;
32703 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32704 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32706 static bool
32707 is_costly_group (rtx *group_insns, rtx next_insn)
32709 int i;
32710 int issue_rate = rs6000_issue_rate ();
32712 for (i = 0; i < issue_rate; i++)
32714 sd_iterator_def sd_it;
32715 dep_t dep;
32716 rtx insn = group_insns[i];
32718 if (!insn)
32719 continue;
32721 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32723 rtx next = DEP_CON (dep);
32725 if (next == next_insn
32726 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32727 return true;
32731 return false;
32734 /* Helper for the function redefine_groups.
32735 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32736 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32737 to keep it "far" (in a separate group) from GROUP_INSNS, following
32738 one of the following schemes, depending on the value of the flag
32739 -minsert-sched-nops = X:
32740 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32741 in order to force NEXT_INSN into a separate group.
32742 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32743 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32744 insertion (has a group just ended, how many vacant issue slots remain in the
32745 last group, and how many dispatch groups were encountered so far). */
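/* E.g. with -minsert-sched-nops=2 the second scheme below always
emits exactly two nops, while sched_finish_regroup_exact emits only
as many as are needed to push NEXT_INSN into a fresh group. */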
32747 static int
32748 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32749 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32750 int *group_count)
32752 rtx nop;
32753 bool force;
32754 int issue_rate = rs6000_issue_rate ();
32755 bool end = *group_end;
32756 int i;
32758 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32759 return can_issue_more;
32761 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32762 return can_issue_more;
32764 force = is_costly_group (group_insns, next_insn);
32765 if (!force)
32766 return can_issue_more;
32768 if (sched_verbose > 6)
32769 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
32770 *group_count ,can_issue_more);
32772 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32774 if (*group_end)
32775 can_issue_more = 0;
32777 /* Since only a branch can be issued in the last issue_slot, it is
32778 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32779 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32780 in this case the last nop will start a new group and the branch
32781 will be forced to the new group. */
32782 if (can_issue_more && !is_branch_slot_insn (next_insn))
32783 can_issue_more--;
32785 /* Do we have a special group ending nop? */
32786 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
32787 || rs6000_cpu_attr == CPU_POWER8)
32789 nop = gen_group_ending_nop ();
32790 emit_insn_before (nop, next_insn);
32791 can_issue_more = 0;
32793 else
32794 while (can_issue_more > 0)
32796 nop = gen_nop ();
32797 emit_insn_before (nop, next_insn);
32798 can_issue_more--;
32801 *group_end = true;
32802 return 0;
32805 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32807 int n_nops = rs6000_sched_insert_nops;
32809 /* Nops can't be issued from the branch slot, so the effective
32810 issue_rate for nops is 'issue_rate - 1'. */
32811 if (can_issue_more == 0)
32812 can_issue_more = issue_rate;
32813 can_issue_more--;
32814 if (can_issue_more == 0)
32816 can_issue_more = issue_rate - 1;
32817 (*group_count)++;
32818 end = true;
32819 for (i = 0; i < issue_rate; i++)
32821 group_insns[i] = 0;
32825 while (n_nops > 0)
32827 nop = gen_nop ();
32828 emit_insn_before (nop, next_insn);
32829 if (can_issue_more == issue_rate - 1) /* new group begins */
32830 end = false;
32831 can_issue_more--;
32832 if (can_issue_more == 0)
32834 can_issue_more = issue_rate - 1;
32835 (*group_count)++;
32836 end = true;
32837 for (i = 0; i < issue_rate; i++)
32839 group_insns[i] = 0;
32842 n_nops--;
32845 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32846 can_issue_more++;
32848 /* Is next_insn going to start a new group? */
32849 *group_end
32850 = (end
32851 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32852 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32853 || (can_issue_more < issue_rate &&
32854 insn_terminates_group_p (next_insn, previous_group)));
32855 if (*group_end && end)
32856 (*group_count)--;
32858 if (sched_verbose > 6)
32859 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32860 *group_count, can_issue_more);
32861 return can_issue_more;
32864 return can_issue_more;
32867 /* This function tries to synch the dispatch groups that the compiler "sees"
32868 with the dispatch groups that the processor dispatcher is expected to
32869 form in practice. It tries to achieve this synchronization by forcing the
32870 estimated processor grouping on the compiler (as opposed to the function
32871 'pad_groups' which tries to force the scheduler's grouping on the processor).
32873 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32874 examines the (estimated) dispatch groups that will be formed by the processor
32875 dispatcher. It marks these group boundaries to reflect the estimated
32876 processor grouping, overriding the grouping that the scheduler had marked.
32877 Depending on the value of the flag '-minsert-sched-nops' this function can
32878 force certain insns into separate groups or force a certain distance between
32879 them by inserting nops, for example, if there exists a "costly dependence"
32880 between the insns.
32882 The function estimates the group boundaries that the processor will form as
32883 follows: It keeps track of how many vacant issue slots are available after
32884 each insn. A subsequent insn will start a new group if one of the following
32885 4 cases applies:
32886 - no more vacant issue slots remain in the current dispatch group.
32887 - only the last issue slot, which is the branch slot, is vacant, but the next
32888 insn is not a branch.
32889 - only the last 2 or less issue slots, including the branch slot, are vacant,
32890 which means that a cracked insn (which occupies two issue slots) can't be
32891 issued in this group.
32892 - less than 'issue_rate' slots are vacant, and the next insn always needs to
32893 start a new group. */
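/* E.g. with an issue rate of 5, a cracked insn that arrives when only
two slots remain in the current group is estimated to start a new
group, as is any insn arriving when no slots remain. */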
32895 static int
32896 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32897 rtx_insn *tail)
32899 rtx_insn *insn, *next_insn;
32900 int issue_rate;
32901 int can_issue_more;
32902 int slot, i;
32903 bool group_end;
32904 int group_count = 0;
32905 rtx *group_insns;
32907 /* Initialize. */
32908 issue_rate = rs6000_issue_rate ();
32909 group_insns = XALLOCAVEC (rtx, issue_rate);
32910 for (i = 0; i < issue_rate; i++)
32912 group_insns[i] = 0;
32914 can_issue_more = issue_rate;
32915 slot = 0;
32916 insn = get_next_active_insn (prev_head_insn, tail);
32917 group_end = false;
32919 while (insn != NULL_RTX)
32921 slot = (issue_rate - can_issue_more);
32922 group_insns[slot] = insn;
32923 can_issue_more =
32924 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32925 if (insn_terminates_group_p (insn, current_group))
32926 can_issue_more = 0;
32928 next_insn = get_next_active_insn (insn, tail);
32929 if (next_insn == NULL_RTX)
32930 return group_count + 1;
32932 /* Is next_insn going to start a new group? */
32933 group_end
32934 = (can_issue_more == 0
32935 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32936 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32937 || (can_issue_more < issue_rate &&
32938 insn_terminates_group_p (next_insn, previous_group)));
32940 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32941 next_insn, &group_end, can_issue_more,
32942 &group_count);
32944 if (group_end)
32946 group_count++;
32947 can_issue_more = 0;
32948 for (i = 0; i < issue_rate; i++)
32950 group_insns[i] = 0;
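/* The insn's machine mode records the boundary for later passes:
   TImode marks an insn that starts a new dispatch group, VOIDmode
   one that does not (pad_groups tests exactly this).  */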
32954 if (GET_MODE (next_insn) == TImode && can_issue_more)
32955 PUT_MODE (next_insn, VOIDmode);
32956 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32957 PUT_MODE (next_insn, TImode);
32959 insn = next_insn;
32960 if (can_issue_more == 0)
32961 can_issue_more = issue_rate;
32962 } /* while */
32964 return group_count;
32967 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32968 dispatch group boundaries that the scheduler had marked. Pad with nops
32969 any dispatch groups which have vacant issue slots, in order to force the
32970 scheduler's grouping on the processor dispatcher. The function
32971 returns the number of dispatch groups found. */
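/* A worked example (illustrative): with issue_rate == 4, if the
   scheduler ended a group after two insns and the following insn is
   not a branch, one nop is emitted; two insns plus the nop leave
   only the branch slot vacant, which forces the dispatcher to start
   a new group at the point the scheduler chose.  */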
32973 static int
32974 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32975 rtx_insn *tail)
32977 rtx_insn *insn, *next_insn;
32978 rtx nop;
32979 int issue_rate;
32980 int can_issue_more;
32981 int group_end;
32982 int group_count = 0;
32984 /* Initialize issue_rate and can_issue_more. */
32985 issue_rate = rs6000_issue_rate ();
32986 can_issue_more = issue_rate;
32988 insn = get_next_active_insn (prev_head_insn, tail);
32989 next_insn = get_next_active_insn (insn, tail);
32991 while (insn != NULL_RTX)
32993 can_issue_more =
32994 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32996 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32998 if (next_insn == NULL_RTX)
32999 break;
33001 if (group_end)
33003 /* If the scheduler had marked group termination at this location
33004 (between insn and next_insn), and neither insn nor next_insn will
33005 force group termination, pad the group with nops to force group
33006 termination. */
33007 if (can_issue_more
33008 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
33009 && !insn_terminates_group_p (insn, current_group)
33010 && !insn_terminates_group_p (next_insn, previous_group))
33012 if (!is_branch_slot_insn (next_insn))
33013 can_issue_more--;
33015 while (can_issue_more)
33017 nop = gen_nop ();
33018 emit_insn_before (nop, next_insn);
33019 can_issue_more--;
33023 can_issue_more = issue_rate;
33024 group_count++;
33027 insn = next_insn;
33028 next_insn = get_next_active_insn (insn, tail);
33031 return group_count;
33034 /* We're beginning a new block. Initialize data structures as necessary. */
33036 static void
33037 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
33038 int sched_verbose ATTRIBUTE_UNUSED,
33039 int max_ready ATTRIBUTE_UNUSED)
33041 last_scheduled_insn = NULL;
33042 load_store_pendulum = 0;
33043 divide_cnt = 0;
33044 vec_load_pendulum = 0;
33047 /* The following function is called at the end of scheduling BB.
33048 After reload, it inserts nops to enforce insn group bundling. */
33050 static void
33051 rs6000_sched_finish (FILE *dump, int sched_verbose)
33053 int n_groups;
33055 if (sched_verbose)
33056 fprintf (dump, "=== Finishing schedule.\n");
33058 if (reload_completed && rs6000_sched_groups)
33060 /* Do not run sched_finish hook when selective scheduling enabled. */
33061 if (sel_sched_p ())
33062 return;
33064 if (rs6000_sched_insert_nops == sched_finish_none)
33065 return;
33067 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
33068 n_groups = pad_groups (dump, sched_verbose,
33069 current_sched_info->prev_head,
33070 current_sched_info->next_tail);
33071 else
33072 n_groups = redefine_groups (dump, sched_verbose,
33073 current_sched_info->prev_head,
33074 current_sched_info->next_tail);
33076 if (sched_verbose >= 6)
33078 fprintf (dump, "ngroups = %d\n", n_groups);
33079 print_rtl (dump, current_sched_info->prev_head);
33080 fprintf (dump, "Done finish_sched\n");
33085 struct rs6000_sched_context
33087 short cached_can_issue_more;
33088 rtx_insn *last_scheduled_insn;
33089 int load_store_pendulum;
33090 int divide_cnt;
33091 int vec_load_pendulum;
33094 typedef struct rs6000_sched_context rs6000_sched_context_def;
33095 typedef rs6000_sched_context_def *rs6000_sched_context_t;
33097 /* Allocate storage for a new scheduling context. */
33098 static void *
33099 rs6000_alloc_sched_context (void)
33101 return xmalloc (sizeof (rs6000_sched_context_def));
33104 /* If CLEAN_P is true, initialize _SC with clean data;
33105 otherwise initialize it from the global context. */
33106 static void
33107 rs6000_init_sched_context (void *_sc, bool clean_p)
33109 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33111 if (clean_p)
33113 sc->cached_can_issue_more = 0;
33114 sc->last_scheduled_insn = NULL;
33115 sc->load_store_pendulum = 0;
33116 sc->divide_cnt = 0;
33117 sc->vec_load_pendulum = 0;
33119 else
33121 sc->cached_can_issue_more = cached_can_issue_more;
33122 sc->last_scheduled_insn = last_scheduled_insn;
33123 sc->load_store_pendulum = load_store_pendulum;
33124 sc->divide_cnt = divide_cnt;
33125 sc->vec_load_pendulum = vec_load_pendulum;
33129 /* Sets the global scheduling context to the one pointed to by _SC. */
33130 static void
33131 rs6000_set_sched_context (void *_sc)
33133 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
33135 gcc_assert (sc != NULL);
33137 cached_can_issue_more = sc->cached_can_issue_more;
33138 last_scheduled_insn = sc->last_scheduled_insn;
33139 load_store_pendulum = sc->load_store_pendulum;
33140 divide_cnt = sc->divide_cnt;
33141 vec_load_pendulum = sc->vec_load_pendulum;
33144 /* Free _SC. */
33145 static void
33146 rs6000_free_sched_context (void *_sc)
33148 gcc_assert (_sc != NULL);
33150 free (_sc);
33154 /* Length in units of the trampoline for entering a nested function. */
33156 int
33157 rs6000_trampoline_size (void)
33159 int ret = 0;
33161 switch (DEFAULT_ABI)
33163 default:
33164 gcc_unreachable ();
33166 case ABI_AIX:
33167 ret = (TARGET_32BIT) ? 12 : 24;
33168 break;
33170 case ABI_ELFv2:
33171 gcc_assert (!TARGET_32BIT);
33172 ret = 32;
33173 break;
33175 case ABI_DARWIN:
33176 case ABI_V4:
33177 ret = (TARGET_32BIT) ? 40 : 48;
33178 break;
33181 return ret;
33184 /* Emit RTL insns to initialize the variable parts of a trampoline.
33185 FNADDR is an RTX for the address of the function's pure code.
33186 CXT is an RTX for the static chain value for the function. */
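/* For the AIX case below, the initialized trampoline is itself a
   three-word function descriptor (illustrative layout, one word
   being 4 bytes on 32-bit and 8 on 64-bit):
     word 0: code address, copied from FNADDR's descriptor
     word 1: TOC pointer, copied from FNADDR's descriptor
     word 2: static chain value (CXT)  */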
33188 static void
33189 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
33191 int regsize = (TARGET_32BIT) ? 4 : 8;
33192 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
33193 rtx ctx_reg = force_reg (Pmode, cxt);
33194 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
33196 switch (DEFAULT_ABI)
33198 default:
33199 gcc_unreachable ();
33201 /* Under AIX, just build the 3 word function descriptor */
33202 case ABI_AIX:
33204 rtx fnmem, fn_reg, toc_reg;
33206 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
33207 error ("You cannot take the address of a nested function if you use "
33208 "the -mno-pointers-to-nested-functions option.");
33210 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
33211 fn_reg = gen_reg_rtx (Pmode);
33212 toc_reg = gen_reg_rtx (Pmode);
33214 /* Macro to shorten the code expansions below. */
33215 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
33217 m_tramp = replace_equiv_address (m_tramp, addr);
33219 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
33220 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
33221 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
33222 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
33223 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
33225 # undef MEM_PLUS
33227 break;
33229 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
33230 case ABI_ELFv2:
33231 case ABI_DARWIN:
33232 case ABI_V4:
33233 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
33234 LCT_NORMAL, VOIDmode, 4,
33235 addr, Pmode,
33236 GEN_INT (rs6000_trampoline_size ()), SImode,
33237 fnaddr, Pmode,
33238 ctx_reg, Pmode);
33239 break;
33244 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
33245 identifier as an argument, so the front end shouldn't look it up. */
33247 static bool
33248 rs6000_attribute_takes_identifier_p (const_tree attr_id)
33250 return is_attribute_p ("altivec", attr_id);
33253 /* Handle the "altivec" attribute. The attribute may have
33254 arguments as follows:
33256 __attribute__((altivec(vector__)))
33257 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
33258 __attribute__((altivec(bool__))) (always followed by 'unsigned')
33260 and may appear more than once (e.g., 'vector bool char') in a
33261 given declaration. */
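/* For instance (illustrative), a declaration such as

     vector bool char v;

   reaches this handler as 'unsigned char' carrying
   __attribute__((altivec(bool__))), and the 'b'/QImode case below
   rewrites the type to bool_V16QI_type_node.  */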
33263 static tree
33264 rs6000_handle_altivec_attribute (tree *node,
33265 tree name ATTRIBUTE_UNUSED,
33266 tree args,
33267 int flags ATTRIBUTE_UNUSED,
33268 bool *no_add_attrs)
33270 tree type = *node, result = NULL_TREE;
33271 machine_mode mode;
33272 int unsigned_p;
33273 char altivec_type
33274 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
33275 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
33276 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
33277 : '?');
33279 while (POINTER_TYPE_P (type)
33280 || TREE_CODE (type) == FUNCTION_TYPE
33281 || TREE_CODE (type) == METHOD_TYPE
33282 || TREE_CODE (type) == ARRAY_TYPE)
33283 type = TREE_TYPE (type);
33285 mode = TYPE_MODE (type);
33287 /* Check for invalid AltiVec type qualifiers. */
33288 if (type == long_double_type_node)
33289 error ("use of %<long double%> in AltiVec types is invalid");
33290 else if (type == boolean_type_node)
33291 error ("use of boolean types in AltiVec types is invalid");
33292 else if (TREE_CODE (type) == COMPLEX_TYPE)
33293 error ("use of %<complex%> in AltiVec types is invalid");
33294 else if (DECIMAL_FLOAT_MODE_P (mode))
33295 error ("use of decimal floating point types in AltiVec types is invalid");
33296 else if (!TARGET_VSX)
33298 if (type == long_unsigned_type_node || type == long_integer_type_node)
33300 if (TARGET_64BIT)
33301 error ("use of %<long%> in AltiVec types is invalid for "
33302 "64-bit code without -mvsx");
33303 else if (rs6000_warn_altivec_long)
33304 warning (0, "use of %<long%> in AltiVec types is deprecated; "
33305 "use %<int%>");
33307 else if (type == long_long_unsigned_type_node
33308 || type == long_long_integer_type_node)
33309 error ("use of %<long long%> in AltiVec types is invalid without "
33310 "-mvsx");
33311 else if (type == double_type_node)
33312 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
33315 switch (altivec_type)
33317 case 'v':
33318 unsigned_p = TYPE_UNSIGNED (type);
33319 switch (mode)
33321 case TImode:
33322 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
33323 break;
33324 case DImode:
33325 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
33326 break;
33327 case SImode:
33328 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
33329 break;
33330 case HImode:
33331 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
33332 break;
33333 case QImode:
33334 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
33335 break;
33336 case SFmode: result = V4SF_type_node; break;
33337 case DFmode: result = V2DF_type_node; break;
33338 /* If the user says 'vector int bool', we may be handed the 'bool'
33339 attribute _before_ the 'vector' attribute, and so select the
33340 proper type in the 'b' case below. */
33341 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
33342 case V2DImode: case V2DFmode:
33343 result = type;
33344 default: break;
33346 break;
33347 case 'b':
33348 switch (mode)
33350 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
33351 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
33352 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
33353 case QImode: case V16QImode: result = bool_V16QI_type_node;
33354 default: break;
33356 break;
33357 case 'p':
33358 switch (mode)
33360 case V8HImode: result = pixel_V8HI_type_node;
33361 default: break;
33363 default: break;
33366 /* Propagate qualifiers attached to the element type
33367 onto the vector type. */
33368 if (result && result != type && TYPE_QUALS (type))
33369 result = build_qualified_type (result, TYPE_QUALS (type));
33371 *no_add_attrs = true; /* No need to hang on to the attribute. */
33373 if (result)
33374 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
33376 return NULL_TREE;
33379 /* AltiVec defines four built-in scalar types that serve as vector
33380 elements; we must teach the compiler how to mangle them. */
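/* For example (a reading of the strings below, not a separate
   scheme): a parameter of AltiVec type 'bool int' is mangled
   "U6__booli", i.e. the vendor-extended qualifier "__bool"
   (U 6 __bool) applied to plain 'int' ('i'), while '__pixel' is the
   vendor-extended type "u7__pixel".  */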
33382 static const char *
33383 rs6000_mangle_type (const_tree type)
33385 type = TYPE_MAIN_VARIANT (type);
33387 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33388 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33389 return NULL;
33391 if (type == bool_char_type_node) return "U6__boolc";
33392 if (type == bool_short_type_node) return "U6__bools";
33393 if (type == pixel_type_node) return "u7__pixel";
33394 if (type == bool_int_type_node) return "U6__booli";
33395 if (type == bool_long_type_node) return "U6__booll";
33397 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
33398 "g" for IBM extended double, no matter whether it is long double (using
33399 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
33400 if (TARGET_FLOAT128)
33402 if (type == ieee128_float_type_node)
33403 return "U10__float128";
33405 if (type == ibm128_float_type_node)
33406 return "g";
33408 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
33409 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
33412 /* Mangle IBM extended float long double as `g' (__float128) on
33413 powerpc*-linux where long-double-64 previously was the default. */
33414 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
33415 && TARGET_ELF
33416 && TARGET_LONG_DOUBLE_128
33417 && !TARGET_IEEEQUAD)
33418 return "g";
33420 /* For all other types, use normal C++ mangling. */
33421 return NULL;
33424 /* Handle a "longcall" or "shortcall" attribute; arguments as in
33425 struct attribute_spec.handler. */
33427 static tree
33428 rs6000_handle_longcall_attribute (tree *node, tree name,
33429 tree args ATTRIBUTE_UNUSED,
33430 int flags ATTRIBUTE_UNUSED,
33431 bool *no_add_attrs)
33433 if (TREE_CODE (*node) != FUNCTION_TYPE
33434 && TREE_CODE (*node) != FIELD_DECL
33435 && TREE_CODE (*node) != TYPE_DECL)
33437 warning (OPT_Wattributes, "%qE attribute only applies to functions",
33438 name);
33439 *no_add_attrs = true;
33442 return NULL_TREE;
33445 /* Set longcall attributes on all functions declared when
33446 rs6000_default_long_calls is true. */
33447 static void
33448 rs6000_set_default_type_attributes (tree type)
33450 if (rs6000_default_long_calls
33451 && (TREE_CODE (type) == FUNCTION_TYPE
33452 || TREE_CODE (type) == METHOD_TYPE))
33453 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
33454 NULL_TREE,
33455 TYPE_ATTRIBUTES (type));
33457 #if TARGET_MACHO
33458 darwin_set_default_type_attributes (type);
33459 #endif
33462 /* Return a reference suitable for calling a function with the
33463 longcall attribute. */
33465 rtx
33466 rs6000_longcall_ref (rtx call_ref)
33468 const char *call_name;
33469 tree node;
33471 if (GET_CODE (call_ref) != SYMBOL_REF)
33472 return call_ref;
33474 /* System V adds '.' to the internal name, so skip them. */
33475 call_name = XSTR (call_ref, 0);
33476 if (*call_name == '.')
33478 while (*call_name == '.')
33479 call_name++;
33481 node = get_identifier (call_name);
33482 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
33485 return force_reg (Pmode, call_ref);
33488 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
33489 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
33490 #endif
33492 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33493 struct attribute_spec.handler. */
33494 static tree
33495 rs6000_handle_struct_attribute (tree *node, tree name,
33496 tree args ATTRIBUTE_UNUSED,
33497 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33499 tree *type = NULL;
33500 if (DECL_P (*node))
33502 if (TREE_CODE (*node) == TYPE_DECL)
33503 type = &TREE_TYPE (*node);
33505 else
33506 type = node;
33508 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
33509 || TREE_CODE (*type) == UNION_TYPE)))
33511 warning (OPT_Wattributes, "%qE attribute ignored", name);
33512 *no_add_attrs = true;
33515 else if ((is_attribute_p ("ms_struct", name)
33516 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
33517 || ((is_attribute_p ("gcc_struct", name)
33518 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
33520 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
33521 name);
33522 *no_add_attrs = true;
33525 return NULL_TREE;
33528 static bool
33529 rs6000_ms_bitfield_layout_p (const_tree record_type)
33531 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
33532 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
33533 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
33536 #ifdef USING_ELFOS_H
33538 /* A get_unnamed_section callback, used for switching to toc_section. */
33540 static void
33541 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33543 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33544 && TARGET_MINIMAL_TOC)
33546 if (!toc_initialized)
33548 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33549 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33550 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
33551 fprintf (asm_out_file, "\t.tc ");
33552 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
33553 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33554 fprintf (asm_out_file, "\n");
33556 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33557 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33558 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33559 fprintf (asm_out_file, " = .+32768\n");
33560 toc_initialized = 1;
33562 else
33563 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33565 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33567 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33568 if (!toc_initialized)
33570 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33571 toc_initialized = 1;
33574 else
33576 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33577 if (!toc_initialized)
33579 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33580 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33581 fprintf (asm_out_file, " = .+32768\n");
33582 toc_initialized = 1;
33587 /* Implement TARGET_ASM_INIT_SECTIONS. */
33589 static void
33590 rs6000_elf_asm_init_sections (void)
33592 toc_section
33593 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
33595 sdata2_section
33596 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
33597 SDATA2_SECTION_ASM_OP);
33600 /* Implement TARGET_SELECT_RTX_SECTION. */
33602 static section *
33603 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
33604 unsigned HOST_WIDE_INT align)
33606 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33607 return toc_section;
33608 else
33609 return default_elf_select_rtx_section (mode, x, align);
33612 /* For a SYMBOL_REF, set generic flags and then perform some
33613 target-specific processing.
33615 When the AIX ABI is requested on a non-AIX system, replace the
33616 function name with the real name (with a leading .) rather than the
33617 function descriptor name. This saves a lot of overriding code to
33618 read the prefixes. */
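/* E.g. under this scheme a reference to function 'foo' is rewritten
   to '.foo', the name of its code entry point.  */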
33620 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
33621 static void
33622 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
33624 default_encode_section_info (decl, rtl, first);
33626 if (first
33627 && TREE_CODE (decl) == FUNCTION_DECL
33628 && !TARGET_AIX
33629 && DEFAULT_ABI == ABI_AIX)
33631 rtx sym_ref = XEXP (rtl, 0);
33632 size_t len = strlen (XSTR (sym_ref, 0));
33633 char *str = XALLOCAVEC (char, len + 2);
33634 str[0] = '.';
33635 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
33636 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
33640 static inline bool
33641 compare_section_name (const char *section, const char *templ)
33643 int len;
33645 len = strlen (templ);
33646 return (strncmp (section, templ, len) == 0
33647 && (section[len] == 0 || section[len] == '.'));
33650 bool
33651 rs6000_elf_in_small_data_p (const_tree decl)
33653 if (rs6000_sdata == SDATA_NONE)
33654 return false;
33656 /* We want to merge strings, so we never consider them small data. */
33657 if (TREE_CODE (decl) == STRING_CST)
33658 return false;
33660 /* Functions are never in the small data area. */
33661 if (TREE_CODE (decl) == FUNCTION_DECL)
33662 return false;
33664 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
33666 const char *section = DECL_SECTION_NAME (decl);
33667 if (compare_section_name (section, ".sdata")
33668 || compare_section_name (section, ".sdata2")
33669 || compare_section_name (section, ".gnu.linkonce.s")
33670 || compare_section_name (section, ".sbss")
33671 || compare_section_name (section, ".sbss2")
33672 || compare_section_name (section, ".gnu.linkonce.sb")
33673 || strcmp (section, ".PPC.EMB.sdata0") == 0
33674 || strcmp (section, ".PPC.EMB.sbss0") == 0)
33675 return true;
33677 else
33679 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
33681 if (size > 0
33682 && size <= g_switch_value
33683 /* If it's not public, and we're not going to reference it there,
33684 there's no need to put it in the small data section. */
33685 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33686 return true;
33689 return false;
33692 #endif /* USING_ELFOS_H */
33694 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33696 static bool
33697 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33699 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33702 /* Do not place thread-local symbols refs in the object blocks. */
33704 static bool
33705 rs6000_use_blocks_for_decl_p (const_tree decl)
33707 return !DECL_THREAD_LOCAL_P (decl);
33710 /* Return a REG that occurs in ADDR with coefficient 1.
33711 ADDR can be effectively incremented by incrementing REG.
33713 r0 is special and we must not select it as an address
33714 register by this routine since our caller will try to
33715 increment the returned register via an "la" instruction. */
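/* E.g. (illustrative) for ADDR of the form
     (plus (plus (reg 9) (reg 0)) (const_int 8))
   this returns (reg 9): constant terms are stepped over and r0 is
   never chosen.  */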
33717 rtx
33718 find_addr_reg (rtx addr)
33720 while (GET_CODE (addr) == PLUS)
33722 if (GET_CODE (XEXP (addr, 0)) == REG
33723 && REGNO (XEXP (addr, 0)) != 0)
33724 addr = XEXP (addr, 0);
33725 else if (GET_CODE (XEXP (addr, 1)) == REG
33726 && REGNO (XEXP (addr, 1)) != 0)
33727 addr = XEXP (addr, 1);
33728 else if (CONSTANT_P (XEXP (addr, 0)))
33729 addr = XEXP (addr, 1);
33730 else if (CONSTANT_P (XEXP (addr, 1)))
33731 addr = XEXP (addr, 0);
33732 else
33733 gcc_unreachable ();
33735 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
33736 return addr;
33739 void
33740 rs6000_fatal_bad_address (rtx op)
33742 fatal_insn ("bad address", op);
33745 #if TARGET_MACHO
33747 typedef struct branch_island_d {
33748 tree function_name;
33749 tree label_name;
33750 int line_number;
33751 } branch_island;
33754 static vec<branch_island, va_gc> *branch_islands;
33756 /* Remember to generate a branch island for far calls to the given
33757 function. */
33759 static void
33760 add_compiler_branch_island (tree label_name, tree function_name,
33761 int line_number)
33763 branch_island bi = {function_name, label_name, line_number};
33764 vec_safe_push (branch_islands, bi);
33767 /* Generate far-jump branch islands for everything recorded in
33768 branch_islands. Invoked immediately after the last instruction of
33769 the epilogue has been emitted; the branch islands must be appended
33770 to, and contiguous with, the function body. Mach-O stubs are
33771 generated in machopic_output_stub(). */
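/* An illustrative non-PIC island for function _foo with label L42
   (cf. the !flag_pic arm below):
     L42:
        lis r12,hi16(_foo)
        ori r12,r12,lo16(_foo)
        mtctr r12
        bctr  */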
33773 static void
33774 macho_branch_islands (void)
33776 char tmp_buf[512];
33778 while (!vec_safe_is_empty (branch_islands))
33780 branch_island *bi = &branch_islands->last ();
33781 const char *label = IDENTIFIER_POINTER (bi->label_name);
33782 const char *name = IDENTIFIER_POINTER (bi->function_name);
33783 char name_buf[512];
33784 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33785 if (name[0] == '*' || name[0] == '&')
33786 strcpy (name_buf, name+1);
33787 else
33789 name_buf[0] = '_';
33790 strcpy (name_buf+1, name);
33792 strcpy (tmp_buf, "\n");
33793 strcat (tmp_buf, label);
33794 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33795 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33796 dbxout_stabd (N_SLINE, bi->line_number);
33797 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33798 if (flag_pic)
33800 if (TARGET_LINK_STACK)
33802 char name[32];
33803 get_ppc476_thunk_name (name);
33804 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
33805 strcat (tmp_buf, name);
33806 strcat (tmp_buf, "\n");
33807 strcat (tmp_buf, label);
33808 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33810 else
33812 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
33813 strcat (tmp_buf, label);
33814 strcat (tmp_buf, "_pic\n");
33815 strcat (tmp_buf, label);
33816 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33819 strcat (tmp_buf, "\taddis r11,r11,ha16(");
33820 strcat (tmp_buf, name_buf);
33821 strcat (tmp_buf, " - ");
33822 strcat (tmp_buf, label);
33823 strcat (tmp_buf, "_pic)\n");
33825 strcat (tmp_buf, "\tmtlr r0\n");
33827 strcat (tmp_buf, "\taddi r12,r11,lo16(");
33828 strcat (tmp_buf, name_buf);
33829 strcat (tmp_buf, " - ");
33830 strcat (tmp_buf, label);
33831 strcat (tmp_buf, "_pic)\n");
33833 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33835 else
33837 strcat (tmp_buf, ":\nlis r12,hi16(");
33838 strcat (tmp_buf, name_buf);
33839 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33840 strcat (tmp_buf, name_buf);
33841 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33843 output_asm_insn (tmp_buf, 0);
33844 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33845 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33846 dbxout_stabd (N_SLINE, bi->line_number);
33847 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33848 branch_islands->pop ();
33852 /* NO_PREVIOUS_DEF checks whether the function name has already been
33853 recorded in branch_islands. */
33855 static int
33856 no_previous_def (tree function_name)
33858 branch_island *bi;
33859 unsigned ix;
33861 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33862 if (function_name == bi->function_name)
33863 return 0;
33864 return 1;
33867 /* GET_PREV_LABEL gets the label name from the previous definition of
33868 the function. */
33870 static tree
33871 get_prev_label (tree function_name)
33873 branch_island *bi;
33874 unsigned ix;
33876 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33877 if (function_name == bi->function_name)
33878 return bi->label_name;
33879 return NULL_TREE;
33882 /* INSN is either a function call or a millicode call. It may have an
33883 unconditional jump in its delay slot.
33885 CALL_DEST is the routine we are calling. */
33887 char *
33888 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33889 int cookie_operand_number)
33891 static char buf[256];
33892 if (darwin_emit_branch_islands
33893 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33894 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33896 tree labelname;
33897 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33899 if (no_previous_def (funname))
33901 rtx label_rtx = gen_label_rtx ();
33902 char *label_buf, temp_buf[256];
33903 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33904 CODE_LABEL_NUMBER (label_rtx));
33905 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33906 labelname = get_identifier (label_buf);
33907 add_compiler_branch_island (labelname, funname, insn_line (insn));
33909 else
33910 labelname = get_prev_label (funname);
33912 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33913 instruction will reach 'foo', otherwise link as 'bl L42'".
33914 "L42" should be a 'branch island', that will do a far jump to
33915 'foo'. Branch islands are generated in
33916 macho_branch_islands(). */
33917 sprintf (buf, "jbsr %%z%d,%.246s",
33918 dest_operand_number, IDENTIFIER_POINTER (labelname));
33920 else
33921 sprintf (buf, "bl %%z%d", dest_operand_number);
33922 return buf;
33925 /* Generate PIC and indirect symbol stubs. */
33927 void
33928 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33930 unsigned int length;
33931 char *symbol_name, *lazy_ptr_name;
33932 char *local_label_0;
33933 static int label = 0;
33935 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33936 symb = (*targetm.strip_name_encoding) (symb);
33939 length = strlen (symb);
33940 symbol_name = XALLOCAVEC (char, length + 32);
33941 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33943 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33944 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33946 if (flag_pic == 2)
33947 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33948 else
33949 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33951 if (flag_pic == 2)
33953 fprintf (file, "\t.align 5\n");
33955 fprintf (file, "%s:\n", stub);
33956 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33958 label++;
33959 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33960 sprintf (local_label_0, "\"L%011d$spb\"", label);
33962 fprintf (file, "\tmflr r0\n");
33963 if (TARGET_LINK_STACK)
33965 char name[32];
33966 get_ppc476_thunk_name (name);
33967 fprintf (file, "\tbl %s\n", name);
33968 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33970 else
33972 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33973 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33975 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33976 lazy_ptr_name, local_label_0);
33977 fprintf (file, "\tmtlr r0\n");
33978 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33979 (TARGET_64BIT ? "ldu" : "lwzu"),
33980 lazy_ptr_name, local_label_0);
33981 fprintf (file, "\tmtctr r12\n");
33982 fprintf (file, "\tbctr\n");
33984 else
33986 fprintf (file, "\t.align 4\n");
33988 fprintf (file, "%s:\n", stub);
33989 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33991 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33992 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33993 (TARGET_64BIT ? "ldu" : "lwzu"),
33994 lazy_ptr_name);
33995 fprintf (file, "\tmtctr r12\n");
33996 fprintf (file, "\tbctr\n");
33999 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
34000 fprintf (file, "%s:\n", lazy_ptr_name);
34001 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34002 fprintf (file, "%sdyld_stub_binding_helper\n",
34003 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
34006 /* Legitimize PIC addresses. If the address is already
34007 position-independent, we return ORIG. Newly generated
34008 position-independent addresses go into a reg. This is REG if non
34009 zero, otherwise we allocate register(s) as necessary. */
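/* SMALL_INT tests whether X fits in a signed 16-bit immediate: the
   0x8000 bias maps the interval [-0x8000, 0x7fff] onto [0, 0xffff].  */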
34011 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
34013 rtx
34014 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
34015 rtx reg)
34017 rtx base, offset;
34019 if (reg == NULL && ! reload_in_progress && ! reload_completed)
34020 reg = gen_reg_rtx (Pmode);
34022 if (GET_CODE (orig) == CONST)
34024 rtx reg_temp;
34026 if (GET_CODE (XEXP (orig, 0)) == PLUS
34027 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
34028 return orig;
34030 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
34032 /* Use a different reg for the intermediate value, as
34033 it will be marked UNCHANGING. */
34034 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
34035 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
34036 Pmode, reg_temp);
34037 offset =
34038 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
34039 Pmode, reg);
34041 if (GET_CODE (offset) == CONST_INT)
34043 if (SMALL_INT (offset))
34044 return plus_constant (Pmode, base, INTVAL (offset));
34045 else if (! reload_in_progress && ! reload_completed)
34046 offset = force_reg (Pmode, offset);
34047 else
34049 rtx mem = force_const_mem (Pmode, orig);
34050 return machopic_legitimize_pic_address (mem, Pmode, reg);
34053 return gen_rtx_PLUS (Pmode, base, offset);
34056 /* Fall back on generic machopic code. */
34057 return machopic_legitimize_pic_address (orig, mode, reg);
34060 /* Output a .machine directive for the Darwin assembler, and call
34061 the generic start_file routine. */
34063 static void
34064 rs6000_darwin_file_start (void)
34066 static const struct
34068 const char *arg;
34069 const char *name;
34070 HOST_WIDE_INT if_set;
34071 } mapping[] = {
34072 { "ppc64", "ppc64", MASK_64BIT },
34073 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
34074 { "power4", "ppc970", 0 },
34075 { "G5", "ppc970", 0 },
34076 { "7450", "ppc7450", 0 },
34077 { "7400", "ppc7400", MASK_ALTIVEC },
34078 { "G4", "ppc7400", 0 },
34079 { "750", "ppc750", 0 },
34080 { "740", "ppc750", 0 },
34081 { "G3", "ppc750", 0 },
34082 { "604e", "ppc604e", 0 },
34083 { "604", "ppc604", 0 },
34084 { "603e", "ppc603", 0 },
34085 { "603", "ppc603", 0 },
34086 { "601", "ppc601", 0 },
34087 { NULL, "ppc", 0 } };
34088 const char *cpu_id = "";
34089 size_t i;
34091 rs6000_file_start ();
34092 darwin_file_start ();
34094 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
34096 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
34097 cpu_id = rs6000_default_cpu;
34099 if (global_options_set.x_rs6000_cpu_index)
34100 cpu_id = processor_target_table[rs6000_cpu_index].name;
34102 /* Look through the mapping array. Pick the first name that either
34103 matches the argument, has a bit set in IF_SET that is also set
34104 in the target flags, or has a NULL name. */
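/* E.g. -mcpu=G5 selects ".machine ppc970"; an unknown CPU with no
   matching IF_SET bits falls through to the ".machine ppc" default.  */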
34106 i = 0;
34107 while (mapping[i].arg != NULL
34108 && strcmp (mapping[i].arg, cpu_id) != 0
34109 && (mapping[i].if_set & rs6000_isa_flags) == 0)
34110 i++;
34112 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
34115 #endif /* TARGET_MACHO */
34117 #if TARGET_ELF
34118 static int
34119 rs6000_elf_reloc_rw_mask (void)
34121 if (flag_pic)
34122 return 3;
34123 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
34124 return 2;
34125 else
34126 return 0;
34129 /* Record an element in the table of global constructors. SYMBOL is
34130 a SYMBOL_REF of the function to be called; PRIORITY is a number
34131 between 0 and MAX_INIT_PRIORITY.
34133 This differs from default_named_section_asm_out_constructor in
34134 that we have special handling for -mrelocatable. */
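/* E.g. (by the inversion below) priority 65500 is emitted into
   section ".ctors.00035", assuming the usual MAX_INIT_PRIORITY of
   65535.  */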
34136 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
34137 static void
34138 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
34140 const char *section = ".ctors";
34141 char buf[16];
34143 if (priority != DEFAULT_INIT_PRIORITY)
34145 sprintf (buf, ".ctors.%.5u",
34146 /* Invert the numbering so the linker puts us in the proper
34147 order; constructors are run from right to left, and the
34148 linker sorts in increasing order. */
34149 MAX_INIT_PRIORITY - priority);
34150 section = buf;
34153 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34154 assemble_align (POINTER_SIZE);
34156 if (DEFAULT_ABI == ABI_V4
34157 && (TARGET_RELOCATABLE || flag_pic > 1))
34159 fputs ("\t.long (", asm_out_file);
34160 output_addr_const (asm_out_file, symbol);
34161 fputs (")@fixup\n", asm_out_file);
34163 else
34164 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34167 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
34168 static void
34169 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
34171 const char *section = ".dtors";
34172 char buf[16];
34174 if (priority != DEFAULT_INIT_PRIORITY)
34176 sprintf (buf, ".dtors.%.5u",
34177 /* Invert the numbering so the linker puts us in the proper
34178 order; constructors are run from right to left, and the
34179 linker sorts in increasing order. */
34180 MAX_INIT_PRIORITY - priority);
34181 section = buf;
34184 switch_to_section (get_section (section, SECTION_WRITE, NULL));
34185 assemble_align (POINTER_SIZE);
34187 if (DEFAULT_ABI == ABI_V4
34188 && (TARGET_RELOCATABLE || flag_pic > 1))
34190 fputs ("\t.long (", asm_out_file);
34191 output_addr_const (asm_out_file, symbol);
34192 fputs (")@fixup\n", asm_out_file);
34194 else
34195 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
34198 void
34199 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
34201 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
34203 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
34204 ASM_OUTPUT_LABEL (file, name);
34205 fputs (DOUBLE_INT_ASM_OP, file);
34206 rs6000_output_function_entry (file, name);
34207 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
34208 if (DOT_SYMBOLS)
34210 fputs ("\t.size\t", file);
34211 assemble_name (file, name);
34212 fputs (",24\n\t.type\t.", file);
34213 assemble_name (file, name);
34214 fputs (",@function\n", file);
34215 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
34217 fputs ("\t.globl\t.", file);
34218 assemble_name (file, name);
34219 putc ('\n', file);
34222 else
34223 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34224 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34225 rs6000_output_function_entry (file, name);
34226 fputs (":\n", file);
34227 return;
34230 if (DEFAULT_ABI == ABI_V4
34231 && (TARGET_RELOCATABLE || flag_pic > 1)
34232 && !TARGET_SECURE_PLT
34233 && (get_pool_size () != 0 || crtl->profile)
34234 && uses_TOC ())
34236 char buf[256];
34238 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34240 fprintf (file, "\t.long ");
34241 assemble_name (file, toc_label_name);
34242 need_toc_init = 1;
34243 putc ('-', file);
34244 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34245 assemble_name (file, buf);
34246 putc ('\n', file);
34249 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
34250 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
34252 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
34254 char buf[256];
34256 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
34258 fprintf (file, "\t.quad .TOC.-");
34259 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
34260 assemble_name (file, buf);
34261 putc ('\n', file);
34264 if (DEFAULT_ABI == ABI_AIX)
34266 const char *desc_name, *orig_name;
34268 orig_name = (*targetm.strip_name_encoding) (name);
34269 desc_name = orig_name;
34270 while (*desc_name == '.')
34271 desc_name++;
34273 if (TREE_PUBLIC (decl))
34274 fprintf (file, "\t.globl %s\n", desc_name);
34276 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
34277 fprintf (file, "%s:\n", desc_name);
34278 fprintf (file, "\t.long %s\n", orig_name);
34279 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
34280 fputs ("\t.long 0\n", file);
34281 fprintf (file, "\t.previous\n");
34283 ASM_OUTPUT_LABEL (file, name);
34286 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
34287 static void
34288 rs6000_elf_file_end (void)
34290 #ifdef HAVE_AS_GNU_ATTRIBUTE
34291 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
34293 if (rs6000_passes_float)
34294 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
34295 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
34296 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
34297 : 2));
34298 if (rs6000_passes_vector)
34299 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
34300 (TARGET_ALTIVEC_ABI ? 2
34301 : TARGET_SPE_ABI ? 3
34302 : 1));
34303 if (rs6000_returns_struct)
34304 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
34305 aix_struct_return ? 2 : 1);
34307 #endif
34308 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
34309 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
34310 file_end_indicate_exec_stack ();
34311 #endif
34313 if (flag_split_stack)
34314 file_end_indicate_split_stack ();
34316 if (cpu_builtin_p)
34318 /* We have expanded a CPU builtin, so we need to emit a reference to
34319 the special symbol that LIBC uses to declare it supports the
34320 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
34321 switch_to_section (data_section);
34322 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
34323 fprintf (asm_out_file, "\t%s %s\n",
34324 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
34327 #endif
34329 #if TARGET_XCOFF
34331 #ifndef HAVE_XCOFF_DWARF_EXTRAS
34332 #define HAVE_XCOFF_DWARF_EXTRAS 0
34333 #endif
34335 static enum unwind_info_type
34336 rs6000_xcoff_debug_unwind_info (void)
34338 return UI_NONE;
34341 static void
34342 rs6000_xcoff_asm_output_anchor (rtx symbol)
34344 char buffer[100];
34346 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
34347 SYMBOL_REF_BLOCK_OFFSET (symbol));
34348 fprintf (asm_out_file, "%s", SET_ASM_OP);
34349 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
34350 fprintf (asm_out_file, ",");
34351 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
34352 fprintf (asm_out_file, "\n");
34355 static void
34356 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
34358 fputs (GLOBAL_ASM_OP, stream);
34359 RS6000_OUTPUT_BASENAME (stream, name);
34360 putc ('\n', stream);
34363 /* A get_unnamed_decl callback, used for read-only sections. PTR
34364 points to the section string variable. */
34366 static void
34367 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
34369 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
34370 *(const char *const *) directive,
34371 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34374 /* Likewise for read-write sections. */
34376 static void
34377 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
34379 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
34380 *(const char *const *) directive,
34381 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34384 static void
34385 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
34387 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
34388 *(const char *const *) directive,
34389 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
34392 /* A get_unnamed_section callback, used for switching to toc_section. */
34394 static void
34395 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
34397 if (TARGET_MINIMAL_TOC)
34399 /* toc_section is always selected at least once from
34400 rs6000_xcoff_file_start, so this is guaranteed to be
34401 defined exactly once in each file. */
34402 if (!toc_initialized)
34404 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
34405 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
34406 toc_initialized = 1;
34408 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
34409 (TARGET_32BIT ? "" : ",3"));
34411 else
34412 fputs ("\t.toc\n", asm_out_file);
34415 /* Implement TARGET_ASM_INIT_SECTIONS. */
34417 static void
34418 rs6000_xcoff_asm_init_sections (void)
34420 read_only_data_section
34421 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
34422 &xcoff_read_only_section_name);
34424 private_data_section
34425 = get_unnamed_section (SECTION_WRITE,
34426 rs6000_xcoff_output_readwrite_section_asm_op,
34427 &xcoff_private_data_section_name);
34429 tls_data_section
34430 = get_unnamed_section (SECTION_TLS,
34431 rs6000_xcoff_output_tls_section_asm_op,
34432 &xcoff_tls_data_section_name);
34434 tls_private_data_section
34435 = get_unnamed_section (SECTION_TLS,
34436 rs6000_xcoff_output_tls_section_asm_op,
34437 &xcoff_private_data_section_name);
34439 read_only_private_data_section
34440 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
34441 &xcoff_private_data_section_name);
34443 toc_section
34444 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
34446 readonly_data_section = read_only_data_section;
34449 static int
34450 rs6000_xcoff_reloc_rw_mask (void)
34452 return 3;
34455 static void
34456 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
34457 tree decl ATTRIBUTE_UNUSED)
34459 int smclass;
34460 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
34462 if (flags & SECTION_EXCLUDE)
34463 smclass = 4;
34464 else if (flags & SECTION_DEBUG)
34466 fprintf (asm_out_file, "\t.dwsect %s\n", name);
34467 return;
34469 else if (flags & SECTION_CODE)
34470 smclass = 0;
34471 else if (flags & SECTION_TLS)
34472 smclass = 3;
34473 else if (flags & SECTION_WRITE)
34474 smclass = 2;
34475 else
34476 smclass = 1;
34478 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
34479 (flags & SECTION_CODE) ? "." : "",
34480 name, suffix[smclass], flags & SECTION_ENTSIZE);
34483 #define IN_NAMED_SECTION(DECL) \
34484 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
34485 && DECL_SECTION_NAME (DECL) != NULL)
34487 static section *
34488 rs6000_xcoff_select_section (tree decl, int reloc,
34489 unsigned HOST_WIDE_INT align)
34491 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into a
34492 named section. */
34493 if (align > BIGGEST_ALIGNMENT)
34495 resolve_unique_section (decl, reloc, true);
34496 if (IN_NAMED_SECTION (decl))
34497 return get_named_section (decl, NULL, reloc);
34500 if (decl_readonly_section (decl, reloc))
34502 if (TREE_PUBLIC (decl))
34503 return read_only_data_section;
34504 else
34505 return read_only_private_data_section;
34507 else
34509 #if HAVE_AS_TLS
34510 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34512 if (TREE_PUBLIC (decl))
34513 return tls_data_section;
34514 else if (bss_initializer_p (decl))
34516 /* Convert to COMMON to emit in BSS. */
34517 DECL_COMMON (decl) = 1;
34518 return tls_comm_section;
34520 else
34521 return tls_private_data_section;
34523 else
34524 #endif
34525 if (TREE_PUBLIC (decl))
34526 return data_section;
34527 else
34528 return private_data_section;
34532 static void
34533 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
34535 const char *name;
34537 /* Use select_section for private data and uninitialized data with
34538 alignment <= BIGGEST_ALIGNMENT. */
34539 if (!TREE_PUBLIC (decl)
34540 || DECL_COMMON (decl)
34541 || (DECL_INITIAL (decl) == NULL_TREE
34542 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
34543 || DECL_INITIAL (decl) == error_mark_node
34544 || (flag_zero_initialized_in_bss
34545 && initializer_zerop (DECL_INITIAL (decl))))
34546 return;
34548 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34549 name = (*targetm.strip_name_encoding) (name);
34550 set_decl_section_name (decl, name);
34553 /* Select section for constant in constant pool.
34555 On RS/6000, all constants are in the private read-only data area.
34556 However, if this is being placed in the TOC it must be output as a
34557 toc entry. */
34559 static section *
34560 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
34561 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
34563 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34564 return toc_section;
34565 else
34566 return read_only_private_data_section;
34569 /* Remove any trailing [DS] or the like from the symbol name. */
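/* E.g. "foo[DS]" becomes "foo"; the mapping-class suffix is assumed
   to be exactly four characters long, as in "[DS]" or "[RW]".  */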
34571 static const char *
34572 rs6000_xcoff_strip_name_encoding (const char *name)
34574 size_t len;
34575 if (*name == '*')
34576 name++;
34577 len = strlen (name);
34578 if (name[len - 1] == ']')
34579 return ggc_alloc_string (name, len - 4);
34580 else
34581 return name;
34584 /* Section attributes. AIX is always PIC. */
34586 static unsigned int
34587 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
34589 unsigned int align;
34590 unsigned int flags = default_section_type_flags (decl, name, reloc);
34592 /* Align to at least UNIT size. */
34593 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
34594 align = MIN_UNITS_PER_WORD;
34595 else
34596 /* Increase alignment of large objects if not already stricter. */
34597 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
34598 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
34599 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
34601 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
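/* The alignment computed above travels in the SECTION_ENTSIZE bits of
   the returned flags; rs6000_xcoff_asm_named_section prints it as the
   trailing operand of .csect, e.g. ",3" for an 8-byte-aligned csect.  */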
34604 /* Output at beginning of assembler file.
34606 Initialize the section names for the RS/6000 at this point.
34608 Specify filename, including full path, to assembler.
34610 We want to go into the TOC section so at least one .toc will be emitted.
34611 Also, in order to output proper .bs/.es pairs, we need at least one static
34612 [RW] section emitted.
34614 Finally, declare mcount when profiling to make the assembler happy. */
34616 static void
34617 rs6000_xcoff_file_start (void)
34619 rs6000_gen_section_name (&xcoff_bss_section_name,
34620 main_input_filename, ".bss_");
34621 rs6000_gen_section_name (&xcoff_private_data_section_name,
34622 main_input_filename, ".rw_");
34623 rs6000_gen_section_name (&xcoff_read_only_section_name,
34624 main_input_filename, ".ro_");
34625 rs6000_gen_section_name (&xcoff_tls_data_section_name,
34626 main_input_filename, ".tls_");
34627 rs6000_gen_section_name (&xcoff_tbss_section_name,
34628 main_input_filename, ".tbss_[UL]");
34630 fputs ("\t.file\t", asm_out_file);
34631 output_quoted_string (asm_out_file, main_input_filename);
34632 fputc ('\n', asm_out_file);
34633 if (write_symbols != NO_DEBUG)
34634 switch_to_section (private_data_section);
34635 switch_to_section (toc_section);
34636 switch_to_section (text_section);
34637 if (profile_flag)
34638 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
34639 rs6000_file_start ();
34642 /* Output at end of assembler file.
34643 On the RS/6000, referencing data should automatically pull in text. */
34645 static void
34646 rs6000_xcoff_file_end (void)
34648 switch_to_section (text_section);
34649 fputs ("_section_.text:\n", asm_out_file);
34650 switch_to_section (data_section);
34651 fputs (TARGET_32BIT
34652 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
34653 asm_out_file);
34656 struct declare_alias_data
34658 FILE *file;
34659 bool function_descriptor;
34662 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
34664 static bool
34665 rs6000_declare_alias (struct symtab_node *n, void *d)
34667 struct declare_alias_data *data = (struct declare_alias_data *)d;
34668 /* The main symbol is output specially, because the varasm machinery does
34669 part of the job for us; we do not need to declare .globl/.lglobl and such. */
34670 if (!n->alias || n->weakref)
34671 return false;
34673 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
34674 return false;
34676 /* Prevent assemble_alias from trying to use .set pseudo operation
34677 that does not behave as expected by the middle-end. */
34678 TREE_ASM_WRITTEN (n->decl) = true;
34680 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
34681 char *buffer = (char *) alloca (strlen (name) + 2);
34682 char *p;
34683 int dollar_inside = 0;
34685 strcpy (buffer, name);
34686 p = strchr (buffer, '$');
34687 while (p) {
34688 *p = '_';
34689 dollar_inside++;
34690 p = strchr (p + 1, '$');
34692 if (TREE_PUBLIC (n->decl))
34694 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
34696 if (dollar_inside) {
34697 if (data->function_descriptor)
34698 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34699 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34701 if (data->function_descriptor)
34703 fputs ("\t.globl .", data->file);
34704 RS6000_OUTPUT_BASENAME (data->file, buffer);
34705 putc ('\n', data->file);
34707 fputs ("\t.globl ", data->file);
34708 RS6000_OUTPUT_BASENAME (data->file, buffer);
34709 putc ('\n', data->file);
34711 #ifdef ASM_WEAKEN_DECL
34712 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
34713 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
34714 #endif
34716 else
34718 if (dollar_inside)
34720 if (data->function_descriptor)
34721 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34722 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34724 if (data->function_descriptor)
34726 fputs ("\t.lglobl .", data->file);
34727 RS6000_OUTPUT_BASENAME (data->file, buffer);
34728 putc ('\n', data->file);
34730 fputs ("\t.lglobl ", data->file);
34731 RS6000_OUTPUT_BASENAME (data->file, buffer);
34732 putc ('\n', data->file);
34734 if (data->function_descriptor)
34735 fputs (".", data->file);
34736 RS6000_OUTPUT_BASENAME (data->file, buffer);
34737 fputs (":\n", data->file);
34738 return false;
34741 /* This macro produces the initial definition of a function name.
34742 On the RS/6000, we need to place an extra '.' in the function name and
34743 output the function descriptor.
34744 Dollar signs are converted to underscores.
34746 The csect for the function will have already been created when
34747 text_section was selected. We do have to go back to that csect, however.
34749 The third and fourth parameters to the .function pseudo-op (16 and 044)
34750 are placeholders which no longer have any use.
34752 Because AIX assembler's .set command has unexpected semantics, we output
34753 all aliases as alternative labels in front of the definition. */
34755 void
34756 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
34758 char *buffer = (char *) alloca (strlen (name) + 1);
34759 char *p;
34760 int dollar_inside = 0;
34761 struct declare_alias_data data = {file, false};
34763 strcpy (buffer, name);
34764 p = strchr (buffer, '$');
34765 while (p) {
34766 *p = '_';
34767 dollar_inside++;
34768 p = strchr (p + 1, '$');
34770 if (TREE_PUBLIC (decl))
34772 if (!RS6000_WEAK || !DECL_WEAK (decl))
34774 if (dollar_inside) {
34775 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34776 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34778 fputs ("\t.globl .", file);
34779 RS6000_OUTPUT_BASENAME (file, buffer);
34780 putc ('\n', file);
34783 else
34785 if (dollar_inside) {
34786 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34787 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34789 fputs ("\t.lglobl .", file);
34790 RS6000_OUTPUT_BASENAME (file, buffer);
34791 putc ('\n', file);
34793 fputs ("\t.csect ", file);
34794 RS6000_OUTPUT_BASENAME (file, buffer);
34795 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34796 RS6000_OUTPUT_BASENAME (file, buffer);
34797 fputs (":\n", file);
34798 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34799 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34800 RS6000_OUTPUT_BASENAME (file, buffer);
34801 fputs (", TOC[tc0], 0\n", file);
34802 in_section = NULL;
34803 switch_to_section (function_section (decl));
34804 putc ('.', file);
34805 RS6000_OUTPUT_BASENAME (file, buffer);
34806 fputs (":\n", file);
34807 data.function_descriptor = true;
34808 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34809 if (!DECL_IGNORED_P (decl))
34811 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34812 xcoffout_declare_function (file, decl, buffer);
34813 else if (write_symbols == DWARF2_DEBUG)
34815 name = (*targetm.strip_name_encoding) (name);
34816 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34819 return;
34822 /* This macro produces the initial definition of an object (variable) name.
34823 Because AIX assembler's .set command has unexpected semantics, we output
34824 all aliases as alternative labels in front of the definition. */
34826 void
34827 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34829 struct declare_alias_data data = {file, false};
34830 RS6000_OUTPUT_BASENAME (file, name);
34831 fputs (":\n", file);
34832 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
34835 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
34837 void
34838 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34840 fputs (integer_asm_op (size, FALSE), file);
34841 assemble_name (file, label);
34842 fputs ("-$", file);
34845 /* Output a symbol offset relative to the dbase for the current object.
34846 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34847 signed offsets.
34849 __gcc_unwind_dbase is embedded in all executables/libraries through
34850 libgcc/config/rs6000/crtdbase.S. */
34852 void
34853 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34855 fputs (integer_asm_op (size, FALSE), file);
34856 assemble_name (file, label);
34857 fputs("-__gcc_unwind_dbase", file);
34860 #ifdef HAVE_AS_TLS
34861 static void
34862 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34864 rtx symbol;
34865 int flags;
34867 default_encode_section_info (decl, rtl, first);
34869 /* Careful not to prod global register variables. */
34870 if (!MEM_P (rtl))
34871 return;
34872 symbol = XEXP (rtl, 0);
34873 if (GET_CODE (symbol) != SYMBOL_REF)
34874 return;
34876 flags = SYMBOL_REF_FLAGS (symbol);
34878 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34879 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34881 SYMBOL_REF_FLAGS (symbol) = flags;
34883 #endif /* HAVE_AS_TLS */
34884 #endif /* TARGET_XCOFF */
34886 /* Return true if INSN should not be copied. */
34888 static bool
34889 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34891 return recog_memoized (insn) >= 0
34892 && get_attr_cannot_copy (insn);
34895 /* Compute a (partial) cost for rtx X. Return true if the complete
34896 cost has been computed, and false if subexpressions should be
34897 scanned. In either case, *TOTAL contains the cost result. */
34899 static bool
34900 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34901 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34903 int code = GET_CODE (x);
34905 switch (code)
34907 /* On the RS/6000, if it is valid in the insn, it is free. */
34908 case CONST_INT:
34909 if (((outer_code == SET
34910 || outer_code == PLUS
34911 || outer_code == MINUS)
34912 && (satisfies_constraint_I (x)
34913 || satisfies_constraint_L (x)))
34914 || (outer_code == AND
34915 && (satisfies_constraint_K (x)
34916 || (mode == SImode
34917 ? satisfies_constraint_L (x)
34918 : satisfies_constraint_J (x))))
34919 || ((outer_code == IOR || outer_code == XOR)
34920 && (satisfies_constraint_K (x)
34921 || (mode == SImode
34922 ? satisfies_constraint_L (x)
34923 : satisfies_constraint_J (x))))
34924 || outer_code == ASHIFT
34925 || outer_code == ASHIFTRT
34926 || outer_code == LSHIFTRT
34927 || outer_code == ROTATE
34928 || outer_code == ROTATERT
34929 || outer_code == ZERO_EXTRACT
34930 || (outer_code == MULT
34931 && satisfies_constraint_I (x))
34932 || ((outer_code == DIV || outer_code == UDIV
34933 || outer_code == MOD || outer_code == UMOD)
34934 && exact_log2 (INTVAL (x)) >= 0)
34935 || (outer_code == COMPARE
34936 && (satisfies_constraint_I (x)
34937 || satisfies_constraint_K (x)))
34938 || ((outer_code == EQ || outer_code == NE)
34939 && (satisfies_constraint_I (x)
34940 || satisfies_constraint_K (x)
34941 || (mode == SImode
34942 ? satisfies_constraint_L (x)
34943 : satisfies_constraint_J (x))))
34944 || (outer_code == GTU
34945 && satisfies_constraint_I (x))
34946 || (outer_code == LTU
34947 && satisfies_constraint_P (x)))
34949 *total = 0;
34950 return true;
34952 else if ((outer_code == PLUS
34953 && reg_or_add_cint_operand (x, VOIDmode))
34954 || (outer_code == MINUS
34955 && reg_or_sub_cint_operand (x, VOIDmode))
34956 || ((outer_code == SET
34957 || outer_code == IOR
34958 || outer_code == XOR)
34959 && (INTVAL (x)
34960 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34962 *total = COSTS_N_INSNS (1);
34963 return true;
34965 /* FALLTHRU */
34967 case CONST_DOUBLE:
34968 case CONST_WIDE_INT:
34969 case CONST:
34970 case HIGH:
34971 case SYMBOL_REF:
34972 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34973 return true;
34975 case MEM:
34976 /* When optimizing for size, MEM should be slightly more expensive
34977 than generating the address, e.g., (plus (reg) (const)).
34978 L1 cache latency is about two instructions. */
34979 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34980 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
34981 *total += COSTS_N_INSNS (100);
34982 return true;
34984 case LABEL_REF:
34985 *total = 0;
34986 return true;
34988 case PLUS:
34989 case MINUS:
34990 if (FLOAT_MODE_P (mode))
34991 *total = rs6000_cost->fp;
34992 else
34993 *total = COSTS_N_INSNS (1);
34994 return false;
34996 case MULT:
34997 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34998 && satisfies_constraint_I (XEXP (x, 1)))
35000 if (INTVAL (XEXP (x, 1)) >= -256
35001 && INTVAL (XEXP (x, 1)) <= 255)
35002 *total = rs6000_cost->mulsi_const9;
35003 else
35004 *total = rs6000_cost->mulsi_const;
35006 else if (mode == SFmode)
35007 *total = rs6000_cost->fp;
35008 else if (FLOAT_MODE_P (mode))
35009 *total = rs6000_cost->dmul;
35010 else if (mode == DImode)
35011 *total = rs6000_cost->muldi;
35012 else
35013 *total = rs6000_cost->mulsi;
35014 return false;
35016 case FMA:
35017 if (mode == SFmode)
35018 *total = rs6000_cost->fp;
35019 else
35020 *total = rs6000_cost->dmul;
35021 break;
35023 case DIV:
35024 case MOD:
35025 if (FLOAT_MODE_P (mode))
35027 *total = mode == DFmode ? rs6000_cost->ddiv
35028 : rs6000_cost->sdiv;
35029 return false;
35031 /* FALLTHRU */
35033 case UDIV:
35034 case UMOD:
35035 if (GET_CODE (XEXP (x, 1)) == CONST_INT
35036 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
35038 if (code == DIV || code == MOD)
35039 /* Shift, addze */
35040 *total = COSTS_N_INSNS (2);
35041 else
35042 /* Shift */
35043 *total = COSTS_N_INSNS (1);
35045 else
35047 if (GET_MODE (XEXP (x, 1)) == DImode)
35048 *total = rs6000_cost->divdi;
35049 else
35050 *total = rs6000_cost->divsi;
35052 /* Add in shift and subtract for MOD unless we have a mod instruction. */
35053 if (!TARGET_MODULO && (code == MOD || code == UMOD))
35054 *total += COSTS_N_INSNS (2);
35055 return false;
35057 case CTZ:
35058 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
35059 return false;
35061 case FFS:
35062 *total = COSTS_N_INSNS (4);
35063 return false;
35065 case POPCOUNT:
35066 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
35067 return false;
35069 case PARITY:
35070 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
35071 return false;
35073 case NOT:
35074 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
35075 *total = 0;
35076 else
35077 *total = COSTS_N_INSNS (1);
35078 return false;
35080 case AND:
35081 if (CONST_INT_P (XEXP (x, 1)))
35083 rtx left = XEXP (x, 0);
35084 rtx_code left_code = GET_CODE (left);
35086 /* rotate-and-mask: 1 insn. */
35087 if ((left_code == ROTATE
35088 || left_code == ASHIFT
35089 || left_code == LSHIFTRT)
35090 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
35092 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
35093 if (!CONST_INT_P (XEXP (left, 1)))
35094 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
35095 *total += COSTS_N_INSNS (1);
35096 return true;
35099 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
35100 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
35101 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
35102 || (val & 0xffff) == val
35103 || (val & 0xffff0000) == val
35104 || ((val & 0xffff) == 0 && mode == SImode))
35106 *total = rtx_cost (left, mode, AND, 0, speed);
35107 *total += COSTS_N_INSNS (1);
35108 return true;
35111 /* 2 insns. */
35112 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
35114 *total = rtx_cost (left, mode, AND, 0, speed);
35115 *total += COSTS_N_INSNS (2);
35116 return true;
35120 *total = COSTS_N_INSNS (1);
35121 return false;
35123 case IOR:
35124 /* FIXME */
35125 *total = COSTS_N_INSNS (1);
35126 return true;
35128 case CLZ:
35129 case XOR:
35130 case ZERO_EXTRACT:
35131 *total = COSTS_N_INSNS (1);
35132 return false;
35134 case ASHIFT:
35135 /* The EXTSWSLI instruction is a combined instruction, so don't count
35136 the sign extend and the shift separately within the insn. */
35137 if (TARGET_EXTSWSLI && mode == DImode
35138 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
35139 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
35141 *total = 0;
35142 return false;
35144 /* fall through */
35146 case ASHIFTRT:
35147 case LSHIFTRT:
35148 case ROTATE:
35149 case ROTATERT:
35150 /* Handle mul_highpart. */
35151 if (outer_code == TRUNCATE
35152 && GET_CODE (XEXP (x, 0)) == MULT)
35154 if (mode == DImode)
35155 *total = rs6000_cost->muldi;
35156 else
35157 *total = rs6000_cost->mulsi;
35158 return true;
35160 else if (outer_code == AND)
35161 *total = 0;
35162 else
35163 *total = COSTS_N_INSNS (1);
35164 return false;
35166 case SIGN_EXTEND:
35167 case ZERO_EXTEND:
35168 if (GET_CODE (XEXP (x, 0)) == MEM)
35169 *total = 0;
35170 else
35171 *total = COSTS_N_INSNS (1);
35172 return false;
35174 case COMPARE:
35175 case NEG:
35176 case ABS:
35177 if (!FLOAT_MODE_P (mode))
35179 *total = COSTS_N_INSNS (1);
35180 return false;
35182 /* FALLTHRU */
35184 case FLOAT:
35185 case UNSIGNED_FLOAT:
35186 case FIX:
35187 case UNSIGNED_FIX:
35188 case FLOAT_TRUNCATE:
35189 *total = rs6000_cost->fp;
35190 return false;
35192 case FLOAT_EXTEND:
35193 if (mode == DFmode)
35194 *total = rs6000_cost->sfdf_convert;
35195 else
35196 *total = rs6000_cost->fp;
35197 return false;
35199 case UNSPEC:
35200 switch (XINT (x, 1))
35202 case UNSPEC_FRSP:
35203 *total = rs6000_cost->fp;
35204 return true;
35206 default:
35207 break;
35209 break;
35211 case CALL:
35212 case IF_THEN_ELSE:
35213 if (!speed)
35215 *total = COSTS_N_INSNS (1);
35216 return true;
35218 else if (FLOAT_MODE_P (mode)
35219 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
35221 *total = rs6000_cost->fp;
35222 return false;
35224 break;
35226 case NE:
35227 case EQ:
35228 case GTU:
35229 case LTU:
35230 /* Carry bit requires mode == Pmode.
35231 NEG or PLUS already counted so only add one. */
35232 if (mode == Pmode
35233 && (outer_code == NEG || outer_code == PLUS))
35235 *total = COSTS_N_INSNS (1);
35236 return true;
35238 if (outer_code == SET)
35240 if (XEXP (x, 1) == const0_rtx)
35242 if (TARGET_ISEL && !TARGET_MFCRF)
35243 *total = COSTS_N_INSNS (8);
35244 else
35245 *total = COSTS_N_INSNS (2);
35246 return true;
35248 else
35250 *total = COSTS_N_INSNS (3);
35251 return false;
35254 /* FALLTHRU */
35256 case GT:
35257 case LT:
35258 case UNORDERED:
35259 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
35261 if (TARGET_ISEL && !TARGET_MFCRF)
35262 *total = COSTS_N_INSNS (8);
35263 else
35264 *total = COSTS_N_INSNS (2);
35265 return true;
35267 /* CC COMPARE. */
35268 if (outer_code == COMPARE)
35270 *total = 0;
35271 return true;
35273 break;
35275 default:
35276 break;
35279 return false;
35282 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
35284 static bool
35285 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
35286 int opno, int *total, bool speed)
35288 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
35290 fprintf (stderr,
35291 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
35292 "opno = %d, total = %d, speed = %s, x:\n",
35293 ret ? "complete" : "scan inner",
35294 GET_MODE_NAME (mode),
35295 GET_RTX_NAME (outer_code),
35296 opno,
35297 *total,
35298 speed ? "true" : "false");
35300 debug_rtx (x);
35302 return ret;
35305 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35307 static int
35308 rs6000_debug_address_cost (rtx x, machine_mode mode,
35309 addr_space_t as, bool speed)
35311 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35313 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35314 ret, speed ? "true" : "false");
35315 debug_rtx (x);
35317 return ret;
35321 /* A C expression returning the cost of moving data from a register of class
35322 CLASS1 to one of CLASS2. */
35324 static int
35325 rs6000_register_move_cost (machine_mode mode,
35326 reg_class_t from, reg_class_t to)
35328 int ret;
35330 if (TARGET_DEBUG_COST)
35331 dbg_cost_ctrl++;
35333 /* Moves from/to GENERAL_REGS. */
35334 if (reg_classes_intersect_p (to, GENERAL_REGS)
35335 || reg_classes_intersect_p (from, GENERAL_REGS))
35337 reg_class_t rclass = from;
35339 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35340 rclass = to;
35342 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35343 ret = (rs6000_memory_move_cost (mode, rclass, false)
35344 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35346 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35347 shift. */
35348 else if (rclass == CR_REGS)
35349 ret = 4;
35351 /* For those processors that have slow LR/CTR moves, make them more
35352 expensive than memory in order to bias spills to memory. */
35353 else if ((rs6000_cpu == PROCESSOR_POWER6
35354 || rs6000_cpu == PROCESSOR_POWER7
35355 || rs6000_cpu == PROCESSOR_POWER8
35356 || rs6000_cpu == PROCESSOR_POWER9)
35357 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35358 ret = 6 * hard_regno_nregs[0][mode];
35360 else
35361 /* A move will cost one instruction per GPR moved. */
35362 ret = 2 * hard_regno_nregs[0][mode];
35365 /* If we have VSX, we can easily move between FPR or Altivec registers. */
35366 else if (VECTOR_MEM_VSX_P (mode)
35367 && reg_classes_intersect_p (to, VSX_REGS)
35368 && reg_classes_intersect_p (from, VSX_REGS))
35369 ret = 2 * hard_regno_nregs[32][mode];
35371 /* Moving between two similar registers is just one instruction. */
35372 else if (reg_classes_intersect_p (to, from))
35373 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35375 /* Everything else has to go through GENERAL_REGS. */
35376 else
35377 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35378 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35380 if (TARGET_DEBUG_COST)
35382 if (dbg_cost_ctrl == 1)
35383 fprintf (stderr,
35384 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35385 ret, GET_MODE_NAME (mode), reg_class_names[from],
35386 reg_class_names[to]);
35387 dbg_cost_ctrl--;
35390 return ret;
35393 /* A C expression returning the cost of moving data of MODE from a register to
35394 or from memory. */
35396 static int
35397 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
35398 bool in ATTRIBUTE_UNUSED)
35400 int ret;
35402 if (TARGET_DEBUG_COST)
35403 dbg_cost_ctrl++;
35405 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
35406 ret = 4 * hard_regno_nregs[0][mode];
35407 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
35408 || reg_classes_intersect_p (rclass, VSX_REGS)))
35409 ret = 4 * hard_regno_nregs[32][mode];
35410 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
35411 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
35412 else
35413 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
35415 if (TARGET_DEBUG_COST)
35417 if (dbg_cost_ctrl == 1)
35418 fprintf (stderr,
35419 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
35420 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
35421 dbg_cost_ctrl--;
35424 return ret;
35427 /* Returns a code for a target-specific builtin that implements
35428 reciprocal of the function, or NULL_TREE if not available. */
35430 static tree
35431 rs6000_builtin_reciprocal (tree fndecl)
35433 switch (DECL_FUNCTION_CODE (fndecl))
35435 case VSX_BUILTIN_XVSQRTDP:
35436 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
35437 return NULL_TREE;
35439 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
35441 case VSX_BUILTIN_XVSQRTSP:
35442 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
35443 return NULL_TREE;
35445 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
35447 default:
35448 return NULL_TREE;
35452 /* Load up a constant. If the mode is a vector mode, splat the value across
35453 all of the vector elements. */
35455 static rtx
35456 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
35458 rtx reg;
35460 if (mode == SFmode || mode == DFmode)
35462 rtx d = const_double_from_real_value (dconst, mode);
35463 reg = force_reg (mode, d);
35465 else if (mode == V4SFmode)
35467 rtx d = const_double_from_real_value (dconst, SFmode);
35468 rtvec v = gen_rtvec (4, d, d, d, d);
35469 reg = gen_reg_rtx (mode);
35470 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35472 else if (mode == V2DFmode)
35474 rtx d = const_double_from_real_value (dconst, DFmode);
35475 rtvec v = gen_rtvec (2, d, d);
35476 reg = gen_reg_rtx (mode);
35477 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35479 else
35480 gcc_unreachable ();
35482 return reg;
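/* For reference: rs6000_load_constant_and_splat (V2DFmode, dconst1)
   returns a V2DF register holding {1.0, 1.0}.  */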
35485 /* Generate an FMA instruction. */
35487 static void
35488 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
35490 machine_mode mode = GET_MODE (target);
35491 rtx dst;
35493 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
35494 gcc_assert (dst != NULL);
35496 if (dst != target)
35497 emit_move_insn (target, dst);
35500 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
35502 static void
35503 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
35505 machine_mode mode = GET_MODE (dst);
35506 rtx r;
35508 /* This is a tad more complicated, since the fnma_optab is for
35509 a different expression: fma(-m1, m2, a), which is the same
35510 thing except in the case of signed zeros.
35512 Fortunately we know that if FMA is supported that FNMSUB is
35513 also supported in the ISA. Just expand it directly. */
35515 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
35517 r = gen_rtx_NEG (mode, a);
35518 r = gen_rtx_FMA (mode, m1, m2, r);
35519 r = gen_rtx_NEG (mode, r);
35520 emit_insn (gen_rtx_SET (dst, r));
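/* For reference: -fma(m1, m2, -a) = -(m1*m2 - a) = a - m1*m2, which
   matches fma(-m1, m2, a) everywhere except for the sign of a zero
   result when m1*m2 == a.  */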
35523 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
35524 add a reg_note saying that this was a division. Support both scalar and
35525 vector divide. Assumes no trapping math and finite arguments. */
35527 void
35528 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
35530 machine_mode mode = GET_MODE (dst);
35531 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
35532 int i;
35534 /* Low precision estimates guarantee 5 bits of accuracy. High
35535 precision estimates guarantee 14 bits of accuracy. SFmode
35536 requires 23 bits of accuracy. DFmode requires 52 bits of
35537 accuracy. Each pass at least doubles the accuracy, leading
35538 to the following. */
35539 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35540 if (mode == DFmode || mode == V2DFmode)
35541 passes++;
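/* Concretely: low-precision (5-bit) estimates need 3 passes for
   SFmode (5 -> 10 -> 20 -> 40 >= 23 bits) and 4 passes for DFmode
   (80 >= 52 bits); with TARGET_RECIP_PRECISION (14-bit estimates),
   one pass gives 28 bits for SFmode and two give 56 for DFmode.  */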
35543 enum insn_code code = optab_handler (smul_optab, mode);
35544 insn_gen_fn gen_mul = GEN_FCN (code);
35546 gcc_assert (code != CODE_FOR_nothing);
35548 one = rs6000_load_constant_and_splat (mode, dconst1);
35550 /* x0 = 1./d estimate */
35551 x0 = gen_reg_rtx (mode);
35552 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35553 UNSPEC_FRES)));
35555 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35556 if (passes > 1) {
35558 /* e0 = 1. - d * x0 */
35559 e0 = gen_reg_rtx (mode);
35560 rs6000_emit_nmsub (e0, d, x0, one);
35562 /* x1 = x0 + e0 * x0 */
35563 x1 = gen_reg_rtx (mode);
35564 rs6000_emit_madd (x1, e0, x0, x0);
35566 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35567 ++i, xprev = xnext, eprev = enext) {
35569 /* enext = eprev * eprev */
35570 enext = gen_reg_rtx (mode);
35571 emit_insn (gen_mul (enext, eprev, eprev));
35573 /* xnext = xprev + enext * xprev */
35574 xnext = gen_reg_rtx (mode);
35575 rs6000_emit_madd (xnext, enext, xprev, xprev);
35578 } else
35579 xprev = x0;
35581 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35583 /* u = n * xprev */
35584 u = gen_reg_rtx (mode);
35585 emit_insn (gen_mul (u, n, xprev));
35587 /* v = n - (d * u) */
35588 v = gen_reg_rtx (mode);
35589 rs6000_emit_nmsub (v, d, u, n);
35591 /* dst = (v * xprev) + u */
35592 rs6000_emit_madd (dst, v, xprev, u);
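/* For reference: dst = v*xprev + u = (n - d*n*xprev)*xprev + n*xprev
   = n*xprev*(2 - d*xprev), i.e. the last Newton-Raphson step is
   folded together with the multiplication by the numerator N.  */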
35594 if (note_p)
35595 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
35598 /* Goldschmidt's Algorithm for single/double-precision floating point
35599 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35601 void
35602 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35604 machine_mode mode = GET_MODE (src);
35605 rtx e = gen_reg_rtx (mode);
35606 rtx g = gen_reg_rtx (mode);
35607 rtx h = gen_reg_rtx (mode);
35609 /* Low precision estimates guarantee 5 bits of accuracy. High
35610 precision estimates guarantee 14 bits of accuracy. SFmode
35611 requires 23 bits of accuracy. DFmode requires 52 bits of
35612 accuracy. Each pass at least doubles the accuracy, leading
35613 to the following. */
35614 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35615 if (mode == DFmode || mode == V2DFmode)
35616 passes++;
35618 int i;
35619 rtx mhalf;
35620 enum insn_code code = optab_handler (smul_optab, mode);
35621 insn_gen_fn gen_mul = GEN_FCN (code);
35623 gcc_assert (code != CODE_FOR_nothing);
35625 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35627 /* e = rsqrt estimate */
35628 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35629 UNSPEC_RSQRT)));
35631 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35632 if (!recip)
35634 rtx zero = force_reg (mode, CONST0_RTX (mode));
35636 if (mode == SFmode)
35638 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35639 e, zero, mode, 0);
35640 if (target != e)
35641 emit_move_insn (e, target);
35643 else
35645 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35646 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35650 /* g = sqrt estimate. */
35651 emit_insn (gen_mul (g, e, src));
35652 /* h = 1/(2*sqrt) estimate. */
35653 emit_insn (gen_mul (h, e, mhalf));
35655 if (recip)
35657 if (passes == 1)
35659 rtx t = gen_reg_rtx (mode);
35660 rs6000_emit_nmsub (t, g, h, mhalf);
35661 /* Apply the correction directly to the rsqrt estimate. */
35662 rs6000_emit_madd (dst, e, t, e);
35664 else
35666 for (i = 0; i < passes; i++)
35668 rtx t1 = gen_reg_rtx (mode);
35669 rtx g1 = gen_reg_rtx (mode);
35670 rtx h1 = gen_reg_rtx (mode);
35672 rs6000_emit_nmsub (t1, g, h, mhalf);
35673 rs6000_emit_madd (g1, g, t1, g);
35674 rs6000_emit_madd (h1, h, t1, h);
35676 g = g1;
35677 h = h1;
35679 /* Multiply by 2 to get the rsqrt (h approximates 1/(2*sqrt)). */
35680 emit_insn (gen_add3_insn (dst, h, h));
35683 else
35685 rtx t = gen_reg_rtx (mode);
35686 rs6000_emit_nmsub (t, g, h, mhalf);
35687 rs6000_emit_madd (dst, g, t, g);
35690 return;
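/* For reference, each pass of the loop above computes
     t = 1/2 - g*h,  g' = g + g*t,  h' = h + h*t,
   which drives g towards sqrt(src) and h towards 1/(2*sqrt(src))
   while keeping g*h near 1/2; the result is then g (sqrt), h+h
   (rsqrt), or a single correction applied to the initial estimate
   when one pass suffices.  */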
35693 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35694 (Power7) targets. DST is the target, and SRC is the argument operand. */
35696 void
35697 rs6000_emit_popcount (rtx dst, rtx src)
35699 machine_mode mode = GET_MODE (dst);
35700 rtx tmp1, tmp2;
35702 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35703 if (TARGET_POPCNTD)
35705 if (mode == SImode)
35706 emit_insn (gen_popcntdsi2 (dst, src));
35707 else
35708 emit_insn (gen_popcntddi2 (dst, src));
35709 return;
35712 tmp1 = gen_reg_rtx (mode);
35714 if (mode == SImode)
35716 emit_insn (gen_popcntbsi2 (tmp1, src));
35717 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35718 NULL_RTX, 0);
35719 tmp2 = force_reg (SImode, tmp2);
35720 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35722 else
35724 emit_insn (gen_popcntbdi2 (tmp1, src));
35725 tmp2 = expand_mult (DImode, tmp1,
35726 GEN_INT ((HOST_WIDE_INT)
35727 0x01010101 << 32 | 0x01010101),
35728 NULL_RTX, 0);
35729 tmp2 = force_reg (DImode, tmp2);
35730 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
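/* For reference: popcntb leaves the population count of each byte in
   that byte.  Multiplying by 0x01010101 (or its 64-bit analogue)
   accumulates the sum of all byte counts into the most significant
   byte -- no partial sum can carry, since the total is at most 64 --
   and the final right shift extracts that byte.  */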
35735 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35736 target, and SRC is the argument operand. */
35738 void
35739 rs6000_emit_parity (rtx dst, rtx src)
35741 machine_mode mode = GET_MODE (dst);
35742 rtx tmp;
35744 tmp = gen_reg_rtx (mode);
35746 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35747 if (TARGET_CMPB)
35749 if (mode == SImode)
35751 emit_insn (gen_popcntbsi2 (tmp, src));
35752 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35754 else
35756 emit_insn (gen_popcntbdi2 (tmp, src));
35757 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35759 return;
35762 if (mode == SImode)
35764 /* Is mult+shift >= shift+xor+shift+xor? */
35765 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35767 rtx tmp1, tmp2, tmp3, tmp4;
35769 tmp1 = gen_reg_rtx (SImode);
35770 emit_insn (gen_popcntbsi2 (tmp1, src));
35772 tmp2 = gen_reg_rtx (SImode);
35773 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35774 tmp3 = gen_reg_rtx (SImode);
35775 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35777 tmp4 = gen_reg_rtx (SImode);
35778 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35779 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35781 else
35782 rs6000_emit_popcount (tmp, src);
35783 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35785 else
35787 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35788 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35790 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35792 tmp1 = gen_reg_rtx (DImode);
35793 emit_insn (gen_popcntbdi2 (tmp1, src));
35795 tmp2 = gen_reg_rtx (DImode);
35796 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35797 tmp3 = gen_reg_rtx (DImode);
35798 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35800 tmp4 = gen_reg_rtx (DImode);
35801 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35802 tmp5 = gen_reg_rtx (DImode);
35803 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35805 tmp6 = gen_reg_rtx (DImode);
35806 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35807 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35809 else
35810 rs6000_emit_popcount (tmp, src);
35811 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
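/* For reference: parity is just the low bit of the population count,
   and the low bit of (a ^ b) equals the parity of a + b, so each
   shift/xor pair above folds the byte counts in half while preserving
   the overall parity; the final AND with 1 extracts it.  */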
35815 /* Expand an Altivec constant permutation for little endian mode.
35816 There are two issues: First, the two input operands must be
35817 swapped so that together they form a double-wide array in LE
35818 order. Second, the vperm instruction has surprising behavior
35819 in LE mode: it interprets the elements of the source vectors
35820 in BE mode ("left to right") and interprets the elements of
35821 the destination vector in LE mode ("right to left"). To
35822 correct for this, we must subtract each element of the permute
35823 control vector from 31.
35825 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35826 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35827 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35828 serve as the permute control vector. Then, in BE mode,
35830 vperm 9,10,11,12
35832 places the desired result in vr9. However, in LE mode the
35833 vector contents will be
35835 vr10 = 00000003 00000002 00000001 00000000
35836 vr11 = 00000007 00000006 00000005 00000004
35838 The result of the vperm using the same permute control vector is
35840 vr9 = 05000000 07000000 01000000 03000000
35842 That is, the leftmost 4 bytes of vr10 are interpreted as the
35843 source for the rightmost 4 bytes of vr9, and so on.
35845 If we change the permute control vector to
35847 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35849 and issue
35851 vperm 9,11,10,12
35853 we get the desired
35855 vr9 = 00000006 00000004 00000002 00000000. */
35857 void
35858 altivec_expand_vec_perm_const_le (rtx operands[4])
35860 unsigned int i;
35861 rtx perm[16];
35862 rtx constv, unspec;
35863 rtx target = operands[0];
35864 rtx op0 = operands[1];
35865 rtx op1 = operands[2];
35866 rtx sel = operands[3];
35868 /* Unpack and adjust the constant selector. */
35869 for (i = 0; i < 16; ++i)
35871 rtx e = XVECEXP (sel, 0, i);
35872 unsigned int elt = 31 - (INTVAL (e) & 31);
35873 perm[i] = GEN_INT (elt);
35876 /* Expand to a permute, swapping the inputs and using the
35877 adjusted selector. */
35878 if (!REG_P (op0))
35879 op0 = force_reg (V16QImode, op0);
35880 if (!REG_P (op1))
35881 op1 = force_reg (V16QImode, op1);
35883 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35884 constv = force_reg (V16QImode, constv);
35885 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35886 UNSPEC_VPERM);
35887 if (!REG_P (target))
35889 rtx tmp = gen_reg_rtx (V16QImode);
35890 emit_move_insn (tmp, unspec);
35891 unspec = tmp;
35894 emit_move_insn (target, unspec);
35897 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35898 permute control vector. But here it's not a constant, so we must
35899 generate a vector NAND or NOR to do the adjustment. */
35901 void
35902 altivec_expand_vec_perm_le (rtx operands[4])
35904 rtx notx, iorx, unspec;
35905 rtx target = operands[0];
35906 rtx op0 = operands[1];
35907 rtx op1 = operands[2];
35908 rtx sel = operands[3];
35909 rtx tmp = target;
35910 rtx norreg = gen_reg_rtx (V16QImode);
35911 machine_mode mode = GET_MODE (target);
35913 /* Get everything in regs so the pattern matches. */
35914 if (!REG_P (op0))
35915 op0 = force_reg (mode, op0);
35916 if (!REG_P (op1))
35917 op1 = force_reg (mode, op1);
35918 if (!REG_P (sel))
35919 sel = force_reg (V16QImode, sel);
35920 if (!REG_P (target))
35921 tmp = gen_reg_rtx (mode);
35923 if (TARGET_P9_VECTOR)
35925 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
35926 UNSPEC_VPERMR);
35928 else
35930 /* Invert the selector with a VNAND if available, else a VNOR.
35931 The VNAND is preferred for future fusion opportunities. */
35932 notx = gen_rtx_NOT (V16QImode, sel);
35933 iorx = (TARGET_P8_VECTOR
35934 ? gen_rtx_IOR (V16QImode, notx, notx)
35935 : gen_rtx_AND (V16QImode, notx, notx));
35936 emit_insn (gen_rtx_SET (norreg, iorx));
35938 /* Permute with operands reversed and adjusted selector. */
35939 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35940 UNSPEC_VPERM);
35943 /* Copy into target, possibly by way of a register. */
35944 if (!REG_P (target))
35946 emit_move_insn (tmp, unspec);
35947 unspec = tmp;
35950 emit_move_insn (target, unspec);
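/* For reference: vperm only examines the low five bits of each
   selector byte, and 31 - elt == (~elt & 31), so the vector NAND/NOR
   above implements the same "subtract each element from 31"
   adjustment that the constant-selector case applies directly.  */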
35953 /* Expand an Altivec constant permutation. Return true if we match
35954 an efficient implementation; false to fall back to VPERM. */
35956 bool
35957 altivec_expand_vec_perm_const (rtx operands[4])
35959 struct altivec_perm_insn {
35960 HOST_WIDE_INT mask;
35961 enum insn_code impl;
35962 unsigned char perm[16];
35964 static const struct altivec_perm_insn patterns[] = {
35965 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35966 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35967 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35968 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35969 { OPTION_MASK_ALTIVEC,
35970 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35971 : CODE_FOR_altivec_vmrglb_direct),
35972 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35973 { OPTION_MASK_ALTIVEC,
35974 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35975 : CODE_FOR_altivec_vmrglh_direct),
35976 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35977 { OPTION_MASK_ALTIVEC,
35978 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35979 : CODE_FOR_altivec_vmrglw_direct),
35980 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35981 { OPTION_MASK_ALTIVEC,
35982 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35983 : CODE_FOR_altivec_vmrghb_direct),
35984 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35985 { OPTION_MASK_ALTIVEC,
35986 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35987 : CODE_FOR_altivec_vmrghh_direct),
35988 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35989 { OPTION_MASK_ALTIVEC,
35990 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35991 : CODE_FOR_altivec_vmrghw_direct),
35992 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35993 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
35994 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35995 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
35996 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35999 unsigned int i, j, elt, which;
36000 unsigned char perm[16];
36001 rtx target, op0, op1, sel, x;
36002 bool one_vec;
36004 target = operands[0];
36005 op0 = operands[1];
36006 op1 = operands[2];
36007 sel = operands[3];
36009 /* Unpack the constant selector. */
36010 for (i = which = 0; i < 16; ++i)
36012 rtx e = XVECEXP (sel, 0, i);
36013 elt = INTVAL (e) & 31;
36014 which |= (elt < 16 ? 1 : 2);
36015 perm[i] = elt;
36018 /* Simplify the constant selector based on operands. */
36019 switch (which)
36021 default:
36022 gcc_unreachable ();
36024 case 3:
36025 one_vec = false;
36026 if (!rtx_equal_p (op0, op1))
36027 break;
36028 /* FALLTHRU */
36030 case 2:
36031 for (i = 0; i < 16; ++i)
36032 perm[i] &= 15;
36033 op0 = op1;
36034 one_vec = true;
36035 break;
36037 case 1:
36038 op1 = op0;
36039 one_vec = true;
36040 break;
36043 /* Look for splat patterns. */
36044 if (one_vec)
36046 elt = perm[0];
36048 for (i = 0; i < 16; ++i)
36049 if (perm[i] != elt)
36050 break;
36051 if (i == 16)
36053 if (!BYTES_BIG_ENDIAN)
36054 elt = 15 - elt;
36055 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
36056 return true;
36059 if (elt % 2 == 0)
36061 for (i = 0; i < 16; i += 2)
36062 if (perm[i] != elt || perm[i + 1] != elt + 1)
36063 break;
36064 if (i == 16)
36066 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
36067 x = gen_reg_rtx (V8HImode);
36068 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
36069 GEN_INT (field)));
36070 emit_move_insn (target, gen_lowpart (V16QImode, x));
36071 return true;
36075 if (elt % 4 == 0)
36077 for (i = 0; i < 16; i += 4)
36078 if (perm[i] != elt
36079 || perm[i + 1] != elt + 1
36080 || perm[i + 2] != elt + 2
36081 || perm[i + 3] != elt + 3)
36082 break;
36083 if (i == 16)
36085 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
36086 x = gen_reg_rtx (V4SImode);
36087 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
36088 GEN_INT (field)));
36089 emit_move_insn (target, gen_lowpart (V16QImode, x));
36090 return true;
36095 /* Look for merge and pack patterns. */
36096 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
36098 bool swapped;
36100 if ((patterns[j].mask & rs6000_isa_flags) == 0)
36101 continue;
36103 elt = patterns[j].perm[0];
36104 if (perm[0] == elt)
36105 swapped = false;
36106 else if (perm[0] == elt + 16)
36107 swapped = true;
36108 else
36109 continue;
36110 for (i = 1; i < 16; ++i)
36112 elt = patterns[j].perm[i];
36113 if (swapped)
36114 elt = (elt >= 16 ? elt - 16 : elt + 16);
36115 else if (one_vec && elt >= 16)
36116 elt -= 16;
36117 if (perm[i] != elt)
36118 break;
36120 if (i == 16)
36122 enum insn_code icode = patterns[j].impl;
36123 machine_mode omode = insn_data[icode].operand[0].mode;
36124 machine_mode imode = insn_data[icode].operand[1].mode;
36126 /* For little-endian, don't use vpkuwum and vpkuhum if the
36127 underlying vector type is not V4SI and V8HI, respectively.
36128 For example, using vpkuwum with a V8HI picks up the even
36129 halfwords (BE numbering) when the even halfwords (LE
36130 numbering) are what we need. */
36131 if (!BYTES_BIG_ENDIAN
36132 && icode == CODE_FOR_altivec_vpkuwum_direct
36133 && ((GET_CODE (op0) == REG
36134 && GET_MODE (op0) != V4SImode)
36135 || (GET_CODE (op0) == SUBREG
36136 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
36137 continue;
36138 if (!BYTES_BIG_ENDIAN
36139 && icode == CODE_FOR_altivec_vpkuhum_direct
36140 && ((GET_CODE (op0) == REG
36141 && GET_MODE (op0) != V8HImode)
36142 || (GET_CODE (op0) == SUBREG
36143 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
36144 continue;
36146 /* For little-endian, the two input operands must be swapped
36147 (or swapped back) to ensure proper right-to-left numbering
36148 from 0 to 2N-1. */
36149 if (swapped ^ !BYTES_BIG_ENDIAN)
36150 std::swap (op0, op1);
36151 if (imode != V16QImode)
36153 op0 = gen_lowpart (imode, op0);
36154 op1 = gen_lowpart (imode, op1);
36156 if (omode == V16QImode)
36157 x = target;
36158 else
36159 x = gen_reg_rtx (omode);
36160 emit_insn (GEN_FCN (icode) (x, op0, op1));
36161 if (omode != V16QImode)
36162 emit_move_insn (target, gen_lowpart (V16QImode, x));
36163 return true;
36167 if (!BYTES_BIG_ENDIAN)
36169 altivec_expand_vec_perm_const_le (operands);
36170 return true;
36173 return false;
36176 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
36177 Return true if we match an efficient implementation. */
36179 static bool
36180 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
36181 unsigned char perm0, unsigned char perm1)
36183 rtx x;
36185 /* If both selectors come from the same operand, fold to single op. */
36186 if ((perm0 & 2) == (perm1 & 2))
36188 if (perm0 & 2)
36189 op0 = op1;
36190 else
36191 op1 = op0;
36193 /* If both operands are equal, fold to simpler permutation. */
36194 if (rtx_equal_p (op0, op1))
36196 perm0 = perm0 & 1;
36197 perm1 = (perm1 & 1) + 2;
36199 /* If the first selector comes from the second operand, swap. */
36200 else if (perm0 & 2)
36202 if (perm1 & 2)
36203 return false;
36204 perm0 -= 2;
36205 perm1 += 2;
36206 std::swap (op0, op1);
36208 /* If the second selector does not come from the second operand, fail. */
36209 else if ((perm1 & 2) == 0)
36210 return false;
36212 /* Success! */
36213 if (target != NULL)
36215 machine_mode vmode, dmode;
36216 rtvec v;
36218 vmode = GET_MODE (target);
36219 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
36220 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
36221 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
36222 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
36223 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
36224 emit_insn (gen_rtx_SET (target, x));
36226 return true;
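/* For reference: PERM0 and PERM1 index the four-element concatenation
   of the two two-element inputs, so bit 1 of a selector picks the
   operand and bit 0 the element within it; e.g. {0, 3} selects
   element 0 of op0 and element 1 of op1.  */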
36229 bool
36230 rs6000_expand_vec_perm_const (rtx operands[4])
36232 rtx target, op0, op1, sel;
36233 unsigned char perm0, perm1;
36235 target = operands[0];
36236 op0 = operands[1];
36237 op1 = operands[2];
36238 sel = operands[3];
36240 /* Unpack the constant selector. */
36241 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
36242 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
36244 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
36247 /* Test whether a constant permutation is supported. */
36249 static bool
36250 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
36251 const unsigned char *sel)
36253 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
36254 if (TARGET_ALTIVEC)
36255 return true;
36257 /* Check for ps_merge* or evmerge* insns. */
36258 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
36259 || (TARGET_SPE && vmode == V2SImode))
36261 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
36262 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
36263 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
36266 return false;
36269 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
36271 static void
36272 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36273 machine_mode vmode, unsigned nelt, rtx perm[])
36275 machine_mode imode;
36276 rtx x;
36278 imode = vmode;
36279 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
36281 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
36282 imode = mode_for_vector (imode, nelt);
36285 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
36286 x = expand_vec_perm (vmode, op0, op1, x, target);
36287 if (x != target)
36288 emit_move_insn (target, x);
36291 /* Expand an extract even operation. */
36293 void
36294 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36296 machine_mode vmode = GET_MODE (target);
36297 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36298 rtx perm[16];
36300 for (i = 0; i < nelt; i++)
36301 perm[i] = GEN_INT (i * 2);
36303 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
36306 /* Expand a vector interleave operation. */
36308 void
36309 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36311 machine_mode vmode = GET_MODE (target);
36312 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36313 rtx perm[16];
36315 high = (highp ? 0 : nelt / 2);
36316 for (i = 0; i < nelt / 2; i++)
36318 perm[i * 2] = GEN_INT (i + high);
36319 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
36322 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
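/* For reference: with nelt == 4, rs6000_expand_extract_even builds
   the selector {0, 2, 4, 6}, while rs6000_expand_interleave builds
   {0, 4, 1, 5} for the high half (highp) and {2, 6, 3, 7} for the
   low half.  */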
36325 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
36326 void
36327 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36329 HOST_WIDE_INT hwi_scale (scale);
36330 REAL_VALUE_TYPE r_pow;
36331 rtvec v = rtvec_alloc (2);
36332 rtx elt;
36333 rtx scale_vec = gen_reg_rtx (V2DFmode);
36334 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36335 elt = const_double_from_real_value (r_pow, DFmode);
36336 RTVEC_ELT (v, 0) = elt;
36337 RTVEC_ELT (v, 1) = elt;
36338 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36339 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
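/* For reference: the scaling is done by multiplying SRC with a splat
   of 2.0**SCALE, so e.g. SCALE == 3 multiplies both elements by 8.0.  */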
36342 /* Return an RTX representing where to find the function value of a
36343 function returning MODE. */
36344 static rtx
36345 rs6000_complex_function_value (machine_mode mode)
36347 unsigned int regno;
36348 rtx r1, r2;
36349 machine_mode inner = GET_MODE_INNER (mode);
36350 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36352 if (TARGET_FLOAT128
36353 && (mode == KCmode
36354 || (mode == TCmode && TARGET_IEEEQUAD)))
36355 regno = ALTIVEC_ARG_RETURN;
36357 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36358 regno = FP_ARG_RETURN;
36360 else
36362 regno = GP_ARG_RETURN;
36364 /* 32-bit is OK since it'll go in r3/r4. */
36365 if (TARGET_32BIT && inner_bytes >= 4)
36366 return gen_rtx_REG (mode, regno);
36369 if (inner_bytes >= 8)
36370 return gen_rtx_REG (mode, regno);
36372 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36373 const0_rtx);
36374 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36375 GEN_INT (inner_bytes));
36376 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
36379 /* Return an rtx describing a return value of MODE as a PARALLEL
36380 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36381 stride REG_STRIDE. */
36383 static rtx
36384 rs6000_parallel_return (machine_mode mode,
36385 int n_elts, machine_mode elt_mode,
36386 unsigned int regno, unsigned int reg_stride)
36388 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36390 int i;
36391 for (i = 0; i < n_elts; i++)
36393 rtx r = gen_rtx_REG (elt_mode, regno);
36394 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36395 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36396 regno += reg_stride;
36399 return par;
36402 /* Target hook for TARGET_FUNCTION_VALUE.
36404 On the SPE, both FPs and vectors are returned in r3.
36406 On RS/6000 an integer value is in r3 and a floating-point value is in
36407 fp1, unless -msoft-float. */
36409 static rtx
36410 rs6000_function_value (const_tree valtype,
36411 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
36412 bool outgoing ATTRIBUTE_UNUSED)
36414 machine_mode mode;
36415 unsigned int regno;
36416 machine_mode elt_mode;
36417 int n_elts;
36419 /* Special handling for structs in darwin64. */
36420 if (TARGET_MACHO
36421 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
36423 CUMULATIVE_ARGS valcum;
36424 rtx valret;
36426 valcum.words = 0;
36427 valcum.fregno = FP_ARG_MIN_REG;
36428 valcum.vregno = ALTIVEC_ARG_MIN_REG;
36429 /* Do a trial code generation as if this were going to be passed as
36430 an argument; if any part goes in memory, we return NULL. */
36431 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
36432 if (valret)
36433 return valret;
36434 /* Otherwise fall through to standard ABI rules. */
36437 mode = TYPE_MODE (valtype);
36439 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
36440 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
36442 int first_reg, n_regs;
36444 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
36446 /* _Decimal128 must use even/odd register pairs. */
36447 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36448 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
36450 else
36452 first_reg = ALTIVEC_ARG_RETURN;
36453 n_regs = 1;
36456 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
36459 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
36460 if (TARGET_32BIT && TARGET_POWERPC64)
36461 switch (mode)
36463 default:
36464 break;
36465 case DImode:
36466 case SCmode:
36467 case DCmode:
36468 case TCmode:
36469 int count = GET_MODE_SIZE (mode) / 4;
36470 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
36473 if ((INTEGRAL_TYPE_P (valtype)
36474 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
36475 || POINTER_TYPE_P (valtype))
36476 mode = TARGET_32BIT ? SImode : DImode;
36478 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36479 /* _Decimal128 must use an even/odd register pair. */
36480 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36481 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
36482 && !FLOAT128_VECTOR_P (mode)
36483 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
36484 regno = FP_ARG_RETURN;
36485 else if (TREE_CODE (valtype) == COMPLEX_TYPE
36486 && targetm.calls.split_complex_arg)
36487 return rs6000_complex_function_value (mode);
36488 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36489 return register is used in both cases, and we won't see V2DImode/V2DFmode
36490 for pure altivec, combine the two cases. */
36491 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
36492 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
36493 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36494 regno = ALTIVEC_ARG_RETURN;
36495 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
36496 && (mode == DFmode || mode == DCmode
36497 || FLOAT128_IBM_P (mode) || mode == TCmode))
36498 return spe_build_register_parallel (mode, GP_ARG_RETURN);
36499 else
36500 regno = GP_ARG_RETURN;
36502 return gen_rtx_REG (mode, regno);
36505 /* Define how to find the value returned by a library function
36506 assuming the value has mode MODE. */
36507 rtx
36508 rs6000_libcall_value (machine_mode mode)
36510 unsigned int regno;
36512 /* Long long return values need to be split in the -mpowerpc64, 32-bit ABI. */
36513 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
36514 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
36516 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
36517 /* _Decimal128 must use an even/odd register pair. */
36518 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36519 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
36520 && TARGET_HARD_FLOAT && TARGET_FPRS
36521 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
36522 regno = FP_ARG_RETURN;
36523 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36524 return register is used in both cases, and we won't see V2DImode/V2DFmode
36525 for pure altivec, combine the two cases. */
36526 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
36527 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
36528 regno = ALTIVEC_ARG_RETURN;
36529 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
36530 return rs6000_complex_function_value (mode);
36531 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
36532 && (mode == DFmode || mode == DCmode
36533 || FLOAT128_IBM_P (mode) || mode == TCmode))
36534 return spe_build_register_parallel (mode, GP_ARG_RETURN);
36535 else
36536 regno = GP_ARG_RETURN;
36538 return gen_rtx_REG (mode, regno);
36542 /* Return true if we use LRA instead of the reload pass. */
36543 static bool
36544 rs6000_lra_p (void)
36546 return TARGET_LRA;
36549 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36550 Frame pointer elimination is automatically handled.
36552 For the RS/6000, if frame pointer elimination is being done, we would like
36553 to convert ap into fp, not sp.
36555 We need r30 if -mminimal-toc was specified and there are constant-pool
36556 references. */
36558 static bool
36559 rs6000_can_eliminate (const int from, const int to)
36561 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36562 ? ! frame_pointer_needed
36563 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36564 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
36565 : true);
36568 /* Define the offset between two registers, FROM to be eliminated and its
36569 replacement TO, at the start of a routine. */
36570 HOST_WIDE_INT
36571 rs6000_initial_elimination_offset (int from, int to)
36573 rs6000_stack_t *info = rs6000_stack_info ();
36574 HOST_WIDE_INT offset;
36576 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36577 offset = info->push_p ? 0 : -info->total_size;
36578 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36580 offset = info->push_p ? 0 : -info->total_size;
36581 if (FRAME_GROWS_DOWNWARD)
36582 offset += info->fixed_size + info->vars_size + info->parm_size;
36584 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36585 offset = FRAME_GROWS_DOWNWARD
36586 ? info->fixed_size + info->vars_size + info->parm_size
36587 : 0;
36588 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36589 offset = info->total_size;
36590 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36591 offset = info->push_p ? info->total_size : 0;
36592 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
36593 offset = 0;
36594 else
36595 gcc_unreachable ();
36597 return offset;
36600 static rtx
36601 rs6000_dwarf_register_span (rtx reg)
36603 rtx parts[8];
36604 int i, words;
36605 unsigned regno = REGNO (reg);
36606 machine_mode mode = GET_MODE (reg);
36608 if (TARGET_SPE
36609 && regno < 32
36610 && (SPE_VECTOR_MODE (GET_MODE (reg))
36611 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
36612 && mode != SFmode && mode != SDmode && mode != SCmode)))
36614 else
36615 return NULL_RTX;
36617 regno = REGNO (reg);
36619 /* The duality of the SPE register size wreaks all kinds of havoc.
36620 This is a way of distinguishing r0 in 32-bits from r0 in
36621 64-bits. */
36622 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
36623 gcc_assert (words <= 4);
36624 for (i = 0; i < words; i++, regno++)
36626 if (BYTES_BIG_ENDIAN)
36628 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
36629 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
36631 else
36633 parts[2 * i] = gen_rtx_REG (SImode, regno);
36634 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
36638 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
36641 /* Fill in sizes for SPE register high parts in table used by unwinder. */
36643 static void
36644 rs6000_init_dwarf_reg_sizes_extra (tree address)
36646 if (TARGET_SPE)
36648 int i;
36649 machine_mode mode = TYPE_MODE (char_type_node);
36650 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36651 rtx mem = gen_rtx_MEM (BLKmode, addr);
36652 rtx value = gen_int_mode (4, mode);
36654 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
36656 int column = DWARF_REG_TO_UNWIND_COLUMN
36657 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36658 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36660 emit_move_insn (adjust_address (mem, mode, offset), value);
36664 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36666 int i;
36667 machine_mode mode = TYPE_MODE (char_type_node);
36668 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36669 rtx mem = gen_rtx_MEM (BLKmode, addr);
36670 rtx value = gen_int_mode (16, mode);
36672 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36673 The unwinder still needs to know the size of Altivec registers. */
36675 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36677 int column = DWARF_REG_TO_UNWIND_COLUMN
36678 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36679 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36681 emit_move_insn (adjust_address (mem, mode, offset), value);
36686 /* Map internal gcc register numbers to debug format register numbers.
36687 FORMAT specifies the type of debug register number to use:
36688 0 -- debug information, except for frame-related sections
36689 1 -- DWARF .debug_frame section
36690 2 -- DWARF .eh_frame section */
36692 unsigned int
36693 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36695 /* We never use the GCC internal number for SPE high registers.
36696 Those are mapped to the 1200..1231 range for all debug formats. */
36697 if (SPE_HIGH_REGNO_P (regno))
36698 return regno - FIRST_SPE_HIGH_REGNO + 1200;
36700 /* Except for the above, we use the internal number for non-DWARF
36701 debug information, and also for .eh_frame. */
36702 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36703 return regno;
36705 /* On some platforms, we use the standard DWARF register
36706 numbering for .debug_info and .debug_frame. */
36707 #ifdef RS6000_USE_DWARF_NUMBERING
36708 if (regno <= 63)
36709 return regno;
36710 if (regno == LR_REGNO)
36711 return 108;
36712 if (regno == CTR_REGNO)
36713 return 109;
36714 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36715 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36716 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36717 to the DWARF reg for CR. */
36718 if (format == 1 && regno == CR2_REGNO)
36719 return 64;
36720 if (CR_REGNO_P (regno))
36721 return regno - CR0_REGNO + 86;
36722 if (regno == CA_REGNO)
36723 return 101; /* XER */
36724 if (ALTIVEC_REGNO_P (regno))
36725 return regno - FIRST_ALTIVEC_REGNO + 1124;
36726 if (regno == VRSAVE_REGNO)
36727 return 356;
36728 if (regno == VSCR_REGNO)
36729 return 67;
36730 if (regno == SPE_ACC_REGNO)
36731 return 99;
36732 if (regno == SPEFSCR_REGNO)
36733 return 612;
36734 #endif
36735 return regno;
36738 /* target hook eh_return_filter_mode */
36739 static machine_mode
36740 rs6000_eh_return_filter_mode (void)
36742 return TARGET_32BIT ? SImode : word_mode;
36745 /* Target hook for scalar_mode_supported_p. */
36746 static bool
36747 rs6000_scalar_mode_supported_p (machine_mode mode)
36749 /* -m32 does not support TImode. This is the default, from
36750 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36751 same ABI as for -m32. But default_scalar_mode_supported_p allows
36752 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36753 for -mpowerpc64. */
36754 if (TARGET_32BIT && mode == TImode)
36755 return false;
36757 if (DECIMAL_FLOAT_MODE_P (mode))
36758 return default_decimal_float_supported_p ();
36759 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
36760 return true;
36761 else
36762 return default_scalar_mode_supported_p (mode);
36765 /* Target hook for vector_mode_supported_p. */
36766 static bool
36767 rs6000_vector_mode_supported_p (machine_mode mode)
36770 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36771 return true;
36773 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
36774 return true;
36776 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36777 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36778 double-double. */
36779 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36780 return true;
36782 else
36783 return false;
36786 /* Target hook for floatn_mode. */
36787 static machine_mode
36788 rs6000_floatn_mode (int n, bool extended)
36790 if (extended)
36792 switch (n)
36794 case 32:
36795 return DFmode;
36797 case 64:
36798 if (TARGET_FLOAT128)
36799 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36800 else
36801 return VOIDmode;
36803 case 128:
36804 return VOIDmode;
36806 default:
36807 /* Those are the only valid _FloatNx types. */
36808 gcc_unreachable ();
36811 else
36813 switch (n)
36815 case 32:
36816 return SFmode;
36818 case 64:
36819 return DFmode;
36821 case 128:
36822 if (TARGET_FLOAT128)
36823 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36824 else
36825 return VOIDmode;
36827 default:
36828 return VOIDmode;
36834 /* Target hook for c_mode_for_suffix. */
36835 static machine_mode
36836 rs6000_c_mode_for_suffix (char suffix)
36838 if (TARGET_FLOAT128)
36840 if (suffix == 'q' || suffix == 'Q')
36841 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36843 /* At the moment, we are not defining a suffix for IBM extended double.
36844 If/when the default for -mabi=ieeelongdouble is changed, and we want
36845 to support __ibm128 constants in legacy library code, we may need to
36846 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36847 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
36848 __float80 constants. */
36851 return VOIDmode;
36854 /* Target hook for invalid_arg_for_unprototyped_fn. */
36855 static const char *
36856 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36858 return (!rs6000_darwin64_abi
36859 && typelist == 0
36860 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36861 && (funcdecl == NULL_TREE
36862 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36863 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36864 ? N_("AltiVec argument passed to unprototyped function")
36865 : NULL;
36868 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
36869 setup by using the hidden function __stack_chk_fail_local instead of
36870 calling __stack_chk_fail directly. Otherwise it is better to call
36871 __stack_chk_fail directly. */
36873 static tree ATTRIBUTE_UNUSED
36874 rs6000_stack_protect_fail (void)
36876 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36877 ? default_hidden_stack_protect_fail ()
36878 : default_external_stack_protect_fail ();
36881 void
36882 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
36883 int num_operands ATTRIBUTE_UNUSED)
36885 if (rs6000_warn_cell_microcode)
36887 const char *temp;
36888 int insn_code_number = recog_memoized (insn);
36889 location_t location = INSN_LOCATION (insn);
36891 /* Punt on insns we cannot recognize. */
36892 if (insn_code_number < 0)
36893 return;
36895 temp = get_insn_template (insn_code_number, insn);
36897 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
36898 warning_at (location, OPT_mwarn_cell_microcode,
36899 "emitting microcode insn %s\t[%s] #%d",
36900 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
36901 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
36902 warning_at (location, OPT_mwarn_cell_microcode,
36903 "emitting conditional microcode insn %s\t[%s] #%d",
36904 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
36908 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36910 #if TARGET_ELF
36911 static unsigned HOST_WIDE_INT
36912 rs6000_asan_shadow_offset (void)
36914 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36916 #endif
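/* A worked example (a sketch; the shadow scale of 3 is an assumption about
   libsanitizer's default): shadow addresses are computed as
   (addr >> 3) + offset, so on a 64-bit target a byte at address A is
   tracked by the shadow byte at (A >> 3) + (1ULL << 41).  */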
36918 /* Mask options that we want to support inside of attribute((target)) and
36919 #pragma GCC target operations. Note, we do not include things like
36920 64/32-bit, endianness, hard/soft floating point, etc. that would have
36921 different calling sequences. */
36923 struct rs6000_opt_mask {
36924 const char *name; /* option name */
36925 HOST_WIDE_INT mask; /* mask to set */
36926 bool invert; /* invert sense of mask */
36927 bool valid_target; /* option is a target option */
36930 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36932 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36933 { "cmpb", OPTION_MASK_CMPB, false, true },
36934 { "crypto", OPTION_MASK_CRYPTO, false, true },
36935 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36936 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36937 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36938 false, true },
36939 { "float128", OPTION_MASK_FLOAT128, false, false },
36940 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
36941 { "fprnd", OPTION_MASK_FPRND, false, true },
36942 { "hard-dfp", OPTION_MASK_DFP, false, true },
36943 { "htm", OPTION_MASK_HTM, false, true },
36944 { "isel", OPTION_MASK_ISEL, false, true },
36945 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36946 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36947 { "modulo", OPTION_MASK_MODULO, false, true },
36948 { "mulhw", OPTION_MASK_MULHW, false, true },
36949 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36950 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36951 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36952 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36953 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36954 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36955 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
36956 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
36957 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36958 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36959 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36960 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36961 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36962 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36963 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36964 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36965 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36966 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36967 { "string", OPTION_MASK_STRING, false, true },
36968 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36969 { "update", OPTION_MASK_NO_UPDATE, true , true },
36970 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
36971 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
36972 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
36973 { "vsx", OPTION_MASK_VSX, false, true },
36974 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
36975 #ifdef OPTION_MASK_64BIT
36976 #if TARGET_AIX_OS
36977 { "aix64", OPTION_MASK_64BIT, false, false },
36978 { "aix32", OPTION_MASK_64BIT, true, false },
36979 #else
36980 { "64", OPTION_MASK_64BIT, false, false },
36981 { "32", OPTION_MASK_64BIT, true, false },
36982 #endif
36983 #endif
36984 #ifdef OPTION_MASK_EABI
36985 { "eabi", OPTION_MASK_EABI, false, false },
36986 #endif
36987 #ifdef OPTION_MASK_LITTLE_ENDIAN
36988 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36989 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36990 #endif
36991 #ifdef OPTION_MASK_RELOCATABLE
36992 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36993 #endif
36994 #ifdef OPTION_MASK_STRICT_ALIGN
36995 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36996 #endif
36997 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36998 { "string", OPTION_MASK_STRING, false, false },
37001 /* Builtin mask mapping for printing the flags. */
37002 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
37004 { "altivec", RS6000_BTM_ALTIVEC, false, false },
37005 { "vsx", RS6000_BTM_VSX, false, false },
37006 { "spe", RS6000_BTM_SPE, false, false },
37007 { "paired", RS6000_BTM_PAIRED, false, false },
37008 { "fre", RS6000_BTM_FRE, false, false },
37009 { "fres", RS6000_BTM_FRES, false, false },
37010 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
37011 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
37012 { "popcntd", RS6000_BTM_POPCNTD, false, false },
37013 { "cell", RS6000_BTM_CELL, false, false },
37014 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
37015 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
37016 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
37017 { "crypto", RS6000_BTM_CRYPTO, false, false },
37018 { "htm", RS6000_BTM_HTM, false, false },
37019 { "hard-dfp", RS6000_BTM_DFP, false, false },
37020 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
37021 { "long-double-128", RS6000_BTM_LDBL128, false, false },
37022 { "float128", RS6000_BTM_FLOAT128, false, false },
37025 /* Option variables that we want to support inside attribute((target)) and
37026 #pragma GCC target operations. */
37028 struct rs6000_opt_var {
37029 const char *name; /* option name */
37030 size_t global_offset; /* offset of the option in global_options. */
37031 size_t target_offset; /* offset of the option in target options. */
37034 static struct rs6000_opt_var const rs6000_opt_vars[] =
37036 { "friz",
37037 offsetof (struct gcc_options, x_TARGET_FRIZ),
37038 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
37039 { "avoid-indexed-addresses",
37040 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
37041 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
37042 { "paired",
37043 offsetof (struct gcc_options, x_rs6000_paired_float),
37044 offsetof (struct cl_target_option, x_rs6000_paired_float), },
37045 { "longcall",
37046 offsetof (struct gcc_options, x_rs6000_default_long_calls),
37047 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
37048 { "optimize-swaps",
37049 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
37050 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
37051 { "allow-movmisalign",
37052 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
37053 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
37054 { "allow-df-permute",
37055 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
37056 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
37057 { "sched-groups",
37058 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
37059 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
37060 { "always-hint",
37061 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
37062 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
37063 { "align-branch-targets",
37064 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
37065 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
37066 { "vectorize-builtins",
37067 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
37068 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
37069 { "tls-markers",
37070 offsetof (struct gcc_options, x_tls_markers),
37071 offsetof (struct cl_target_option, x_tls_markers), },
37072 { "sched-prolog",
37073 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37074 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37075 { "sched-epilog",
37076 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
37077 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
37078 { "gen-cell-microcode",
37079 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
37080 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
37081 { "warn-cell-microcode",
37082 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
37083 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
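/* Illustrative usage (a sketch): these variable-backed options go through
   the same "no-" handling as the mask options, e.g.

     __attribute__((__target__("longcall")))
     void far_away (void);

     #pragma GCC target ("no-sched-epilog")  */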
37086 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
37087 parsing. Return true if there were no errors. */
37089 static bool
37090 rs6000_inner_target_options (tree args, bool attr_p)
37092 bool ret = true;
37094 if (args == NULL_TREE)
37097 else if (TREE_CODE (args) == STRING_CST)
37099 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37100 char *q;
37102 while ((q = strtok (p, ",")) != NULL)
37104 bool error_p = false;
37105 bool not_valid_p = false;
37106 const char *cpu_opt = NULL;
37108 p = NULL;
37109 if (strncmp (q, "cpu=", 4) == 0)
37111 int cpu_index = rs6000_cpu_name_lookup (q+4);
37112 if (cpu_index >= 0)
37113 rs6000_cpu_index = cpu_index;
37114 else
37116 error_p = true;
37117 cpu_opt = q+4;
37120 else if (strncmp (q, "tune=", 5) == 0)
37122 int tune_index = rs6000_cpu_name_lookup (q+5);
37123 if (tune_index >= 0)
37124 rs6000_tune_index = tune_index;
37125 else
37127 error_p = true;
37128 cpu_opt = q+5;
37131 else
37133 size_t i;
37134 bool invert = false;
37135 char *r = q;
37137 error_p = true;
37138 if (strncmp (r, "no-", 3) == 0)
37140 invert = true;
37141 r += 3;
37144 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
37145 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
37147 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
37149 if (!rs6000_opt_masks[i].valid_target)
37150 not_valid_p = true;
37151 else
37153 error_p = false;
37154 rs6000_isa_flags_explicit |= mask;
37156 /* VSX needs altivec, so -mvsx automagically sets
37157 altivec and disables -mavoid-indexed-addresses. */
37158 if (!invert)
37160 if (mask == OPTION_MASK_VSX)
37162 mask |= OPTION_MASK_ALTIVEC;
37163 TARGET_AVOID_XFORM = 0;
37167 if (rs6000_opt_masks[i].invert)
37168 invert = !invert;
37170 if (invert)
37171 rs6000_isa_flags &= ~mask;
37172 else
37173 rs6000_isa_flags |= mask;
37175 break;
37178 if (error_p && !not_valid_p)
37180 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
37181 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
37183 size_t j = rs6000_opt_vars[i].global_offset;
37184 *((int *) ((char *)&global_options + j)) = !invert;
37185 error_p = false;
37186 not_valid_p = false;
37187 break;
37192 if (error_p)
37194 const char *eprefix, *esuffix;
37196 ret = false;
37197 if (attr_p)
37199 eprefix = "__attribute__((__target__(";
37200 esuffix = ")))";
37202 else
37204 eprefix = "#pragma GCC target ";
37205 esuffix = "";
37208 if (cpu_opt)
37209 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
37210 q, esuffix);
37211 else if (not_valid_p)
37212 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
37213 else
37214 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
37219 else if (TREE_CODE (args) == TREE_LIST)
37223 tree value = TREE_VALUE (args);
37224 if (value)
37226 bool ret2 = rs6000_inner_target_options (value, attr_p);
37227 if (!ret2)
37228 ret = false;
37230 args = TREE_CHAIN (args);
37232 while (args != NULL_TREE);
37235 else
37236 gcc_unreachable ();
37238 return ret;
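/* For example (a sketch): the loop above accepts comma-separated strings
   mixing cpu/tune selection with option names, such as
   "cpu=power8,tune=power9,no-vsx"; an unknown cpu or tune name is
   diagnosed as an invalid cpu, with the attribute or pragma spelled out in
   the error message.  */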
37241 /* Print out the target options as a list for -mdebug=target. */
37243 static void
37244 rs6000_debug_target_options (tree args, const char *prefix)
37246 if (args == NULL_TREE)
37247 fprintf (stderr, "%s<NULL>", prefix);
37249 else if (TREE_CODE (args) == STRING_CST)
37251 char *p = ASTRDUP (TREE_STRING_POINTER (args));
37252 char *q;
37254 while ((q = strtok (p, ",")) != NULL)
37256 p = NULL;
37257 fprintf (stderr, "%s\"%s\"", prefix, q);
37258 prefix = ", ";
37262 else if (TREE_CODE (args) == TREE_LIST)
37266 tree value = TREE_VALUE (args);
37267 if (value)
37269 rs6000_debug_target_options (value, prefix);
37270 prefix = ", ";
37272 args = TREE_CHAIN (args);
37274 while (args != NULL_TREE);
37277 else
37278 gcc_unreachable ();
37280 return;
37284 /* Hook to validate attribute((target("..."))). */
37286 static bool
37287 rs6000_valid_attribute_p (tree fndecl,
37288 tree ARG_UNUSED (name),
37289 tree args,
37290 int flags)
37292 struct cl_target_option cur_target;
37293 bool ret;
37294 tree old_optimize = build_optimization_node (&global_options);
37295 tree new_target, new_optimize;
37296 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37298 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
37300 if (TARGET_DEBUG_TARGET)
37302 tree tname = DECL_NAME (fndecl);
37303 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
37304 if (tname)
37305 fprintf (stderr, "function: %.*s\n",
37306 (int) IDENTIFIER_LENGTH (tname),
37307 IDENTIFIER_POINTER (tname));
37308 else
37309 fprintf (stderr, "function: unknown\n");
37311 fprintf (stderr, "args:");
37312 rs6000_debug_target_options (args, " ");
37313 fprintf (stderr, "\n");
37315 if (flags)
37316 fprintf (stderr, "flags: 0x%x\n", flags);
37318 fprintf (stderr, "--------------------\n");
37321 old_optimize = build_optimization_node (&global_options);
37322 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
37324 /* If the function changed the optimization levels as well as setting target
37325 options, start with the optimizations specified. */
37326 if (func_optimize && func_optimize != old_optimize)
37327 cl_optimization_restore (&global_options,
37328 TREE_OPTIMIZATION (func_optimize));
37330 /* The target attributes may also change some optimization flags, so update
37331 the optimization options if necessary. */
37332 cl_target_option_save (&cur_target, &global_options);
37333 rs6000_cpu_index = rs6000_tune_index = -1;
37334 ret = rs6000_inner_target_options (args, true);
37336 /* Set up any additional state. */
37337 if (ret)
37339 ret = rs6000_option_override_internal (false);
37340 new_target = build_target_option_node (&global_options);
37342 else
37343 new_target = NULL;
37345 new_optimize = build_optimization_node (&global_options);
37347 if (!new_target)
37348 ret = false;
37350 else if (fndecl)
37352 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
37354 if (old_optimize != new_optimize)
37355 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
37358 cl_target_option_restore (&global_options, &cur_target);
37360 if (old_optimize != new_optimize)
37361 cl_optimization_restore (&global_options,
37362 TREE_OPTIMIZATION (old_optimize));
37364 return ret;
37368 /* Hook to validate the current #pragma GCC target and set the state, and
37369 update the macros based on what was changed. If ARGS is NULL, then
37370 POP_TARGET is used to reset the options. */
37372 bool
37373 rs6000_pragma_target_parse (tree args, tree pop_target)
37375 tree prev_tree = build_target_option_node (&global_options);
37376 tree cur_tree;
37377 struct cl_target_option *prev_opt, *cur_opt;
37378 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37379 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37381 if (TARGET_DEBUG_TARGET)
37383 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37384 fprintf (stderr, "args:");
37385 rs6000_debug_target_options (args, " ");
37386 fprintf (stderr, "\n");
37388 if (pop_target)
37390 fprintf (stderr, "pop_target:\n");
37391 debug_tree (pop_target);
37393 else
37394 fprintf (stderr, "pop_target: <NULL>\n");
37396 fprintf (stderr, "--------------------\n");
37399 if (! args)
37401 cur_tree = ((pop_target)
37402 ? pop_target
37403 : target_option_default_node);
37404 cl_target_option_restore (&global_options,
37405 TREE_TARGET_OPTION (cur_tree));
37407 else
37409 rs6000_cpu_index = rs6000_tune_index = -1;
37410 if (!rs6000_inner_target_options (args, false)
37411 || !rs6000_option_override_internal (false)
37412 || (cur_tree = build_target_option_node (&global_options))
37413 == NULL_TREE)
37415 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
37416 fprintf (stderr, "invalid pragma\n");
37418 return false;
37422 target_option_current_node = cur_tree;
37424 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
37425 change the macros that are defined. */
37426 if (rs6000_target_modify_macros_ptr)
37428 prev_opt = TREE_TARGET_OPTION (prev_tree);
37429 prev_bumask = prev_opt->x_rs6000_builtin_mask;
37430 prev_flags = prev_opt->x_rs6000_isa_flags;
37432 cur_opt = TREE_TARGET_OPTION (cur_tree);
37433 cur_flags = cur_opt->x_rs6000_isa_flags;
37434 cur_bumask = cur_opt->x_rs6000_builtin_mask;
37436 diff_bumask = (prev_bumask ^ cur_bumask);
37437 diff_flags = (prev_flags ^ cur_flags);
37439 if ((diff_flags != 0) || (diff_bumask != 0))
37441 /* Delete old macros. */
37442 rs6000_target_modify_macros_ptr (false,
37443 prev_flags & diff_flags,
37444 prev_bumask & diff_bumask);
37446 /* Define new macros. */
37447 rs6000_target_modify_macros_ptr (true,
37448 cur_flags & diff_flags,
37449 cur_bumask & diff_bumask);
37453 return true;
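/* Illustrative usage (a sketch, not from this file): a NULL ARGS together
   with POP_TARGET typically comes from the options stack, e.g.

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,power9-vector")
     -- ... code compiled with the power9 ISA ...
     #pragma GCC pop_options

   and the macro callback keeps defines such as __POWER9_VECTOR__ in sync
   with the isa/builtin mask deltas computed above.  */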
37457 /* Remember the last target of rs6000_set_current_function. */
37458 static GTY(()) tree rs6000_previous_fndecl;
37460 /* Establish appropriate back-end context for processing the function
37461 FNDECL. The argument might be NULL to indicate processing at top
37462 level, outside of any function scope. */
37463 static void
37464 rs6000_set_current_function (tree fndecl)
37466 tree old_tree = (rs6000_previous_fndecl
37467 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
37468 : NULL_TREE);
37470 tree new_tree = (fndecl
37471 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
37472 : NULL_TREE);
37474 if (TARGET_DEBUG_TARGET)
37476 bool print_final = false;
37477 fprintf (stderr, "\n==================== rs6000_set_current_function");
37479 if (fndecl)
37480 fprintf (stderr, ", fndecl %s (%p)",
37481 (DECL_NAME (fndecl)
37482 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
37483 : "<unknown>"), (void *)fndecl);
37485 if (rs6000_previous_fndecl)
37486 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
37488 fprintf (stderr, "\n");
37489 if (new_tree)
37491 fprintf (stderr, "\nnew fndecl target specific options:\n");
37492 debug_tree (new_tree);
37493 print_final = true;
37496 if (old_tree)
37498 fprintf (stderr, "\nold fndecl target specific options:\n");
37499 debug_tree (old_tree);
37500 print_final = true;
37503 if (print_final)
37504 fprintf (stderr, "--------------------\n");
37507 /* Only change the context if the function changes. This hook is called
37508 several times in the course of compiling a function, and we don't want to
37509 slow things down too much or call target_reinit when it isn't safe. */
37510 if (fndecl && fndecl != rs6000_previous_fndecl)
37512 rs6000_previous_fndecl = fndecl;
37513 if (old_tree == new_tree)
37516 else if (new_tree && new_tree != target_option_default_node)
37518 cl_target_option_restore (&global_options,
37519 TREE_TARGET_OPTION (new_tree));
37520 if (TREE_TARGET_GLOBALS (new_tree))
37521 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37522 else
37523 TREE_TARGET_GLOBALS (new_tree)
37524 = save_target_globals_default_opts ();
37527 else if (old_tree && old_tree != target_option_default_node)
37529 new_tree = target_option_current_node;
37530 cl_target_option_restore (&global_options,
37531 TREE_TARGET_OPTION (new_tree));
37532 if (TREE_TARGET_GLOBALS (new_tree))
37533 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37534 else if (new_tree == target_option_default_node)
37535 restore_target_globals (&default_target_globals);
37536 else
37537 TREE_TARGET_GLOBALS (new_tree)
37538 = save_target_globals_default_opts ();
37544 /* Save the current options */
37546 static void
37547 rs6000_function_specific_save (struct cl_target_option *ptr,
37548 struct gcc_options *opts)
37550 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
37551 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
37554 /* Restore the current options */
37556 static void
37557 rs6000_function_specific_restore (struct gcc_options *opts,
37558 struct cl_target_option *ptr)
37561 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
37562 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
37563 (void) rs6000_option_override_internal (false);
37566 /* Print the current options */
37568 static void
37569 rs6000_function_specific_print (FILE *file, int indent,
37570 struct cl_target_option *ptr)
37572 rs6000_print_isa_options (file, indent, "Isa options set",
37573 ptr->x_rs6000_isa_flags);
37575 rs6000_print_isa_options (file, indent, "Isa options explicit",
37576 ptr->x_rs6000_isa_flags_explicit);
37579 /* Helper function to print the current isa or misc options on a line. */
37581 static void
37582 rs6000_print_options_internal (FILE *file,
37583 int indent,
37584 const char *string,
37585 HOST_WIDE_INT flags,
37586 const char *prefix,
37587 const struct rs6000_opt_mask *opts,
37588 size_t num_elements)
37590 size_t i;
37591 size_t start_column = 0;
37592 size_t cur_column;
37593 size_t max_column = 120;
37594 size_t prefix_len = strlen (prefix);
37595 size_t comma_len = 0;
37596 const char *comma = "";
37598 if (indent)
37599 start_column += fprintf (file, "%*s", indent, "");
37601 if (!flags)
37603 fprintf (file, DEBUG_FMT_S, string, "<none>");
37604 return;
37607 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
37609 /* Print the various mask options. */
37610 cur_column = start_column;
37611 for (i = 0; i < num_elements; i++)
37613 bool invert = opts[i].invert;
37614 const char *name = opts[i].name;
37615 const char *no_str = "";
37616 HOST_WIDE_INT mask = opts[i].mask;
37617 size_t len = comma_len + prefix_len + strlen (name);
37619 if (!invert)
37621 if ((flags & mask) == 0)
37623 no_str = "no-";
37624 len += sizeof ("no-") - 1;
37627 flags &= ~mask;
37630 else
37632 if ((flags & mask) != 0)
37634 no_str = "no-";
37635 len += sizeof ("no-") - 1;
37638 flags |= mask;
37641 cur_column += len;
37642 if (cur_column > max_column)
37644 fprintf (file, ", \\\n%*s", (int)start_column, "");
37645 cur_column = start_column + len;
37646 comma = "";
37649 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37650 comma = ", ";
37651 comma_len = sizeof (", ") - 1;
37654 fputs ("\n", file);
37657 /* Helper function to print the current isa options on a line. */
37659 static void
37660 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37661 HOST_WIDE_INT flags)
37663 rs6000_print_options_internal (file, indent, string, flags, "-m",
37664 &rs6000_opt_masks[0],
37665 ARRAY_SIZE (rs6000_opt_masks));
37668 static void
37669 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37670 HOST_WIDE_INT flags)
37672 rs6000_print_options_internal (file, indent, string, flags, "",
37673 &rs6000_builtin_mask_names[0],
37674 ARRAY_SIZE (rs6000_builtin_mask_names));
37678 /* Hook to determine if one function can safely inline another. */
37680 static bool
37681 rs6000_can_inline_p (tree caller, tree callee)
37683 bool ret = false;
37684 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37685 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37687 /* If callee has no option attributes, then it is ok to inline. */
37688 if (!callee_tree)
37689 ret = true;
37691 /* If caller has no option attributes, but callee does then it is not ok to
37692 inline. */
37693 else if (!caller_tree)
37694 ret = false;
37696 else
37698 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37699 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37701 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37702 can inline an altivec function but a non-vsx function can't inline a
37703 vsx function. */
37704 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37705 == callee_opts->x_rs6000_isa_flags)
37706 ret = true;
37709 if (TARGET_DEBUG_TARGET)
37710 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
37711 (DECL_NAME (caller)
37712 ? IDENTIFIER_POINTER (DECL_NAME (caller))
37713 : "<unknown>"),
37714 (DECL_NAME (callee)
37715 ? IDENTIFIER_POINTER (DECL_NAME (callee))
37716 : "<unknown>"),
37717 (ret ? "can" : "cannot"));
37719 return ret;
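/* For example (a sketch): given

     __attribute__((target("altivec"))) int callee (void);
     __attribute__((target("vsx,altivec"))) int caller (void);

   callee's isa flags are a subset of caller's, so the inline is allowed;
   the reverse direction is rejected because an altivec-only caller does
   not provide the VSX guarantee the callee was compiled with.  */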
37722 /* Allocate a stack temp and fix up the address so it meets the particular
37723 memory requirements (either offsettable or REG+REG addressing). */
37726 rs6000_allocate_stack_temp (machine_mode mode,
37727 bool offsettable_p,
37728 bool reg_reg_p)
37730 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37731 rtx addr = XEXP (stack, 0);
37732 int strict_p = (reload_in_progress || reload_completed);
37734 if (!legitimate_indirect_address_p (addr, strict_p))
37736 if (offsettable_p
37737 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37738 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37740 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37741 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37744 return stack;
37747 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37748 to such a form to deal with memory reference instructions like STFIWX that
37749 only take reg+reg addressing. */
37752 rs6000_address_for_fpconvert (rtx x)
37754 int strict_p = (reload_in_progress || reload_completed);
37755 rtx addr;
37757 gcc_assert (MEM_P (x));
37758 addr = XEXP (x, 0);
37759 if (! legitimate_indirect_address_p (addr, strict_p)
37760 && ! legitimate_indexed_address_p (addr, strict_p))
37762 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37764 rtx reg = XEXP (addr, 0);
37765 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37766 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37767 gcc_assert (REG_P (reg));
37768 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37769 addr = reg;
37771 else if (GET_CODE (addr) == PRE_MODIFY)
37773 rtx reg = XEXP (addr, 0);
37774 rtx expr = XEXP (addr, 1);
37775 gcc_assert (REG_P (reg));
37776 gcc_assert (GET_CODE (expr) == PLUS);
37777 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37778 addr = reg;
37781 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37784 return x;
37787 /* Given a memory reference, if it is not in the form for altivec memory
37788 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37789 convert to the altivec format. */
37792 rs6000_address_for_altivec (rtx x)
37794 gcc_assert (MEM_P (x));
37795 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
37797 rtx addr = XEXP (x, 0);
37798 int strict_p = (reload_in_progress || reload_completed);
37800 if (!legitimate_indexed_address_p (addr, strict_p)
37801 && !legitimate_indirect_address_p (addr, strict_p))
37802 addr = copy_to_mode_reg (Pmode, addr);
37804 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
37805 x = change_address (x, GET_MODE (x), addr);
37808 return x;
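/* For example (a sketch): a V4SI access whose address sits in a single
   register r9 is rewritten as

     (mem (and (reg r9) (const_int -16)))

   which matches how lvx/stvx ignore the low four bits of the effective
   address.  */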
37811 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37813 On the RS/6000, all integer constants are acceptable, though most won't be
37814 valid for particular insns. Only easy FP constants are acceptable. */
37816 static bool
37817 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37819 if (TARGET_ELF && tls_referenced_p (x))
37820 return false;
37822 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37823 || GET_MODE (x) == VOIDmode
37824 || (TARGET_POWERPC64 && mode == DImode)
37825 || easy_fp_constant (x, mode)
37826 || easy_vector_constant (x, mode));
37830 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37832 static bool
37833 chain_already_loaded (rtx_insn *last)
37835 for (; last != NULL; last = PREV_INSN (last))
37837 if (NONJUMP_INSN_P (last))
37839 rtx patt = PATTERN (last);
37841 if (GET_CODE (patt) == SET)
37843 rtx lhs = XEXP (patt, 0);
37845 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37846 return true;
37850 return false;
37853 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37855 void
37856 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37858 const bool direct_call_p
37859 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37860 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37861 rtx toc_load = NULL_RTX;
37862 rtx toc_restore = NULL_RTX;
37863 rtx func_addr;
37864 rtx abi_reg = NULL_RTX;
37865 rtx call[4];
37866 int n_call;
37867 rtx insn;
37869 /* Handle longcall attributes. */
37870 if (INTVAL (cookie) & CALL_LONG)
37871 func_desc = rs6000_longcall_ref (func_desc);
37873 /* Handle indirect calls. */
37874 if (GET_CODE (func_desc) != SYMBOL_REF
37875 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37877 /* Save the TOC into its reserved slot before the call,
37878 and prepare to restore it after the call. */
37879 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37880 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37881 rtx stack_toc_mem = gen_frame_mem (Pmode,
37882 gen_rtx_PLUS (Pmode, stack_ptr,
37883 stack_toc_offset));
37884 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37885 gen_rtvec (1, stack_toc_offset),
37886 UNSPEC_TOCSLOT);
37887 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37889 /* Can we optimize saving the TOC in the prologue or
37890 do we need to do it at every call? */
37891 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37892 cfun->machine->save_toc_in_prologue = true;
37893 else
37895 MEM_VOLATILE_P (stack_toc_mem) = 1;
37896 emit_move_insn (stack_toc_mem, toc_reg);
37899 if (DEFAULT_ABI == ABI_ELFv2)
37901 /* A function pointer in the ELFv2 ABI is just a plain address, but
37902 the ABI requires it to be loaded into r12 before the call. */
37903 func_addr = gen_rtx_REG (Pmode, 12);
37904 emit_move_insn (func_addr, func_desc);
37905 abi_reg = func_addr;
37907 else
37909 /* A function pointer under AIX is a pointer to a data area whose
37910 first word contains the actual address of the function, whose
37911 second word contains a pointer to its TOC, and whose third word
37912 contains a value to place in the static chain register (r11).
37913 Note that if we load the static chain, our "trampoline" need
37914 not have any executable code. */
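/* Equivalently (an illustrative sketch, not a declaration used here):

     struct aix_func_desc
     {
       void *entry;         -- actual code address, loaded just below
       void *toc;           -- callee's TOC value
       void *static_chain;  -- loaded into r11 when needed
     };  */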
37916 /* Load up address of the actual function. */
37917 func_desc = force_reg (Pmode, func_desc);
37918 func_addr = gen_reg_rtx (Pmode);
37919 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37921 /* Prepare to load the TOC of the called function. Note that the
37922 TOC load must happen immediately before the actual call so
37923 that unwinding the TOC registers works correctly. See the
37924 comment in frob_update_context. */
37925 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37926 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37927 gen_rtx_PLUS (Pmode, func_desc,
37928 func_toc_offset));
37929 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37931 /* If we have a static chain, load it up. But, if the call was
37932 originally direct, the 3rd word has not been written since no
37933 trampoline has been built, so we ought not to load it, lest we
37934 override a static chain value. */
37935 if (!direct_call_p
37936 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37937 && !chain_already_loaded (get_current_sequence ()->next->last))
37939 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37940 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37941 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37942 gen_rtx_PLUS (Pmode, func_desc,
37943 func_sc_offset));
37944 emit_move_insn (sc_reg, func_sc_mem);
37945 abi_reg = sc_reg;
37949 else
37951 /* Direct calls use the TOC: for local calls, the callee will
37952 assume the TOC register is set; for non-local calls, the
37953 PLT stub needs the TOC register. */
37954 abi_reg = toc_reg;
37955 func_addr = func_desc;
37958 /* Create the call. */
37959 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37960 if (value != NULL_RTX)
37961 call[0] = gen_rtx_SET (value, call[0]);
37962 n_call = 1;
37964 if (toc_load)
37965 call[n_call++] = toc_load;
37966 if (toc_restore)
37967 call[n_call++] = toc_restore;
37969 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37971 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37972 insn = emit_call_insn (insn);
37974 /* Mention all registers defined by the ABI to hold information
37975 as uses in CALL_INSN_FUNCTION_USAGE. */
37976 if (abi_reg)
37977 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37980 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37982 void
37983 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37985 rtx call[2];
37986 rtx insn;
37988 gcc_assert (INTVAL (cookie) == 0);
37990 /* Create the call. */
37991 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37992 if (value != NULL_RTX)
37993 call[0] = gen_rtx_SET (value, call[0]);
37995 call[1] = simple_return_rtx;
37997 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37998 insn = emit_call_insn (insn);
38000 /* Note use of the TOC register. */
38001 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38004 /* Return whether we need to always update the saved TOC pointer when we update
38005 the stack pointer. */
38007 static bool
38008 rs6000_save_toc_in_prologue_p (void)
38010 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38013 #ifdef HAVE_GAS_HIDDEN
38014 # define USE_HIDDEN_LINKONCE 1
38015 #else
38016 # define USE_HIDDEN_LINKONCE 0
38017 #endif
38019 /* Fills in the label name that should be used for a 476 link stack thunk. */
38021 void
38022 get_ppc476_thunk_name (char name[32])
38024 gcc_assert (TARGET_LINK_STACK);
38026 if (USE_HIDDEN_LINKONCE)
38027 sprintf (name, "__ppc476.get_thunk");
38028 else
38029 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38032 /* This function emits the simple thunk routine that is used to preserve
38033 the link stack on the 476 cpu. */
38035 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38036 static void
38037 rs6000_code_end (void)
38039 char name[32];
38040 tree decl;
38042 if (!TARGET_LINK_STACK)
38043 return;
38045 get_ppc476_thunk_name (name);
38047 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38048 build_function_type_list (void_type_node, NULL_TREE));
38049 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38050 NULL_TREE, void_type_node);
38051 TREE_PUBLIC (decl) = 1;
38052 TREE_STATIC (decl) = 1;
38054 #if RS6000_WEAK
38055 if (USE_HIDDEN_LINKONCE)
38057 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38058 targetm.asm_out.unique_section (decl, 0);
38059 switch_to_section (get_named_section (decl, NULL, 0));
38060 DECL_WEAK (decl) = 1;
38061 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38062 targetm.asm_out.globalize_label (asm_out_file, name);
38063 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38064 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38066 else
38067 #endif
38069 switch_to_section (text_section);
38070 ASM_OUTPUT_LABEL (asm_out_file, name);
38073 DECL_INITIAL (decl) = make_node (BLOCK);
38074 current_function_decl = decl;
38075 allocate_struct_function (decl, false);
38076 init_function_start (decl);
38077 first_function_block_is_cold = false;
38078 /* Make sure unwind info is emitted for the thunk if needed. */
38079 final_start_function (emit_barrier (), asm_out_file, 1);
38081 fputs ("\tblr\n", asm_out_file);
38083 final_end_function ();
38084 init_insn_lengths ();
38085 free_after_compilation (cfun);
38086 set_cfun (NULL);
38087 current_function_decl = NULL;
38090 /* Add r30 to hard reg set if the prologue sets it up and it is not
38091 pic_offset_table_rtx. */
38093 static void
38094 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38096 if (!TARGET_SINGLE_PIC_BASE
38097 && TARGET_TOC
38098 && TARGET_MINIMAL_TOC
38099 && get_pool_size () != 0)
38100 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38101 if (cfun->machine->split_stack_argp_used)
38102 add_to_hard_reg_set (&set->set, Pmode, 12);
38106 /* Helper function for rs6000_split_logical to emit a logical instruction after
38107 splitting the operation into single GPR registers.
38109 DEST is the destination register.
38110 OP1 and OP2 are the input source registers.
38111 CODE is the base operation (AND, IOR, XOR, NOT).
38112 MODE is the machine mode.
38113 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38114 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38115 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38117 static void
38118 rs6000_split_logical_inner (rtx dest,
38119 rtx op1,
38120 rtx op2,
38121 enum rtx_code code,
38122 machine_mode mode,
38123 bool complement_final_p,
38124 bool complement_op1_p,
38125 bool complement_op2_p)
38127 rtx bool_rtx;
38129 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38130 if (op2 && GET_CODE (op2) == CONST_INT
38131 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38132 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38134 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38135 HOST_WIDE_INT value = INTVAL (op2) & mask;
38137 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38138 if (code == AND)
38140 if (value == 0)
38142 emit_insn (gen_rtx_SET (dest, const0_rtx));
38143 return;
38146 else if (value == mask)
38148 if (!rtx_equal_p (dest, op1))
38149 emit_insn (gen_rtx_SET (dest, op1));
38150 return;
38154 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38155 into separate ORI/ORIS or XORI/XORIS instructions. */
38156 else if (code == IOR || code == XOR)
38158 if (value == 0)
38160 if (!rtx_equal_p (dest, op1))
38161 emit_insn (gen_rtx_SET (dest, op1));
38162 return;
38167 if (code == AND && mode == SImode
38168 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38170 emit_insn (gen_andsi3 (dest, op1, op2));
38171 return;
38174 if (complement_op1_p)
38175 op1 = gen_rtx_NOT (mode, op1);
38177 if (complement_op2_p)
38178 op2 = gen_rtx_NOT (mode, op2);
38180 /* For canonical RTL, if only one arm is inverted it is the first. */
38181 if (!complement_op1_p && complement_op2_p)
38182 std::swap (op1, op2);
38184 bool_rtx = ((code == NOT)
38185 ? gen_rtx_NOT (mode, op1)
38186 : gen_rtx_fmt_ee (code, mode, op1, op2));
38188 if (complement_final_p)
38189 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38191 emit_insn (gen_rtx_SET (dest, bool_rtx));
38194 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38195 operations are split immediately during RTL generation to allow for more
38196 optimizations of the AND/IOR/XOR.
38198 OPERANDS is an array containing the destination and two input operands.
38199 CODE is the base operation (AND, IOR, XOR, NOT).
38200 MODE is the machine mode.
38201 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38202 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38203 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38204 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38205 formation of the AND instructions. */
38207 static void
38208 rs6000_split_logical_di (rtx operands[3],
38209 enum rtx_code code,
38210 bool complement_final_p,
38211 bool complement_op1_p,
38212 bool complement_op2_p)
38214 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38215 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38216 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38217 enum hi_lo { hi = 0, lo = 1 };
38218 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38219 size_t i;
38221 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38222 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38223 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38224 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38226 if (code == NOT)
38227 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38228 else
38230 if (GET_CODE (operands[2]) != CONST_INT)
38232 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38233 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38235 else
38237 HOST_WIDE_INT value = INTVAL (operands[2]);
38238 HOST_WIDE_INT value_hi_lo[2];
38240 gcc_assert (!complement_final_p);
38241 gcc_assert (!complement_op1_p);
38242 gcc_assert (!complement_op2_p);
38244 value_hi_lo[hi] = value >> 32;
38245 value_hi_lo[lo] = value & lower_32bits;
38247 for (i = 0; i < 2; i++)
38249 HOST_WIDE_INT sub_value = value_hi_lo[i];
38251 if (sub_value & sign_bit)
38252 sub_value |= upper_32bits;
38254 op2_hi_lo[i] = GEN_INT (sub_value);
38256 /* If this is an AND instruction, check to see if we need to load
38257 the value in a register. */
38258 if (code == AND && sub_value != -1 && sub_value != 0
38259 && !and_operand (op2_hi_lo[i], SImode))
38260 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38265 for (i = 0; i < 2; i++)
38267 /* Split large IOR/XOR operations. */
38268 if ((code == IOR || code == XOR)
38269 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38270 && !complement_final_p
38271 && !complement_op1_p
38272 && !complement_op2_p
38273 && !logical_const_operand (op2_hi_lo[i], SImode))
38275 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38276 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38277 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38278 rtx tmp = gen_reg_rtx (SImode);
38280 /* Make sure the constant is sign extended. */
38281 if ((hi_16bits & sign_bit) != 0)
38282 hi_16bits |= upper_32bits;
38284 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38285 code, SImode, false, false, false);
38287 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38288 code, SImode, false, false, false);
38290 else
38291 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38292 code, SImode, complement_final_p,
38293 complement_op1_p, complement_op2_p);
38296 return;
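/* A worked example (a sketch): on -m32, x |= 0x12345678 reaches the
   splitting above with a low word that is not a logical_const_operand; it
   is split into 0x12340000 and 0x00005678 so the two halves can be emitted
   as an ORIS/ORI pair, e.g.

     oris tmp,src,0x1234
     ori  dst,tmp,0x5678  */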
38299 /* Split the insns that make up boolean operations operating on multiple GPR
38300 registers. The boolean MD patterns ensure that the inputs either are
38301 exactly the same as the output registers, or there is no overlap.
38303 OPERANDS is an array containing the destination and two input operands.
38304 CODE is the base operation (AND, IOR, XOR, NOT).
38305 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38306 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38307 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38309 void
38310 rs6000_split_logical (rtx operands[3],
38311 enum rtx_code code,
38312 bool complement_final_p,
38313 bool complement_op1_p,
38314 bool complement_op2_p)
38316 machine_mode mode = GET_MODE (operands[0]);
38317 machine_mode sub_mode;
38318 rtx op0, op1, op2;
38319 int sub_size, regno0, regno1, nregs, i;
38321 /* If this is DImode, use the specialized version that can run before
38322 register allocation. */
38323 if (mode == DImode && !TARGET_POWERPC64)
38325 rs6000_split_logical_di (operands, code, complement_final_p,
38326 complement_op1_p, complement_op2_p);
38327 return;
38330 op0 = operands[0];
38331 op1 = operands[1];
38332 op2 = (code == NOT) ? NULL_RTX : operands[2];
38333 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38334 sub_size = GET_MODE_SIZE (sub_mode);
38335 regno0 = REGNO (op0);
38336 regno1 = REGNO (op1);
38338 gcc_assert (reload_completed);
38339 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38340 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38342 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38343 gcc_assert (nregs > 1);
38345 if (op2 && REG_P (op2))
38346 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38348 for (i = 0; i < nregs; i++)
38350 int offset = i * sub_size;
38351 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38352 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38353 rtx sub_op2 = ((code == NOT)
38354 ? NULL_RTX
38355 : simplify_subreg (sub_mode, op2, mode, offset));
38357 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38358 complement_final_p, complement_op1_p,
38359 complement_op2_p);
38362 return;
38366 /* Return true if the peephole2 can combine a load involving a combination of
38367 an addis instruction and a load with an offset that can be fused together on
38368 a power8. */
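/* For example (a sketch of the fused pair this predicate accepts):

     addis r9,r2,sym@toc@ha
     lwz   r9,sym@toc@l(r9)

   where the addis target, the base register, and the loaded register are
   all the same, so a power8 can fuse the pair.  */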
38370 bool
38371 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38372 rtx addis_value, /* addis value. */
38373 rtx target, /* target register that is loaded. */
38374 rtx mem) /* bottom part of the memory addr. */
38376 rtx addr;
38377 rtx base_reg;
38379 /* Validate arguments. */
38380 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38381 return false;
38383 if (!base_reg_operand (target, GET_MODE (target)))
38384 return false;
38386 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38387 return false;
38389 /* Allow sign/zero extension. */
38390 if (GET_CODE (mem) == ZERO_EXTEND
38391 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38392 mem = XEXP (mem, 0);
38394 if (!MEM_P (mem))
38395 return false;
38397 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38398 return false;
38400 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38401 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38402 return false;
38404 /* Validate that the register used to load the high value is either the
38405 register being loaded, or we can safely replace its use.
38407 This function is only called from the peephole2 pass and we assume that
38408 there are 2 instructions in the peephole (addis and load), so we want to
38409 check if the target register was not used in the memory address and the
38410 register to hold the addis result is dead after the peephole. */
38411 if (REGNO (addis_reg) != REGNO (target))
38413 if (reg_mentioned_p (target, mem))
38414 return false;
38416 if (!peep2_reg_dead_p (2, addis_reg))
38417 return false;
38419 /* If the target register being loaded is the stack pointer, we must
38420 avoid loading any other value into it, even temporarily. */
38421 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38422 return false;
38425 base_reg = XEXP (addr, 0);
38426 return REGNO (addis_reg) == REGNO (base_reg);
38429 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38430 sequence. We adjust the addis register to use the target register. If the
38431 load sign extends, we adjust the code to do a zero-extending load followed
38432 by an explicit sign extension, since the fusion only covers zero-extending
38433 loads.
38435 The operands are:
38436 operands[0] register set with addis (to be replaced with target)
38437 operands[1] value set via addis
38438 operands[2] target register being loaded
38439 operands[3] D-form memory reference using operands[0]. */
38441 void
38442 expand_fusion_gpr_load (rtx *operands)
38444 rtx addis_value = operands[1];
38445 rtx target = operands[2];
38446 rtx orig_mem = operands[3];
38447 rtx new_addr, new_mem, orig_addr, offset;
38448 enum rtx_code plus_or_lo_sum;
38449 machine_mode target_mode = GET_MODE (target);
38450 machine_mode extend_mode = target_mode;
38451 machine_mode ptr_mode = Pmode;
38452 enum rtx_code extend = UNKNOWN;
38454 if (GET_CODE (orig_mem) == ZERO_EXTEND
38455 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38457 extend = GET_CODE (orig_mem);
38458 orig_mem = XEXP (orig_mem, 0);
38459 target_mode = GET_MODE (orig_mem);
38462 gcc_assert (MEM_P (orig_mem));
38464 orig_addr = XEXP (orig_mem, 0);
38465 plus_or_lo_sum = GET_CODE (orig_addr);
38466 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38468 offset = XEXP (orig_addr, 1);
38469 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38470 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38472 if (extend != UNKNOWN)
38473 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38475 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38476 UNSPEC_FUSION_GPR);
38477 emit_insn (gen_rtx_SET (target, new_mem));
38479 if (extend == SIGN_EXTEND)
38481 int sub_off = ((BYTES_BIG_ENDIAN)
38482 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38483 : 0);
38484 rtx sign_reg
38485 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38487 emit_insn (gen_rtx_SET (target,
38488 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38491 return;
38494 /* Emit the addis instruction that will be part of a fused instruction
38495 sequence. */
38497 void
38498 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
38499 const char *mode_name)
38501 rtx fuse_ops[10];
38502 char insn_template[80];
38503 const char *addis_str = NULL;
38504 const char *comment_str = ASM_COMMENT_START;
38506 if (*comment_str == ' ')
38507 comment_str++;
38509 /* Emit the addis instruction. */
38510 fuse_ops[0] = target;
38511 if (satisfies_constraint_L (addis_value))
38513 fuse_ops[1] = addis_value;
38514 addis_str = "lis %0,%v1";
38517 else if (GET_CODE (addis_value) == PLUS)
38519 rtx op0 = XEXP (addis_value, 0);
38520 rtx op1 = XEXP (addis_value, 1);
38522 if (REG_P (op0) && CONST_INT_P (op1)
38523 && satisfies_constraint_L (op1))
38525 fuse_ops[1] = op0;
38526 fuse_ops[2] = op1;
38527 addis_str = "addis %0,%1,%v2";
38531 else if (GET_CODE (addis_value) == HIGH)
38533 rtx value = XEXP (addis_value, 0);
38534 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38536 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38537 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38538 if (TARGET_ELF)
38539 addis_str = "addis %0,%2,%1@toc@ha";
38541 else if (TARGET_XCOFF)
38542 addis_str = "addis %0,%1@u(%2)";
38544 else
38545 gcc_unreachable ();
38548 else if (GET_CODE (value) == PLUS)
38550 rtx op0 = XEXP (value, 0);
38551 rtx op1 = XEXP (value, 1);
38553 if (GET_CODE (op0) == UNSPEC
38554 && XINT (op0, 1) == UNSPEC_TOCREL
38555 && CONST_INT_P (op1))
38557 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38558 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38559 fuse_ops[3] = op1;
38560 if (TARGET_ELF)
38561 addis_str = "addis %0,%2,%1+%3@toc@ha";
38563 else if (TARGET_XCOFF)
38564 addis_str = "addis %0,%1+%3@u(%2)";
38566 else
38567 gcc_unreachable ();
38571 else if (satisfies_constraint_L (value))
38573 fuse_ops[1] = value;
38574 addis_str = "lis %0,%v1";
38577 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38579 fuse_ops[1] = value;
38580 addis_str = "lis %0,%1@ha";
38584 if (!addis_str)
38585 fatal_insn ("Could not generate addis value for fusion", addis_value);
38587 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
38588 comment, mode_name);
38589 output_asm_insn (insn_template, fuse_ops);
38592 /* Emit a D-form load or store instruction that is the second instruction
38593 of a fusion sequence. */
38595 void
38596 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
38597 const char *insn_str)
38599 rtx fuse_ops[10];
38600 char insn_template[80];
38602 fuse_ops[0] = load_store_reg;
38603 fuse_ops[1] = addis_reg;
38605 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38607 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38608 fuse_ops[2] = offset;
38609 output_asm_insn (insn_template, fuse_ops);
38612 else if (GET_CODE (offset) == UNSPEC
38613 && XINT (offset, 1) == UNSPEC_TOCREL)
38615 if (TARGET_ELF)
38616 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38618 else if (TARGET_XCOFF)
38619 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38621 else
38622 gcc_unreachable ();
38624 fuse_ops[2] = XVECEXP (offset, 0, 0);
38625 output_asm_insn (insn_template, fuse_ops);
38628 else if (GET_CODE (offset) == PLUS
38629 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38630 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38631 && CONST_INT_P (XEXP (offset, 1)))
38633 rtx tocrel_unspec = XEXP (offset, 0);
38634 if (TARGET_ELF)
38635 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38637 else if (TARGET_XCOFF)
38638 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38640 else
38641 gcc_unreachable ();
38643 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38644 fuse_ops[3] = XEXP (offset, 1);
38645 output_asm_insn (insn_template, fuse_ops);
38648 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38650 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38652 fuse_ops[2] = offset;
38653 output_asm_insn (insn_template, fuse_ops);
38656 else
38657 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38659 return;
38662 /* Wrap a TOC address that can be fused to indicate that special fusion
38663 processing is needed. */
38666 fusion_wrap_memory_address (rtx old_mem)
38668 rtx old_addr = XEXP (old_mem, 0);
38669 rtvec v = gen_rtvec (1, old_addr);
38670 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38671 return replace_equiv_address_nv (old_mem, new_addr, false);
38674 /* Given an address, convert it into the addis and load offset parts. Addresses
38675 created during the peephole2 process look like:
38676 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38677 (unspec [(...)] UNSPEC_TOCREL))
38679 Addresses created via toc fusion look like:
38680 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38682 static void
38683 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38685 rtx hi, lo;
38687 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38689 lo = XVECEXP (addr, 0, 0);
38690 hi = gen_rtx_HIGH (Pmode, lo);
38692 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38694 hi = XEXP (addr, 0);
38695 lo = XEXP (addr, 1);
38697 else
38698 gcc_unreachable ();
38700 *p_hi = hi;
38701 *p_lo = lo;
38704 /* Return a string to fuse an addis instruction with a gpr load into the same
38705 register that the addis instruction set. The address that is used
38706 is the logical address that was formed during peephole2:
38707 (lo_sum (high) (low-part))
38709 Or the address is the TOC address that is wrapped before register allocation:
38710 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38712 The code is complicated, so we call output_asm_insn directly, and just
38713 return "". */
38715 const char *
38716 emit_fusion_gpr_load (rtx target, rtx mem)
38718 rtx addis_value;
38719 rtx addr;
38720 rtx load_offset;
38721 const char *load_str = NULL;
38722 const char *mode_name = NULL;
38723 machine_mode mode;
38725 if (GET_CODE (mem) == ZERO_EXTEND)
38726 mem = XEXP (mem, 0);
38728 gcc_assert (REG_P (target) && MEM_P (mem));
38730 addr = XEXP (mem, 0);
38731 fusion_split_address (addr, &addis_value, &load_offset);
38733 /* Now emit the load instruction to the same register. */
38734 mode = GET_MODE (mem);
38735 switch (mode)
38737 case QImode:
38738 mode_name = "char";
38739 load_str = "lbz";
38740 break;
38742 case HImode:
38743 mode_name = "short";
38744 load_str = "lhz";
38745 break;
38747 case SImode:
38748 case SFmode:
38749 mode_name = (mode == SFmode) ? "float" : "int";
38750 load_str = "lwz";
38751 break;
38753 case DImode:
38754 case DFmode:
38755 gcc_assert (TARGET_POWERPC64);
38756 mode_name = (mode == DFmode) ? "double" : "long";
38757 load_str = "ld";
38758 break;
38760 default:
38761 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38764 /* Emit the addis instruction. */
38765 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
38767 /* Emit the D-form load instruction. */
38768 emit_fusion_load_store (target, target, load_offset, load_str);
38770 return "";
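/* As a concrete illustration (a hand-written sketch, not actual
   compiler output), for an ELF TOC reference the sequence emitted
   above might look like:

       addis 9,2,.LC0@toc@ha     # gpr load fusion, type int
       lwz 9,.LC0@toc@l(9)

   where the addis result and the loaded value deliberately share r9,
   the requirement for this power8-style fusion.  */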
38774 /* Return true if the peephole2 can combine a load or store with the addis
38775 instruction that computes its address. This form of fusion was added with
38776 ISA 3.0 (power9) hardware. */
38778 bool
38779 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38780 rtx addis_value, /* addis value. */
38781 rtx dest, /* destination (memory or register). */
38782 rtx src) /* source (register or memory). */
38784 rtx addr, mem, offset;
38785 machine_mode mode = GET_MODE (src);
38787 /* Validate arguments. */
38788 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38789 return false;
38791 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38792 return false;
38794 /* Ignore extend operations that are part of the load. */
38795 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38796 src = XEXP (src, 0);
38798 /* Test for memory<-register or register<-memory. */
38799 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38801 if (!MEM_P (dest))
38802 return false;
38804 mem = dest;
38807 else if (MEM_P (src))
38809 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38810 return false;
38812 mem = src;
38815 else
38816 return false;
38818 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38819 if (GET_CODE (addr) == PLUS)
38821 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38822 return false;
38824 return satisfies_constraint_I (XEXP (addr, 1));
38827 else if (GET_CODE (addr) == LO_SUM)
38829 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38830 return false;
38832 offset = XEXP (addr, 1);
38833 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38834 return small_toc_ref (offset, GET_MODE (offset));
38836 else if (TARGET_ELF && !TARGET_POWERPC64)
38837 return CONSTANT_P (offset);
38840 return false;
38843 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38844 load sequence.
38846 The operands are:
38847 operands[0] register set with addis
38848 operands[1] value set via addis
38849 operands[2] target register being loaded
38850 operands[3] D-form memory reference using operands[0].
38852 This is similar to the fusion introduced with power8, except it applies to
38853 both loads and stores, and does not require the result register to be the
38854 same as the base register. At the moment, we only do this if the register
38855 set by the addis is dead. */
38857 void
38858 expand_fusion_p9_load (rtx *operands)
38860 rtx tmp_reg = operands[0];
38861 rtx addis_value = operands[1];
38862 rtx target = operands[2];
38863 rtx orig_mem = operands[3];
38864 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38865 enum rtx_code plus_or_lo_sum;
38866 machine_mode target_mode = GET_MODE (target);
38867 machine_mode extend_mode = target_mode;
38868 machine_mode ptr_mode = Pmode;
38869 enum rtx_code extend = UNKNOWN;
38871 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38873 extend = GET_CODE (orig_mem);
38874 orig_mem = XEXP (orig_mem, 0);
38875 target_mode = GET_MODE (orig_mem);
38878 gcc_assert (MEM_P (orig_mem));
38880 orig_addr = XEXP (orig_mem, 0);
38881 plus_or_lo_sum = GET_CODE (orig_addr);
38882 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38884 offset = XEXP (orig_addr, 1);
38885 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38886 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38888 if (extend != UNKNOWN)
38889 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38891 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38892 UNSPEC_FUSION_P9);
38894 set = gen_rtx_SET (target, new_mem);
38895 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38896 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38897 emit_insn (insn);
38899 return;
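/* Sketch of the rewrite performed above (illustrative RTL, not an
   actual dump).  A peephole2 pair such as

       (set (reg tmp) (high (const)))
       (set (reg target) (mem (lo_sum (reg tmp) (const))))

   is replaced by the single insn

       (parallel [(set (reg target)
                       (unspec [(mem (lo_sum (high (const)) (const)))]
                               UNSPEC_FUSION_P9))
                  (clobber (reg tmp))])

   so the addis value stays inside the fused insn and TMP is exposed
   only as a scratch register.  */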
38902 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38903 store sequence.
38905 The operands are:
38906 operands[0] register set with addis
38907 operands[1] value set via addis
38908 operands[2] target D-form memory being stored to
38909 operands[3] register being stored
38911 This is similar to the fusion introduced with power8, except it applies to
38912 both loads and stores, and does not require the result register to be the
38913 same as the base register. At the moment, we only do this if the register
38914 set by the addis is dead. */
38916 void
38917 expand_fusion_p9_store (rtx *operands)
38919 rtx tmp_reg = operands[0];
38920 rtx addis_value = operands[1];
38921 rtx orig_mem = operands[2];
38922 rtx src = operands[3];
38923 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38924 enum rtx_code plus_or_lo_sum;
38925 machine_mode target_mode = GET_MODE (orig_mem);
38926 machine_mode ptr_mode = Pmode;
38928 gcc_assert (MEM_P (orig_mem));
38930 orig_addr = XEXP (orig_mem, 0);
38931 plus_or_lo_sum = GET_CODE (orig_addr);
38932 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38934 offset = XEXP (orig_addr, 1);
38935 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38936 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38938 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38939 UNSPEC_FUSION_P9);
38941 set = gen_rtx_SET (new_mem, new_src);
38942 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38943 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38944 emit_insn (insn);
38946 return;
38949 /* Return a string to fuse an addis instruction with a load using extended
38950 fusion. The address that is used is the logical address that was formed
38951 during peephole2: (lo_sum (high) (low-part))
38953 The code is complicated, so we call output_asm_insn directly, and just
38954 return "". */
38956 const char *
38957 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
38959 machine_mode mode = GET_MODE (reg);
38960 rtx hi;
38961 rtx lo;
38962 rtx addr;
38963 const char *load_string;
38964 int r;
38966 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
38968 mem = XEXP (mem, 0);
38969 mode = GET_MODE (mem);
38972 if (GET_CODE (reg) == SUBREG)
38974 gcc_assert (SUBREG_BYTE (reg) == 0);
38975 reg = SUBREG_REG (reg);
38978 if (!REG_P (reg))
38979 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
38981 r = REGNO (reg);
38982 if (FP_REGNO_P (r))
38984 if (mode == SFmode)
38985 load_string = "lfs";
38986 else if (mode == DFmode || mode == DImode)
38987 load_string = "lfd";
38988 else
38989 gcc_unreachable ();
38991 else if (INT_REGNO_P (r))
38993 switch (mode)
38995 case QImode:
38996 load_string = "lbz";
38997 break;
38998 case HImode:
38999 load_string = "lhz";
39000 break;
39001 case SImode:
39002 case SFmode:
39003 load_string = "lwz";
39004 break;
39005 case DImode:
39006 case DFmode:
39007 if (!TARGET_POWERPC64)
39008 gcc_unreachable ();
39009 load_string = "ld";
39010 break;
39011 default:
39012 gcc_unreachable ();
39015 else
39016 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39018 if (!MEM_P (mem))
39019 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39021 addr = XEXP (mem, 0);
39022 fusion_split_address (addr, &hi, &lo);
39024 /* Emit the addis instruction. */
39025 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
39027 /* Emit the D-form load instruction. */
39028 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39030 return "";
39033 /* Return a string to fuse an addis instruction with a store using extended
39034 fusion. The address that is used is the logical address that was formed
39035 during peephole2: (lo_sum (high) (low-part))
39037 The code is complicated, so we call output_asm_insn directly, and just
39038 return "". */
39040 const char *
39041 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39043 machine_mode mode = GET_MODE (reg);
39044 rtx hi;
39045 rtx lo;
39046 rtx addr;
39047 const char *store_string;
39048 int r;
39050 if (GET_CODE (reg) == SUBREG)
39052 gcc_assert (SUBREG_BYTE (reg) == 0);
39053 reg = SUBREG_REG (reg);
39056 if (!REG_P (reg))
39057 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39059 r = REGNO (reg);
39060 if (FP_REGNO_P (r))
39062 if (mode == SFmode)
39063 store_string = "stfs";
39064 else if (mode == DFmode)
39065 store_string = "stfd";
39066 else
39067 gcc_unreachable ();
39069 else if (INT_REGNO_P (r))
39071 switch (mode)
39073 case QImode:
39074 store_string = "stb";
39075 break;
39076 case HImode:
39077 store_string = "sth";
39078 break;
39079 case SImode:
39080 case SFmode:
39081 store_string = "stw";
39082 break;
39083 case DImode:
39084 case DFmode:
39085 if (!TARGET_POWERPC64)
39086 gcc_unreachable ();
39087 store_string = "std";
39088 break;
39089 default:
39090 gcc_unreachable ();
39093 else
39094 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39096 if (!MEM_P (mem))
39097 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39099 addr = XEXP (mem, 0);
39100 fusion_split_address (addr, &hi, &lo);
39102 /* Emit the addis instruction. */
39103 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39105 /* Emit the D-form store instruction. */
39106 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39108 return "";
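/* A hand-written sketch of the emitted sequence (illustrative only),
   assuming r9 is the scratch register from the clobber and f1 holds
   the value being stored:

       addis 9,2,.LC0@toc@ha     # power9 store fusion, type DF
       stfd 1,.LC0@toc@l(9)

   Unlike the power8 form, the register being stored is independent
   of the addis scratch register.  */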
39112 /* Analyze vector computations and remove unnecessary doubleword
39113 swaps (xxswapdi instructions). This pass is performed only
39114 for little-endian VSX code generation.
39116 For this specific case, loads and stores of 4x32 and 2x64 vectors
39117 are inefficient. These are implemented using the lxvd2x and
39118 stxvd2x instructions, which invert the order of doublewords in
39119 a vector register. Thus the code generation inserts an xxswapdi
39120 after each such load, and prior to each such store. (For spill
39121 code after register assignment, an additional xxswapdi is inserted
39122 following each store in order to return a hard register to its
39123 unpermuted value.)
39125 The extra xxswapdi instructions reduce performance. This can be
39126 particularly bad for vectorized code. The purpose of this pass
39127 is to reduce the number of xxswapdi instructions required for
39128 correctness.
39130 The primary insight is that much code that operates on vectors
39131 does not care about the relative order of elements in a register,
39132 so long as the correct memory order is preserved. If we have
39133 a computation where all input values are provided by lxvd2x/xxswapdi
39134 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
39135 and all intermediate computations are pure SIMD (independent of
39136 element order), then all the xxswapdi's associated with the loads
39137 and stores may be removed.
39139 This pass uses some of the infrastructure and logical ideas from
39140 the "web" pass in web.c. We create maximal webs of computations
39141 fitting the description above using union-find. Each such web is
39142 then optimized by removing its unnecessary xxswapdi instructions.
39144 The pass is placed prior to global optimization so that we can
39145 perform the optimization in the safest and simplest way possible;
39146 that is, by replacing each xxswapdi insn with a register copy insn.
39147 Subsequent forward propagation will remove copies where possible.
39149 There are some operations sensitive to element order for which we
39150 can still allow the operation, provided we modify those operations.
39151 These include CONST_VECTORs, for which we must swap the first and
39152 second halves of the constant vector; and SUBREGs, for which we
39153 must adjust the byte offset to account for the swapped doublewords.
39154 A remaining opportunity would be non-immediate-form splats, for
39155 which we should adjust the selected lane of the input. We should
39156 also make code generation adjustments for sum-across operations,
39157 since this is a common vectorizer reduction.
39159 Because we run prior to the first split, we can see loads and stores
39160 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39161 vector loads and stores that have not yet been split into a permuting
39162 load/store and a swap. (One way this can happen is with a builtin
39163 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39164 than deleting a swap, we convert the load/store into a permuting
39165 load/store (which effectively removes the swap). */
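/* A small illustration (pseudo-assembly sketch, not actual compiler
   output) of the sequences this pass targets on little-endian:

       lxvd2x vr,ra,rb      # load, doublewords reversed
       xxswapdi vr,vr       # swap back to array order
       ... element-order-independent SIMD operations ...
       xxswapdi vr,vr       # swap again ahead of the store
       stxvd2x vr,ra,rc     # store, doublewords reversed

   When every input and output of a web is bracketed this way, both
   xxswapdi's can be replaced by plain register copies, which forward
   propagation then removes.  */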
39167 /* Notes on Permutes
39169 We do not currently handle computations that contain permutes. There
39170 is a general transformation that can be performed correctly, but it
39171 may introduce more expensive code than it replaces. To handle these
39172 would require a cost model to determine when to perform the optimization.
39173 This commentary records how this could be done if desired.
39175 The most general permute is something like this (example for V16QI):
39177 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39178 (parallel [(const_int a0) (const_int a1)
39179 ...
39180 (const_int a14) (const_int a15)]))
39182 where a0,...,a15 are in [0,31] and select which elements of op1 and op2
39183 appear in the result.
39185 Regardless of mode, we can convert the PARALLEL to a mask of 16
39186 byte-element selectors. Let's call this M, with M[i] representing
39187 the ith byte-element selector value. Then if we swap doublewords
39188 throughout the computation, we can get correct behavior by replacing
39189 M with M' as follows:
39191 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39192 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39194 This seems promising at first, since we are just replacing one mask
39195 with another. But certain masks are preferable to others. If M
39196 is a mask that matches a vmrghh pattern, for example, M' certainly
39197 will not. Instead of a single vmrghh, we would generate a load of
39198 M' and a vperm. So we would need to know how many xxswapd's we can
39199 remove as a result of this transformation to determine if it's
39200 profitable; and preferably the logic would need to be aware of all
39201 the special preferable masks.
39203 Another form of permute is an UNSPEC_VPERM, in which the mask is
39204 already in a register. In some cases, this mask may be a constant
39205 that we can discover with ud-chains, in which case the above
39206 transformation is ok. However, the common usage here is for the
39207 mask to be produced by an UNSPEC_LVSL, in which case the mask
39208 cannot be known at compile time. In such a case we would have to
39209 generate several instructions to compute M' as above at run time,
39210 and a cost model is needed again.
39212 However, when the mask M for an UNSPEC_VPERM is loaded from the
39213 constant pool, we can replace M with M' as above at no cost
39214 beyond adding a constant pool entry. */
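/* Worked example of the remapping above: if M[i] = 5 (a byte of op1),
   then M'[i] = (5+8)%16 = 13; if M[i] = 20 (a byte of op2), then
   M'[i] = ((20+8)%16)+16 = 12+16 = 28.  Each selector stays within
   its original source operand but moves to the opposite doubleword,
   matching the swapped layout of the inputs.  */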
39216 /* This is based on the union-find logic in web.c. web_entry_base is
39217 defined in df.h. */
39218 class swap_web_entry : public web_entry_base
39220 public:
39221 /* Pointer to the insn. */
39222 rtx_insn *insn;
39223 /* Set if insn contains a mention of a vector register. All other
39224 fields are undefined if this field is unset. */
39225 unsigned int is_relevant : 1;
39226 /* Set if insn is a load. */
39227 unsigned int is_load : 1;
39228 /* Set if insn is a store. */
39229 unsigned int is_store : 1;
39230 /* Set if insn is a doubleword swap. This can either be a register swap
39231 or a permuting load or store (test is_load and is_store for this). */
39232 unsigned int is_swap : 1;
39233 /* Set if the insn has a live-in use of a parameter register. */
39234 unsigned int is_live_in : 1;
39235 /* Set if the insn has a live-out def of a return register. */
39236 unsigned int is_live_out : 1;
39237 /* Set if the insn contains a subreg reference of a vector register. */
39238 unsigned int contains_subreg : 1;
39239 /* Set if the insn contains a 128-bit integer operand. */
39240 unsigned int is_128_int : 1;
39241 /* Set if this is a call-insn. */
39242 unsigned int is_call : 1;
39243 /* Set if this insn does not perform a vector operation for which
39244 element order matters, or if we know how to fix it up if it does.
39245 Undefined if is_swap is set. */
39246 unsigned int is_swappable : 1;
39247 /* A nonzero value indicates what kind of special handling for this
39248 insn is required if doublewords are swapped. Undefined if
39249 is_swappable is not set. */
39250 unsigned int special_handling : 4;
39251 /* Set if the web represented by this entry cannot be optimized. */
39252 unsigned int web_not_optimizable : 1;
39253 /* Set if this insn should be deleted. */
39254 unsigned int will_delete : 1;
39257 enum special_handling_values {
39258 SH_NONE = 0,
39259 SH_CONST_VECTOR,
39260 SH_SUBREG,
39261 SH_NOSWAP_LD,
39262 SH_NOSWAP_ST,
39263 SH_EXTRACT,
39264 SH_SPLAT,
39265 SH_XXPERMDI,
39266 SH_CONCAT,
39267 SH_VPERM
39270 /* Union INSN with all insns containing definitions that reach USE.
39271 Detect whether USE is live-in to the current function. */
39272 static void
39273 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
39275 struct df_link *link = DF_REF_CHAIN (use);
39277 if (!link)
39278 insn_entry[INSN_UID (insn)].is_live_in = 1;
39280 while (link)
39282 if (DF_REF_IS_ARTIFICIAL (link->ref))
39283 insn_entry[INSN_UID (insn)].is_live_in = 1;
39285 if (DF_REF_INSN_INFO (link->ref))
39287 rtx def_insn = DF_REF_INSN (link->ref);
39288 (void)unionfind_union (insn_entry + INSN_UID (insn),
39289 insn_entry + INSN_UID (def_insn));
39292 link = link->next;
39296 /* Union INSN with all insns containing uses reached from DEF.
39297 Detect whether DEF is live-out from the current function. */
39298 static void
39299 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
39301 struct df_link *link = DF_REF_CHAIN (def);
39303 if (!link)
39304 insn_entry[INSN_UID (insn)].is_live_out = 1;
39306 while (link)
39308 /* This could be an eh use or some other artificial use;
39309 we treat these all the same (killing the optimization). */
39310 if (DF_REF_IS_ARTIFICIAL (link->ref))
39311 insn_entry[INSN_UID (insn)].is_live_out = 1;
39313 if (DF_REF_INSN_INFO (link->ref))
39315 rtx use_insn = DF_REF_INSN (link->ref);
39316 (void)unionfind_union (insn_entry + INSN_UID (insn),
39317 insn_entry + INSN_UID (use_insn));
39320 link = link->next;
39324 /* Return 1 iff INSN is a load insn, including permuting loads that
39325 represent an lxvd2x instruction; else return 0. */
39326 static unsigned int
39327 insn_is_load_p (rtx insn)
39329 rtx body = PATTERN (insn);
39331 if (GET_CODE (body) == SET)
39333 if (GET_CODE (SET_SRC (body)) == MEM)
39334 return 1;
39336 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
39337 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
39338 return 1;
39340 return 0;
39343 if (GET_CODE (body) != PARALLEL)
39344 return 0;
39346 rtx set = XVECEXP (body, 0, 0);
39348 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
39349 return 1;
39351 return 0;
39354 /* Return 1 iff INSN is a store insn, including permuting stores that
39355 represent an stxvd2x instruction; else return 0. */
39356 static unsigned int
39357 insn_is_store_p (rtx insn)
39359 rtx body = PATTERN (insn);
39360 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
39361 return 1;
39362 if (GET_CODE (body) != PARALLEL)
39363 return 0;
39364 rtx set = XVECEXP (body, 0, 0);
39365 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
39366 return 1;
39367 return 0;
39370 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39371 a permuting load, or a permuting store. */
39372 static unsigned int
39373 insn_is_swap_p (rtx insn)
39375 rtx body = PATTERN (insn);
39376 if (GET_CODE (body) != SET)
39377 return 0;
39378 rtx rhs = SET_SRC (body);
39379 if (GET_CODE (rhs) != VEC_SELECT)
39380 return 0;
39381 rtx parallel = XEXP (rhs, 1);
39382 if (GET_CODE (parallel) != PARALLEL)
39383 return 0;
39384 unsigned int len = XVECLEN (parallel, 0);
39385 if (len != 2 && len != 4 && len != 8 && len != 16)
39386 return 0;
39387 for (unsigned int i = 0; i < len / 2; ++i)
39389 rtx op = XVECEXP (parallel, 0, i);
39390 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
39391 return 0;
39393 for (unsigned int i = len / 2; i < len; ++i)
39395 rtx op = XVECEXP (parallel, 0, i);
39396 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
39397 return 0;
39399 return 1;
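/* For example, for a V4SI doubleword swap the selector accepted above
   must be exactly
       (parallel [(const_int 2) (const_int 3)
                  (const_int 0) (const_int 1)])
   i.e. the second half of the lanes followed by the first half.  */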
39402 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
39403 static bool
39404 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
39406 unsigned uid = INSN_UID (insn);
39407 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
39408 return false;
39410 /* Find the unique use in the swap and locate its def. If the def
39411 isn't unique, punt. */
39412 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39413 df_ref use;
39414 FOR_EACH_INSN_INFO_USE (use, insn_info)
39416 struct df_link *def_link = DF_REF_CHAIN (use);
39417 if (!def_link || def_link->next)
39418 return false;
39420 rtx def_insn = DF_REF_INSN (def_link->ref);
39421 unsigned uid2 = INSN_UID (def_insn);
39422 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
39423 return false;
39425 rtx body = PATTERN (def_insn);
39426 if (GET_CODE (body) != SET
39427 || GET_CODE (SET_SRC (body)) != VEC_SELECT
39428 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
39429 return false;
39431 rtx mem = XEXP (SET_SRC (body), 0);
39432 rtx base_reg = XEXP (mem, 0);
39434 df_ref base_use;
39435 insn_info = DF_INSN_INFO_GET (def_insn);
39436 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
39438 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
39439 continue;
39441 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
39442 if (!base_def_link || base_def_link->next)
39443 return false;
39445 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
39446 rtx tocrel_body = PATTERN (tocrel_insn);
39447 rtx base, offset;
39448 if (GET_CODE (tocrel_body) != SET)
39449 return false;
39450 /* There is an extra level of indirection for small/large
39451 code models. */
39452 rtx tocrel_expr = SET_SRC (tocrel_body);
39453 if (GET_CODE (tocrel_expr) == MEM)
39454 tocrel_expr = XEXP (tocrel_expr, 0);
39455 if (!toc_relative_expr_p (tocrel_expr, false))
39456 return false;
39457 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
39458 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
39459 return false;
39462 return true;
39465 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
39466 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
39467 static bool
39468 v2df_reduction_p (rtx op)
39470 if (GET_MODE (op) != V2DFmode)
39471 return false;
39473 enum rtx_code code = GET_CODE (op);
39474 if (code != PLUS && code != SMIN && code != SMAX)
39475 return false;
39477 rtx concat = XEXP (op, 0);
39478 if (GET_CODE (concat) != VEC_CONCAT)
39479 return false;
39481 rtx select0 = XEXP (concat, 0);
39482 rtx select1 = XEXP (concat, 1);
39483 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
39484 return false;
39486 rtx reg0 = XEXP (select0, 0);
39487 rtx reg1 = XEXP (select1, 0);
39488 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
39489 return false;
39491 rtx parallel0 = XEXP (select0, 1);
39492 rtx parallel1 = XEXP (select1, 1);
39493 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
39494 return false;
39496 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
39497 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
39498 return false;
39500 return true;
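/* Sketch of the shape accepted by the tests above (see the
   vsx_reduc_<VEC_reduc_name>_v2df patterns in vsx.md for the
   authoritative form):

       (plus:V2DF
         (vec_concat:V2DF
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
         ...)

   with PLUS possibly replaced by SMIN or SMAX.  */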
39503 /* Return 1 iff OP is an operand that will not be affected by having
39504 vector doublewords swapped in memory. */
39505 static unsigned int
39506 rtx_is_swappable_p (rtx op, unsigned int *special)
39508 enum rtx_code code = GET_CODE (op);
39509 int i, j;
39510 rtx parallel;
39512 switch (code)
39514 case LABEL_REF:
39515 case SYMBOL_REF:
39516 case CLOBBER:
39517 case REG:
39518 return 1;
39520 case VEC_CONCAT:
39521 case ASM_INPUT:
39522 case ASM_OPERANDS:
39523 return 0;
39525 case CONST_VECTOR:
39527 *special = SH_CONST_VECTOR;
39528 return 1;
39531 case VEC_DUPLICATE:
39532 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
39533 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
39534 it represents a vector splat for which we can do special
39535 handling. */
39536 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
39537 return 1;
39538 else if (REG_P (XEXP (op, 0))
39539 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39540 /* This catches V2DF and V2DI splat, at a minimum. */
39541 return 1;
39542 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
39543 && REG_P (XEXP (XEXP (op, 0), 0))
39544 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39545 /* This catches splat of a truncated value. */
39546 return 1;
39547 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
39548 /* If the duplicated item is from a select, defer to the select
39549 processing to see if we can change the lane for the splat. */
39550 return rtx_is_swappable_p (XEXP (op, 0), special);
39551 else
39552 return 0;
39554 case VEC_SELECT:
39555 /* A vec_extract operation is ok if we change the lane. */
39556 if (GET_CODE (XEXP (op, 0)) == REG
39557 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
39558 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39559 && XVECLEN (parallel, 0) == 1
39560 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
39562 *special = SH_EXTRACT;
39563 return 1;
39565 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
39566 XXPERMDI is a swap operation, it will be identified by
39567 insn_is_swap_p and therefore we won't get here. */
39568 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
39569 && (GET_MODE (XEXP (op, 0)) == V4DFmode
39570 || GET_MODE (XEXP (op, 0)) == V4DImode)
39571 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39572 && XVECLEN (parallel, 0) == 2
39573 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
39574 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
39576 *special = SH_XXPERMDI;
39577 return 1;
39579 else if (v2df_reduction_p (op))
39580 return 1;
39581 else
39582 return 0;
39584 case UNSPEC:
39586 /* Various operations are unsafe for this optimization, at least
39587 without significant additional work. Permutes are obviously
39588 problematic, as both the permute control vector and the ordering
39589 of the target values are invalidated by doubleword swapping.
39590 Vector pack and unpack modify the number of vector lanes.
39591 Merge-high/low will not operate correctly on swapped operands.
39592 Vector shifts across element boundaries are clearly uncool,
39593 as are vector select and concatenate operations. Vector
39594 sum-across instructions define one operand with a specific
39595 order-dependent element, so additional fixup code would be
39596 needed to make those work. Vector set and non-immediate-form
39597 vector splat are element-order sensitive. A few of these
39598 cases might be workable with special handling if required.
39599 Adding cost modeling would be appropriate in some cases. */
39600 int val = XINT (op, 1);
39601 switch (val)
39603 default:
39604 break;
39605 case UNSPEC_VMRGH_DIRECT:
39606 case UNSPEC_VMRGL_DIRECT:
39607 case UNSPEC_VPACK_SIGN_SIGN_SAT:
39608 case UNSPEC_VPACK_SIGN_UNS_SAT:
39609 case UNSPEC_VPACK_UNS_UNS_MOD:
39610 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
39611 case UNSPEC_VPACK_UNS_UNS_SAT:
39612 case UNSPEC_VPERM:
39613 case UNSPEC_VPERM_UNS:
39614 case UNSPEC_VPERMHI:
39615 case UNSPEC_VPERMSI:
39616 case UNSPEC_VPKPX:
39617 case UNSPEC_VSLDOI:
39618 case UNSPEC_VSLO:
39619 case UNSPEC_VSRO:
39620 case UNSPEC_VSUM2SWS:
39621 case UNSPEC_VSUM4S:
39622 case UNSPEC_VSUM4UBS:
39623 case UNSPEC_VSUMSWS:
39624 case UNSPEC_VSUMSWS_DIRECT:
39625 case UNSPEC_VSX_CONCAT:
39626 case UNSPEC_VSX_SET:
39627 case UNSPEC_VSX_SLDWI:
39628 case UNSPEC_VUNPACK_HI_SIGN:
39629 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
39630 case UNSPEC_VUNPACK_LO_SIGN:
39631 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
39632 case UNSPEC_VUPKHPX:
39633 case UNSPEC_VUPKHS_V4SF:
39634 case UNSPEC_VUPKHU_V4SF:
39635 case UNSPEC_VUPKLPX:
39636 case UNSPEC_VUPKLS_V4SF:
39637 case UNSPEC_VUPKLU_V4SF:
39638 case UNSPEC_VSX_CVDPSPN:
39639 case UNSPEC_VSX_CVSPDP:
39640 case UNSPEC_VSX_CVSPDPN:
39641 case UNSPEC_VSX_EXTRACT:
39642 case UNSPEC_VSX_VSLO:
39643 case UNSPEC_VSX_VEC_INIT:
39644 return 0;
39645 case UNSPEC_VSPLT_DIRECT:
39646 *special = SH_SPLAT;
39647 return 1;
39648 case UNSPEC_REDUC_PLUS:
39649 case UNSPEC_REDUC:
39650 return 1;
39654 default:
39655 break;
39658 const char *fmt = GET_RTX_FORMAT (code);
39659 int ok = 1;
39661 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39662 if (fmt[i] == 'e' || fmt[i] == 'u')
39664 unsigned int special_op = SH_NONE;
39665 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
39666 if (special_op == SH_NONE)
39667 continue;
39668 /* Ensure we never have two kinds of special handling
39669 for the same insn. */
39670 if (*special != SH_NONE && *special != special_op)
39671 return 0;
39672 *special = special_op;
39674 else if (fmt[i] == 'E')
39675 for (j = 0; j < XVECLEN (op, i); ++j)
39677 unsigned int special_op = SH_NONE;
39678 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
39679 if (special_op == SH_NONE)
39680 continue;
39681 /* Ensure we never have two kinds of special handling
39682 for the same insn. */
39683 if (*special != SH_NONE && *special != special_op)
39684 return 0;
39685 *special = special_op;
39688 return ok;
39691 /* Return 1 iff INSN is an operand that will not be affected by
39692 having vector doublewords swapped in memory (in which case
39693 *SPECIAL is unchanged), or that can be modified to be correct
39694 if vector doublewords are swapped in memory (in which case
39695 *SPECIAL is changed to a value indicating how). */
39696 static unsigned int
39697 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
39698 unsigned int *special)
39700 /* Calls are always bad. */
39701 if (GET_CODE (insn) == CALL_INSN)
39702 return 0;
39704 /* Loads and stores seen here are not permuting, but we can still
39705 fix them up by converting them to permuting ones. Exceptions:
39706 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
39707 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
39708 for the SET source. Also we must now make an exception for lvx
39709 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
39710 explicit "& -16") since this leads to unrecognizable insns. */
39711 rtx body = PATTERN (insn);
39712 int i = INSN_UID (insn);
39714 if (insn_entry[i].is_load)
39716 if (GET_CODE (body) == SET)
39718 rtx rhs = SET_SRC (body);
39719 gcc_assert (GET_CODE (rhs) == MEM);
39720 if (GET_CODE (XEXP (rhs, 0)) == AND)
39721 return 0;
39723 *special = SH_NOSWAP_LD;
39724 return 1;
39726 else
39727 return 0;
39730 if (insn_entry[i].is_store)
39732 if (GET_CODE (body) == SET
39733 && GET_CODE (SET_SRC (body)) != UNSPEC)
39735 rtx lhs = SET_DEST (body);
39736 gcc_assert (GET_CODE (lhs) == MEM);
39737 if (GET_CODE (XEXP (lhs, 0)) == AND)
39738 return 0;
39740 *special = SH_NOSWAP_ST;
39741 return 1;
39743 else
39744 return 0;
39747 /* A conversion to single precision can be left as is, provided that
39748 all of its uses are in xxspltw instructions that splat BE element
39749 zero. */
39750 if (GET_CODE (body) == SET
39751 && GET_CODE (SET_SRC (body)) == UNSPEC
39752 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
39754 df_ref def;
39755 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39757 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39759 struct df_link *link = DF_REF_CHAIN (def);
39760 if (!link)
39761 return 0;
39763 for (; link; link = link->next) {
39764 rtx use_insn = DF_REF_INSN (link->ref);
39765 rtx use_body = PATTERN (use_insn);
39766 if (GET_CODE (use_body) != SET
39767 || GET_CODE (SET_SRC (use_body)) != UNSPEC
39768 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
39769 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
39770 return 0;
39774 return 1;
39777 /* A concatenation of two doublewords is ok if we reverse the
39778 order of the inputs. */
39779 if (GET_CODE (body) == SET
39780 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
39781 && (GET_MODE (SET_SRC (body)) == V2DFmode
39782 || GET_MODE (SET_SRC (body)) == V2DImode))
39784 *special = SH_CONCAT;
39785 return 1;
39788 /* V2DF reductions are always swappable. */
39789 if (GET_CODE (body) == PARALLEL)
39791 rtx expr = XVECEXP (body, 0, 0);
39792 if (GET_CODE (expr) == SET
39793 && v2df_reduction_p (SET_SRC (expr)))
39794 return 1;
39797 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
39798 constant pool. */
39799 if (GET_CODE (body) == SET
39800 && GET_CODE (SET_SRC (body)) == UNSPEC
39801 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
39802 && XVECLEN (SET_SRC (body), 0) == 3
39803 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
39805 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
39806 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39807 df_ref use;
39808 FOR_EACH_INSN_INFO_USE (use, insn_info)
39809 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
39811 struct df_link *def_link = DF_REF_CHAIN (use);
39812 /* Punt if multiple definitions for this reg. */
39813 if (def_link && !def_link->next &&
39814 const_load_sequence_p (insn_entry,
39815 DF_REF_INSN (def_link->ref)))
39817 *special = SH_VPERM;
39818 return 1;
39823 /* Otherwise check the operands for vector lane violations. */
39824 return rtx_is_swappable_p (body, special);
39827 enum chain_purpose { FOR_LOADS, FOR_STORES };
39829 /* Return true if the UD or DU chain headed by LINK is non-empty,
39830 and every entry on the chain references an insn that is a
39831 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
39832 register swap must have only permuting loads as reaching defs.
39833 If PURPOSE is FOR_STORES, each such register swap must have only
39834 register swaps or permuting stores as reached uses. */
39835 static bool
39836 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
39837 enum chain_purpose purpose)
39839 if (!link)
39840 return false;
39842 for (; link; link = link->next)
39844 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
39845 continue;
39847 if (DF_REF_IS_ARTIFICIAL (link->ref))
39848 return false;
39850 rtx reached_insn = DF_REF_INSN (link->ref);
39851 unsigned uid = INSN_UID (reached_insn);
39852 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
39854 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
39855 || insn_entry[uid].is_store)
39856 return false;
39858 if (purpose == FOR_LOADS)
39860 df_ref use;
39861 FOR_EACH_INSN_INFO_USE (use, insn_info)
39863 struct df_link *swap_link = DF_REF_CHAIN (use);
39865 while (swap_link)
39867 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39868 return false;
39870 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
39871 unsigned uid2 = INSN_UID (swap_def_insn);
39873 /* Only permuting loads are allowed. */
39874 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
39875 return false;
39877 swap_link = swap_link->next;
39881 else if (purpose == FOR_STORES)
39883 df_ref def;
39884 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39886 struct df_link *swap_link = DF_REF_CHAIN (def);
39888 while (swap_link)
39890 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39891 return false;
39893 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
39894 unsigned uid2 = INSN_UID (swap_use_insn);
39896 /* Permuting stores or register swaps are allowed. */
39897 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
39898 return false;
39900 swap_link = swap_link->next;
39906 return true;
39909 /* Mark the xxswapdi instructions associated with permuting loads and
39910 stores for removal. Note that we only flag them for deletion here,
39911 as there is a possibility of a swap being reached from multiple
39912 loads, etc. */
39913 static void
39914 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
39916 rtx insn = insn_entry[i].insn;
39917 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39919 if (insn_entry[i].is_load)
39921 df_ref def;
39922 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39924 struct df_link *link = DF_REF_CHAIN (def);
39926 /* We know by now that these are swaps, so we can delete
39927 them confidently. */
39928 while (link)
39930 rtx use_insn = DF_REF_INSN (link->ref);
39931 insn_entry[INSN_UID (use_insn)].will_delete = 1;
39932 link = link->next;
39936 else if (insn_entry[i].is_store)
39938 df_ref use;
39939 FOR_EACH_INSN_INFO_USE (use, insn_info)
39941 /* Ignore uses for addressability. */
39942 machine_mode mode = GET_MODE (DF_REF_REG (use));
39943 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
39944 continue;
39946 struct df_link *link = DF_REF_CHAIN (use);
39948 /* We know by now that these are swaps, so we can delete
39949 them confidently. */
39950 while (link)
39952 rtx def_insn = DF_REF_INSN (link->ref);
39953 insn_entry[INSN_UID (def_insn)].will_delete = 1;
39954 link = link->next;
39960 /* OP is either a CONST_VECTOR or an expression containing one.
39961 Swap the first half of the vector with the second in the first
39962 case. Recurse to find it in the second. */
39963 static void
39964 swap_const_vector_halves (rtx op)
39966 int i;
39967 enum rtx_code code = GET_CODE (op);
39968 if (GET_CODE (op) == CONST_VECTOR)
39970 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
39971 for (i = 0; i < half_units; ++i)
39973 rtx temp = CONST_VECTOR_ELT (op, i);
39974 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
39975 CONST_VECTOR_ELT (op, i + half_units) = temp;
39978 else
39980 int j;
39981 const char *fmt = GET_RTX_FORMAT (code);
39982 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39983 if (fmt[i] == 'e' || fmt[i] == 'u')
39984 swap_const_vector_halves (XEXP (op, i));
39985 else if (fmt[i] == 'E')
39986 for (j = 0; j < XVECLEN (op, i); ++j)
39987 swap_const_vector_halves (XVECEXP (op, i, j));
39991 /* Find all subregs of a vector expression that perform a narrowing,
39992 and adjust the subreg index to account for doubleword swapping. */
39993 static void
39994 adjust_subreg_index (rtx op)
39996 enum rtx_code code = GET_CODE (op);
39997 if (code == SUBREG
39998 && (GET_MODE_SIZE (GET_MODE (op))
39999 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
40001 unsigned int index = SUBREG_BYTE (op);
40002 if (index < 8)
40003 index += 8;
40004 else
40005 index -= 8;
40006 SUBREG_BYTE (op) = index;
40009 const char *fmt = GET_RTX_FORMAT (code);
40010 int i, j;
40011 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
40012 if (fmt[i] == 'e' || fmt[i] == 'u')
40013 adjust_subreg_index (XEXP (op, i));
40014 else if (fmt[i] == 'E')
40015 for (j = 0; j < XVECLEN (op, i); ++j)
40016 adjust_subreg_index (XVECEXP (op, i, j));
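/* For example, a narrowing access such as (subreg:DF (reg:V2DF v) 0)
   must become (subreg:DF (reg:V2DF v) 8) once the doublewords of V
   are swapped in the register, and vice versa.  */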
40019 /* Convert the non-permuting load INSN to a permuting one. */
40020 static void
40021 permute_load (rtx_insn *insn)
40023 rtx body = PATTERN (insn);
40024 rtx mem_op = SET_SRC (body);
40025 rtx tgt_reg = SET_DEST (body);
40026 machine_mode mode = GET_MODE (tgt_reg);
40027 int n_elts = GET_MODE_NUNITS (mode);
40028 int half_elts = n_elts / 2;
40029 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40030 int i, j;
40031 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40032 XVECEXP (par, 0, i) = GEN_INT (j);
40033 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40034 XVECEXP (par, 0, i) = GEN_INT (j);
40035 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
40036 SET_SRC (body) = sel;
40037 INSN_CODE (insn) = -1; /* Force re-recognition. */
40038 df_insn_rescan (insn);
40040 if (dump_file)
40041 fprintf (dump_file, "Replacing load %d with permuted load\n",
40042 INSN_UID (insn));
40045 /* Convert the non-permuting store INSN to a permuting one. */
40046 static void
40047 permute_store (rtx_insn *insn)
40049 rtx body = PATTERN (insn);
40050 rtx src_reg = SET_SRC (body);
40051 machine_mode mode = GET_MODE (src_reg);
40052 int n_elts = GET_MODE_NUNITS (mode);
40053 int half_elts = n_elts / 2;
40054 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
40055 int i, j;
40056 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
40057 XVECEXP (par, 0, i) = GEN_INT (j);
40058 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
40059 XVECEXP (par, 0, i) = GEN_INT (j);
40060 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
40061 SET_SRC (body) = sel;
40062 INSN_CODE (insn) = -1; /* Force re-recognition. */
40063 df_insn_rescan (insn);
40065 if (dump_file)
40066 fprintf (dump_file, "Replacing store %d with permuted store\n",
40067 INSN_UID (insn));
40070 /* Given INSN that contains a vector extract operation, adjust the index
40071 of the extracted lane to account for the doubleword swap. */
40072 static void
40073 adjust_extract (rtx_insn *insn)
40075 rtx pattern = PATTERN (insn);
40076 if (GET_CODE (pattern) == PARALLEL)
40077 pattern = XVECEXP (pattern, 0, 0);
40078 rtx src = SET_SRC (pattern);
40079 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40080 account for that. */
40081 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
40082 rtx par = XEXP (sel, 1);
40083 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
40084 int lane = INTVAL (XVECEXP (par, 0, 0));
40085 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40086 XVECEXP (par, 0, 0) = GEN_INT (lane);
40087 INSN_CODE (insn) = -1; /* Force re-recognition. */
40088 df_insn_rescan (insn);
40090 if (dump_file)
40091 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
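/* For example, extracting lane 1 from a V4SI value (half_elts == 2)
   becomes an extract of lane 1 + 2 = 3 after the doubleword swap,
   while extracting lane 3 becomes an extract of lane 3 - 2 = 1.  */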
40094 /* Given INSN that contains a vector direct-splat operation, adjust the index
40095 of the source lane to account for the doubleword swap. */
40096 static void
40097 adjust_splat (rtx_insn *insn)
40099 rtx body = PATTERN (insn);
40100 rtx unspec = XEXP (body, 1);
40101 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
40102 int lane = INTVAL (XVECEXP (unspec, 0, 1));
40103 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40104 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
40105 INSN_CODE (insn) = -1; /* Force re-recognition. */
40106 df_insn_rescan (insn);
40108 if (dump_file)
40109 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
40112 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
40113 swap), reverse the order of the source operands and adjust the indices
40114 of the source lanes to account for doubleword reversal. */
40115 static void
40116 adjust_xxpermdi (rtx_insn *insn)
40118 rtx set = PATTERN (insn);
40119 rtx select = XEXP (set, 1);
40120 rtx concat = XEXP (select, 0);
40121 rtx src0 = XEXP (concat, 0);
40122 XEXP (concat, 0) = XEXP (concat, 1);
40123 XEXP (concat, 1) = src0;
40124 rtx parallel = XEXP (select, 1);
40125 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
40126 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
40127 int new_lane0 = 3 - lane1;
40128 int new_lane1 = 3 - lane0;
40129 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
40130 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
40131 INSN_CODE (insn) = -1; /* Force re-recognition. */
40132 df_insn_rescan (insn);
40134 if (dump_file)
40135 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
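/* For example, an xxpermdi selecting lanes [0, 2] of
   (vec_concat A B) is rewritten to select lanes [3-2, 3-0] = [1, 3]
   of (vec_concat B A), so that the same doublewords are chosen once
   both inputs and the result are in swapped order.  */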
40138 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
40139 reverse the order of those inputs. */
40140 static void
40141 adjust_concat (rtx_insn *insn)
40143 rtx set = PATTERN (insn);
40144 rtx concat = XEXP (set, 1);
40145 rtx src0 = XEXP (concat, 0);
40146 XEXP (concat, 0) = XEXP (concat, 1);
40147 XEXP (concat, 1) = src0;
40148 INSN_CODE (insn) = -1; /* Force re-recognition. */
40149 df_insn_rescan (insn);
40151 if (dump_file)
40152 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
40155 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40156 constant pool to reflect swapped doublewords. */
40157 static void
40158 adjust_vperm (rtx_insn *insn)
40160 /* We previously determined that the UNSPEC_VPERM was fed by a
40161 swap of a swapping load of a TOC-relative constant pool symbol.
40162 Find the MEM in the swapping load and replace it with a MEM for
40163 the adjusted mask constant. */
40164 rtx set = PATTERN (insn);
40165 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
40167 /* Find the swap. */
40168 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40169 df_ref use;
40170 rtx_insn *swap_insn = 0;
40171 FOR_EACH_INSN_INFO_USE (use, insn_info)
40172 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40174 struct df_link *def_link = DF_REF_CHAIN (use);
40175 gcc_assert (def_link && !def_link->next);
40176 swap_insn = DF_REF_INSN (def_link->ref);
40177 break;
40179 gcc_assert (swap_insn);
40181 /* Find the load. */
40182 insn_info = DF_INSN_INFO_GET (swap_insn);
40183 rtx_insn *load_insn = 0;
40184 FOR_EACH_INSN_INFO_USE (use, insn_info)
40186 struct df_link *def_link = DF_REF_CHAIN (use);
40187 gcc_assert (def_link && !def_link->next);
40188 load_insn = DF_REF_INSN (def_link->ref);
40189 break;
40191 gcc_assert (load_insn);
40193 /* Find the TOC-relative symbol access. */
40194 insn_info = DF_INSN_INFO_GET (load_insn);
40195 rtx_insn *tocrel_insn = 0;
40196 FOR_EACH_INSN_INFO_USE (use, insn_info)
40198 struct df_link *def_link = DF_REF_CHAIN (use);
40199 gcc_assert (def_link && !def_link->next);
40200 tocrel_insn = DF_REF_INSN (def_link->ref);
40201 break;
40203 gcc_assert (tocrel_insn);
40205 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
40206 to set tocrel_base; otherwise it would be unnecessary as we've
40207 already established it will return true. */
40208 rtx base, offset;
40209 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
40210 /* There is an extra level of indirection for small/large code models. */
40211 if (GET_CODE (tocrel_expr) == MEM)
40212 tocrel_expr = XEXP (tocrel_expr, 0);
40213 if (!toc_relative_expr_p (tocrel_expr, false))
40214 gcc_unreachable ();
40215 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40216 rtx const_vector = get_pool_constant (base);
40217 /* With the extra indirection, get_pool_constant first produces the
40218 SYMBOL_REF from the reg_equal expression, so look it up once more
40219 to get the real constant. */
40220 if (GET_CODE (const_vector) == SYMBOL_REF)
40221 const_vector = get_pool_constant (const_vector);
40222 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
40224 /* Create an adjusted mask from the initial mask. */
40225 unsigned int new_mask[16], i, val;
40226 for (i = 0; i < 16; ++i) {
40227 val = INTVAL (XVECEXP (const_vector, 0, i));
40228 if (val < 16)
40229 new_mask[i] = (val + 8) % 16;
40230 else
40231 new_mask[i] = ((val + 8) % 16) + 16;
40234 /* Create a new CONST_VECTOR and a MEM that references it. */
40235 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
40236 for (i = 0; i < 16; ++i)
40237 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
40238 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
40239 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
40240 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
40241 can't recognize. Force the SYMBOL_REF into a register. */
40242 if (!REG_P (XEXP (new_mem, 0))) {
40243 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
40244 XEXP (new_mem, 0) = base_reg;
40245 /* Move the newly created insn ahead of the load insn. */
40246 rtx_insn *force_insn = get_last_insn ();
40247 remove_insn (force_insn);
40248 rtx_insn *before_load_insn = PREV_INSN (load_insn);
40249 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
40250 df_insn_rescan (before_load_insn);
40251 df_insn_rescan (force_insn);
40254 /* Replace the MEM in the load instruction and rescan it. */
40255 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
40256 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
40257 df_insn_rescan (load_insn);
40259 if (dump_file)
40260 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
40263 /* The insn described by INSN_ENTRY[I] can be swapped, but only
40264 with special handling. Take care of that here. */
40265 static void
40266 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
40268 rtx_insn *insn = insn_entry[i].insn;
40269 rtx body = PATTERN (insn);
40271 switch (insn_entry[i].special_handling)
40273 default:
40274 gcc_unreachable ();
40275 case SH_CONST_VECTOR:
40277 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
40278 gcc_assert (GET_CODE (body) == SET);
40279 rtx rhs = SET_SRC (body);
40280 swap_const_vector_halves (rhs);
40281 if (dump_file)
40282 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
40283 break;
40285 case SH_SUBREG:
40286 /* A subreg of the same size is already safe. For subregs that
40287 select a smaller portion of a reg, adjust the index for
40288 swapped doublewords. */
40289 adjust_subreg_index (body);
40290 if (dump_file)
40291 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
40292 break;
40293 case SH_NOSWAP_LD:
40294 /* Convert a non-permuting load to a permuting one. */
40295 permute_load (insn);
40296 break;
40297 case SH_NOSWAP_ST:
40298 /* Convert a non-permuting store to a permuting one. */
40299 permute_store (insn);
40300 break;
40301 case SH_EXTRACT:
40302 /* Change the lane on an extract operation. */
40303 adjust_extract (insn);
40304 break;
40305 case SH_SPLAT:
40306 /* Change the lane on a direct-splat operation. */
40307 adjust_splat (insn);
40308 break;
40309 case SH_XXPERMDI:
40310 /* Change the lanes on an XXPERMDI operation. */
40311 adjust_xxpermdi (insn);
40312 break;
40313 case SH_CONCAT:
40314 /* Reverse the order of a concatenation operation. */
40315 adjust_concat (insn);
40316 break;
40317 case SH_VPERM:
40318 /* Change the mask loaded from the constant pool for a VPERM. */
40319 adjust_vperm (insn);
40320 break;
40324 /* Find the insn from the Ith table entry, which is known to be a
40325 register swap Y = SWAP(X). Replace it with a copy Y = X. */
40326 static void
40327 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
40329 rtx_insn *insn = insn_entry[i].insn;
40330 rtx body = PATTERN (insn);
40331 rtx src_reg = XEXP (SET_SRC (body), 0);
40332 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
40333 rtx_insn *new_insn = emit_insn_before (copy, insn);
40334 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
40335 df_insn_rescan (new_insn);
40337 if (dump_file)
40339 unsigned int new_uid = INSN_UID (new_insn);
40340 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
40343 df_insn_delete (insn);
40344 remove_insn (insn);
40345 insn->set_deleted ();
40348 /* Dump the swap table to DUMP_FILE. */
40349 static void
40350 dump_swap_insn_table (swap_web_entry *insn_entry)
40352 int e = get_max_uid ();
40353 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
40355 for (int i = 0; i < e; ++i)
40356 if (insn_entry[i].is_relevant)
40358 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
40359 fprintf (dump_file, "%6d %6d ", i,
40360 pred_entry && pred_entry->insn
40361 ? INSN_UID (pred_entry->insn) : 0);
40362 if (insn_entry[i].is_load)
40363 fputs ("load ", dump_file);
40364 if (insn_entry[i].is_store)
40365 fputs ("store ", dump_file);
40366 if (insn_entry[i].is_swap)
40367 fputs ("swap ", dump_file);
40368 if (insn_entry[i].is_live_in)
40369 fputs ("live-in ", dump_file);
40370 if (insn_entry[i].is_live_out)
40371 fputs ("live-out ", dump_file);
40372 if (insn_entry[i].contains_subreg)
40373 fputs ("subreg ", dump_file);
40374 if (insn_entry[i].is_128_int)
40375 fputs ("int128 ", dump_file);
40376 if (insn_entry[i].is_call)
40377 fputs ("call ", dump_file);
40378 if (insn_entry[i].is_swappable)
40380 fputs ("swappable ", dump_file);
40381 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
40382 fputs ("special:constvec ", dump_file);
40383 else if (insn_entry[i].special_handling == SH_SUBREG)
40384 fputs ("special:subreg ", dump_file);
40385 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
40386 fputs ("special:load ", dump_file);
40387 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
40388 fputs ("special:store ", dump_file);
40389 else if (insn_entry[i].special_handling == SH_EXTRACT)
40390 fputs ("special:extract ", dump_file);
40391 else if (insn_entry[i].special_handling == SH_SPLAT)
40392 fputs ("special:splat ", dump_file);
40393 else if (insn_entry[i].special_handling == SH_XXPERMDI)
40394 fputs ("special:xxpermdi ", dump_file);
40395 else if (insn_entry[i].special_handling == SH_CONCAT)
40396 fputs ("special:concat ", dump_file);
40397 else if (insn_entry[i].special_handling == SH_VPERM)
40398 fputs ("special:vperm ", dump_file);
40400 if (insn_entry[i].web_not_optimizable)
40401 fputs ("unoptimizable ", dump_file);
40402 if (insn_entry[i].will_delete)
40403 fputs ("delete ", dump_file);
40404 fputs ("\n", dump_file);
40406 fputs ("\n", dump_file);
40409 /* Given ALIGN, an rtx of the form (& addr (const_int -16)), return a
40410 copy with its address canonicalized to (reg) or (+ reg reg). Always
40411 return a new copy to avoid problems with combine. */
40412 static rtx
40413 alignment_with_canonical_addr (rtx align)
40415 rtx canon;
40416 rtx addr = XEXP (align, 0);
40418 if (REG_P (addr))
40419 canon = addr;
40421 else if (GET_CODE (addr) == PLUS)
40423 rtx addrop0 = XEXP (addr, 0);
40424 rtx addrop1 = XEXP (addr, 1);
40426 if (!REG_P (addrop0))
40427 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
40429 if (!REG_P (addrop1))
40430 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
40432 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
40435 else
40436 canon = force_reg (GET_MODE (addr), addr);
40438 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
40441 /* Check whether INSN computes an alignment mask (an AND with -16), and
40442 if so, return a fully-expanded rtx for the masking operation. */
40443 static rtx
40444 alignment_mask (rtx_insn *insn)
40446 rtx body = PATTERN (insn);
40448 if (GET_CODE (body) != SET
40449 || GET_CODE (SET_SRC (body)) != AND
40450 || !REG_P (XEXP (SET_SRC (body), 0)))
40451 return 0;
40453 rtx mask = XEXP (SET_SRC (body), 1);
40455 if (GET_CODE (mask) == CONST_INT)
40457 if (INTVAL (mask) == -16)
40458 return alignment_with_canonical_addr (SET_SRC (body));
40459 else
40460 return 0;
40463 if (!REG_P (mask))
40464 return 0;
40466 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40467 df_ref use;
40468 rtx real_mask = 0;
40470 FOR_EACH_INSN_INFO_USE (use, insn_info)
40472 if (!rtx_equal_p (DF_REF_REG (use), mask))
40473 continue;
40475 struct df_link *def_link = DF_REF_CHAIN (use);
40476 if (!def_link || def_link->next)
40477 return 0;
40479 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
40480 rtx const_body = PATTERN (const_insn);
40481 if (GET_CODE (const_body) != SET)
40482 return 0;
40484 real_mask = SET_SRC (const_body);
40486 if (GET_CODE (real_mask) != CONST_INT
40487 || INTVAL (real_mask) != -16)
40488 return 0;
40491 if (real_mask == 0)
40492 return 0;
40494 return alignment_with_canonical_addr (SET_SRC (body));
40497 /* Given INSN that's a load or store based at BASE_REG, look for a
40498 feeding computation that aligns its address on a 16-byte boundary. */
40499 static rtx
40500 find_alignment_op (rtx_insn *insn, rtx base_reg)
40502 df_ref base_use;
40503 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40504 rtx and_operation = 0;
40506 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40508 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40509 continue;
40511 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40512 if (!base_def_link || base_def_link->next)
40513 break;
40515 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
40516 and_operation = alignment_mask (and_insn);
40517 if (and_operation != 0)
40518 break;
40521 return and_operation;
40524 struct del_info { bool replace; rtx_insn *replace_insn; };
/* If INSN is the load for an lvx pattern, put it in canonical form.  */
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_SRC (body)) == VEC_SELECT
              && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
        {
          struct df_link *link = DF_REF_CHAIN (def);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected lvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             load.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = mem;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "lvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

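/* A sketch of the shape handled above: before this pass, a
   little-endian __builtin_altivec_lvx expansion looks roughly like

     (set (reg X) (and (reg B) (const_int -16)))
     (set (reg T) (vec_select (mem (reg X)) ...))  ;; permuting load
     (set (reg V) (vec_select (reg T) ...))        ;; doubleword swap

   Propagating the AND into the load's address and queueing the swap
   to become a copy leaves an insn that matches the real lvx
   pattern.  */
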
/* If INSN is the store for an stvx pattern, put it in canonical form.  */
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
              && GET_CODE (SET_DEST (body)) == MEM
              && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
        {
          if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
            continue;

          struct df_link *link = DF_REF_CHAIN (src_use);
          if (!link || link->next)
            break;

          rtx_insn *swap_insn = DF_REF_INSN (link->ref);
          if (!insn_is_swap_p (swap_insn)
              || insn_is_load_p (swap_insn)
              || insn_is_store_p (swap_insn))
            break;

          /* Expected stvx pattern found.  Change the swap to
             a copy, and propagate the AND operation into the
             store.  */
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

          XEXP (mem, 0) = and_operation;
          SET_SRC (body) = src_reg;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);

          if (dump_file)
            fprintf (dump_file, "stvx opportunity found at %d\n",
                     INSN_UID (insn));
        }
    }
}

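/* The stvx shape is the mirror image, with the swap feeding the
   permuting store rather than consuming a permuting load:

     (set (reg T) (vec_select (reg V) ...))        ;; doubleword swap
     (set (mem (reg X)) (vec_select (reg T) ...))  ;; permuting store

   Here the store's source is rewritten to use (reg T) directly, the
   masked address is propagated in, and the swap is again queued to
   become a copy.  */
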
/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
        if (!NONDEBUG_INSN_P (insn))
          continue;

        if (insn_is_load_p (insn) && insn_is_swap_p (insn))
          recombine_lvx_pattern (insn, to_delete);
        else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
          recombine_stvx_pattern (insn, to_delete);
      }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
        rtx swap_body = PATTERN (to_delete[i].replace_insn);
        rtx src_reg = XEXP (SET_SRC (swap_body), 0);
        rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
        rtx_insn *new_insn = emit_insn_before (copy,
                                               to_delete[i].replace_insn);
        set_block_for_insn (new_insn,
                            BLOCK_FOR_INSN (to_delete[i].replace_insn));
        df_insn_rescan (new_insn);
        df_insn_delete (to_delete[i].replace_insn);
        remove_insn (to_delete[i].replace_insn);
        to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
      {
        unsigned int uid = INSN_UID (insn);
        if (NONDEBUG_INSN_P (insn))
          {
            insn_entry[uid].insn = insn;

            if (GET_CODE (insn) == CALL_INSN)
              insn_entry[uid].is_call = 1;

            /* Walk the uses and defs to see if we mention vector regs.
               Record any constraints on optimization of such mentions.  */
            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
            df_ref mention;
            FOR_EACH_INSN_INFO_USE (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If a use gets its value from a call insn, it will be
                   a hard register and will look like (reg:V4SI 3 3).
                   The df analysis creates two mentions for GPR3 and GPR4,
                   both DImode.  We must recognize this and treat it as a
                   vector mention to ensure the call is unioned with this
                   use.  */
                if (mode == DImode && DF_REF_INSN_INFO (mention))
                  {
                    rtx feeder = DF_REF_INSN (mention);
                    /* FIXME: It is pretty hard to get from the df mention
                       to the mode of the use in the insn.  We arbitrarily
                       pick a vector mode here, even though the use might
                       be a real DImode.  We can be too conservative
                       (create a web larger than necessary) because of
                       this, so consider eventually fixing this.  */
                    if (GET_CODE (feeder) == CALL_INSN)
                      mode = V4SImode;
                  }

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    union_defs (insn_entry, insn, mention);
                  }
              }
            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
              {
                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

                /* If we're loading up a hard vector register for a call,
                   it looks like (set (reg:V4SI 9 9) (...)).  The df
                   analysis creates two mentions for GPR9 and GPR10, both
                   DImode.  So relying on the mode from the mentions
                   isn't sufficient to ensure we union the call into the
                   web with the parameter setup code.  Test the insn's
                   pattern, not the insn itself, since GET_CODE of an
                   insn is never SET.  */
                rtx insn_body = PATTERN (insn);
                if (mode == DImode && GET_CODE (insn_body) == SET
                    && ALTIVEC_OR_VSX_VECTOR_MODE
                         (GET_MODE (SET_DEST (insn_body))))
                  mode = GET_MODE (SET_DEST (insn_body));

                if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
                  {
                    insn_entry[uid].is_relevant = 1;
                    if (mode == TImode || mode == V1TImode
                        || FLOAT128_VECTOR_P (mode))
                      insn_entry[uid].is_128_int = 1;
                    if (DF_REF_INSN_INFO (mention))
                      insn_entry[uid].contains_subreg
                        = !rtx_equal_p (DF_REF_REG (mention),
                                        DF_REF_REAL_REG (mention));
                    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
                    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
                      insn_entry[uid].is_live_out = 1;
                    union_uses (insn_entry, insn, mention);
                  }
              }

            if (insn_entry[uid].is_relevant)
              {
                /* Determine if this is a load or store.  */
                insn_entry[uid].is_load = insn_is_load_p (insn);
                insn_entry[uid].is_store = insn_is_store_p (insn);

                /* Determine if this is a doubleword swap.  If not,
                   determine whether it can legally be swapped.  */
                if (insn_is_swap_p (insn))
                  insn_entry[uid].is_swap = 1;
                else
                  {
                    unsigned int special = SH_NONE;
                    insn_entry[uid].is_swappable
                      = insn_is_swappable_p (insn_entry, insn, &special);
                    if (special != SH_NONE && insn_entry[uid].contains_subreg)
                      insn_entry[uid].is_swappable = 0;
                    else if (special != SH_NONE)
                      insn_entry[uid].special_handling = special;
                    else if (insn_entry[uid].contains_subreg)
                      insn_entry[uid].special_handling = SH_SUBREG;
                  }
              }
          }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
        continue;

      swap_web_entry *root
        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps

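/* A note on the gate above: the swap-removal opportunity only exists
   for little-endian VSX code generation before ISA 3.0, where the
   element-reversing lxvd2x/stxvd2x loads and stores require
   compensating doubleword swaps (xxpermdi).  With TARGET_P9_VECTOR
   the lxvx/stxvx forms access elements in array order, so there are
   no swaps for this pass to remove.  */
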
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  /* Otherwise, open-code the hold/clear/update sequences using the
     mffs and mtfsf builtins.  */
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

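  /* For reference, the FPSCR image produced by mffs sits in the low 32
     bits of the double; bits 1:0 of that word are the rounding mode
     (RN) and bit 2 is the non-IEEE mode bit (NI), which is why the
     hold mask above preserves exactly 0x7 of the low word.  */
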
  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower FPSCR word: all exception, enable,
     and status bits, together with the rounding modes.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                  | (*(uint64_t*)&fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

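/* The hold/clear/update triple built above follows the contract of
   TARGET_ATOMIC_ASSIGN_EXPAND_FENV for atomic compound assignment on
   floating-point types: roughly, *hold is emitted before the
   compare-and-exchange loop to save the environment and quiesce
   traps, *clear cleans the exception flags before a retry, and
   *update merges the newly raised exceptions into the saved
   environment once the store succeeds.  */
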
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

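/* Illustrative effect (a sketch, not an exhaustive statement of the
   conditions): when optimizing for speed with the reciprocal estimate
   machinery enabled so that RS6000_RECIP_AUTO_RSQRTE_P holds for
   SFmode, an expression like 1.0f / sqrtf (x) can be expanded via the
   rsqrt optab using the hardware reciprocal square root estimate plus
   Newton-Raphson refinement; when optimizing for size the hook
   declines and the generic expansion is used instead.  */
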
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"